LCOV - differential code coverage report
Current view: top level - src/backend/utils/adt - like_support.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 83.3 % 569 474 95 474
Current Date: 2023-04-08 15:15:32 Functions: 78.0 % 41 32 9 32
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * like_support.c
       4                 :  *    Planner support functions for LIKE, regex, and related operators.
       5                 :  *
       6                 :  * These routines handle special optimization of operators that can be
       7                 :  * used with index scans even though they are not known to the executor's
       8                 :  * indexscan machinery.  The key idea is that these operators allow us
       9                 :  * to derive approximate indexscan qual clauses, such that any tuples
      10                 :  * that pass the operator clause itself must also satisfy the simpler
      11                 :  * indexscan condition(s).  Then we can use the indexscan machinery
      12                 :  * to avoid scanning as much of the table as we'd otherwise have to,
      13                 :  * while applying the original operator as a qpqual condition to ensure
      14                 :  * we deliver only the tuples we want.  (In essence, we're using a regular
      15                 :  * index as if it were a lossy index.)
      16                 :  *
      17                 :  * An example of what we're doing is
      18                 :  *          textfield LIKE 'abc%def'
      19                 :  * from which we can generate the indexscanable conditions
      20                 :  *          textfield >= 'abc' AND textfield < 'abd'
      21                 :  * which allow efficient scanning of an index on textfield.
      22                 :  * (In reality, character set and collation issues make the transformation
      23                 :  * from LIKE to indexscan limits rather harder than one might think ...
      24                 :  * but that's the basic idea.)
      25                 :  *
      26                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      27                 :  * Portions Copyright (c) 1994, Regents of the University of California
      28                 :  *
      29                 :  *
      30                 :  * IDENTIFICATION
      31                 :  *    src/backend/utils/adt/like_support.c
      32                 :  *
      33                 :  *-------------------------------------------------------------------------
      34                 :  */
      35                 : #include "postgres.h"
      36                 : 
      37                 : #include <math.h>
      38                 : 
      39                 : #include "access/htup_details.h"
      40                 : #include "access/stratnum.h"
      41                 : #include "catalog/pg_collation.h"
      42                 : #include "catalog/pg_operator.h"
      43                 : #include "catalog/pg_opfamily.h"
      44                 : #include "catalog/pg_statistic.h"
      45                 : #include "catalog/pg_type.h"
      46                 : #include "mb/pg_wchar.h"
      47                 : #include "miscadmin.h"
      48                 : #include "nodes/makefuncs.h"
      49                 : #include "nodes/nodeFuncs.h"
      50                 : #include "nodes/supportnodes.h"
      51                 : #include "utils/builtins.h"
      52                 : #include "utils/datum.h"
      53                 : #include "utils/lsyscache.h"
      54                 : #include "utils/pg_locale.h"
      55                 : #include "utils/selfuncs.h"
      56                 : #include "utils/varlena.h"
      57                 : 
      58                 : 
      59                 : typedef enum
      60                 : {
      61                 :     Pattern_Type_Like,
      62                 :     Pattern_Type_Like_IC,
      63                 :     Pattern_Type_Regex,
      64                 :     Pattern_Type_Regex_IC,
      65                 :     Pattern_Type_Prefix
      66                 : } Pattern_Type;
      67                 : 
      68                 : typedef enum
      69                 : {
      70                 :     Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
      71                 : } Pattern_Prefix_Status;
      72                 : 
      73                 : static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
      74                 : static List *match_pattern_prefix(Node *leftop,
      75                 :                                   Node *rightop,
      76                 :                                   Pattern_Type ptype,
      77                 :                                   Oid expr_coll,
      78                 :                                   Oid opfamily,
      79                 :                                   Oid indexcollation);
      80                 : static double patternsel_common(PlannerInfo *root,
      81                 :                                 Oid oprid,
      82                 :                                 Oid opfuncid,
      83                 :                                 List *args,
      84                 :                                 int varRelid,
      85                 :                                 Oid collation,
      86                 :                                 Pattern_Type ptype,
      87                 :                                 bool negate);
      88                 : static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
      89                 :                                                   Pattern_Type ptype,
      90                 :                                                   Oid collation,
      91                 :                                                   Const **prefix,
      92                 :                                                   Selectivity *rest_selec);
      93                 : static Selectivity prefix_selectivity(PlannerInfo *root,
      94                 :                                       VariableStatData *vardata,
      95                 :                                       Oid eqopr, Oid ltopr, Oid geopr,
      96                 :                                       Oid collation,
      97                 :                                       Const *prefixcon);
      98                 : static Selectivity like_selectivity(const char *patt, int pattlen,
      99                 :                                     bool case_insensitive);
     100                 : static Selectivity regex_selectivity(const char *patt, int pattlen,
     101                 :                                      bool case_insensitive,
     102                 :                                      int fixed_prefix_len);
     103                 : static int  pattern_char_isalpha(char c, bool is_multibyte,
     104                 :                                  pg_locale_t locale, bool locale_is_c);
     105                 : static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
     106                 :                                   Oid collation);
     107                 : static Datum string_to_datum(const char *str, Oid datatype);
     108                 : static Const *string_to_const(const char *str, Oid datatype);
     109                 : static Const *string_to_bytea_const(const char *str, size_t str_len);
     110                 : 
     111                 : 
     112                 : /*
     113                 :  * Planner support functions for LIKE, regex, and related operators
     114                 :  */
     115                 : Datum
     116 CBC        2956 : textlike_support(PG_FUNCTION_ARGS)
     117                 : {
     118            2956 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     119                 : 
     120            2956 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));
     121                 : }
     122                 : 
     123                 : Datum
     124             206 : texticlike_support(PG_FUNCTION_ARGS)
     125                 : {
     126             206 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     127                 : 
     128             206 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));
     129                 : }
     130                 : 
     131                 : Datum
     132            9670 : textregexeq_support(PG_FUNCTION_ARGS)
     133                 : {
     134            9670 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     135                 : 
     136            9670 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));
     137                 : }
     138                 : 
     139                 : Datum
     140              86 : texticregexeq_support(PG_FUNCTION_ARGS)
     141                 : {
     142              86 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     143                 : 
     144              86 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
     145                 : }
     146                 : 
     147                 : Datum
     148              78 : text_starts_with_support(PG_FUNCTION_ARGS)
     149                 : {
     150              78 :     Node       *rawreq = (Node *) PG_GETARG_POINTER(0);
     151                 : 
     152              78 :     PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Prefix));
     153                 : }
     154                 : 
     155                 : /* Common code for the above */
     156                 : static Node *
     157           12996 : like_regex_support(Node *rawreq, Pattern_Type ptype)
     158                 : {
     159           12996 :     Node       *ret = NULL;
     160                 : 
     161           12996 :     if (IsA(rawreq, SupportRequestSelectivity))
     162                 :     {
     163                 :         /*
     164                 :          * Make a selectivity estimate for a function call, just as we'd do if
     165                 :          * the call was via the corresponding operator.
     166                 :          */
     167              12 :         SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
     168                 :         Selectivity s1;
     169                 : 
     170              12 :         if (req->is_join)
     171                 :         {
     172                 :             /*
     173                 :              * For the moment we just punt.  If patternjoinsel is ever
     174                 :              * improved to do better, this should be made to call it.
     175                 :              */
     176 UBC           0 :             s1 = DEFAULT_MATCH_SEL;
     177                 :         }
     178                 :         else
     179                 :         {
     180                 :             /* Share code with operator restriction selectivity functions */
     181 CBC          12 :             s1 = patternsel_common(req->root,
     182                 :                                    InvalidOid,
     183                 :                                    req->funcid,
     184                 :                                    req->args,
     185                 :                                    req->varRelid,
     186                 :                                    req->inputcollid,
     187                 :                                    ptype,
     188                 :                                    false);
     189                 :         }
     190              12 :         req->selectivity = s1;
     191              12 :         ret = (Node *) req;
     192                 :     }
     193           12984 :     else if (IsA(rawreq, SupportRequestIndexCondition))
     194                 :     {
     195                 :         /* Try to convert operator/function call to index conditions */
     196            3534 :         SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
     197                 : 
     198                 :         /*
     199                 :          * Currently we have no "reverse" match operators with the pattern on
     200                 :          * the left, so we only need consider cases with the indexkey on the
     201                 :          * left.
     202                 :          */
     203            3534 :         if (req->indexarg != 0)
     204 UBC           0 :             return NULL;
     205                 : 
     206 CBC        3534 :         if (is_opclause(req->node))
     207                 :         {
     208            3522 :             OpExpr     *clause = (OpExpr *) req->node;
     209                 : 
     210            3522 :             Assert(list_length(clause->args) == 2);
     211                 :             ret = (Node *)
     212            3522 :                 match_pattern_prefix((Node *) linitial(clause->args),
     213            3522 :                                      (Node *) lsecond(clause->args),
     214                 :                                      ptype,
     215                 :                                      clause->inputcollid,
     216                 :                                      req->opfamily,
     217                 :                                      req->indexcollation);
     218                 :         }
     219              12 :         else if (is_funcclause(req->node))   /* be paranoid */
     220                 :         {
     221              12 :             FuncExpr   *clause = (FuncExpr *) req->node;
     222                 : 
     223              12 :             Assert(list_length(clause->args) == 2);
     224                 :             ret = (Node *)
     225              12 :                 match_pattern_prefix((Node *) linitial(clause->args),
     226              12 :                                      (Node *) lsecond(clause->args),
     227                 :                                      ptype,
     228                 :                                      clause->inputcollid,
     229                 :                                      req->opfamily,
     230                 :                                      req->indexcollation);
     231                 :         }
     232                 :     }
     233                 : 
     234           12996 :     return ret;
     235                 : }
     236                 : 
     237                 : /*
     238                 :  * match_pattern_prefix
     239                 :  *    Try to generate an indexqual for a LIKE or regex operator.
     240                 :  */
     241                 : static List *
     242            3534 : match_pattern_prefix(Node *leftop,
     243                 :                      Node *rightop,
     244                 :                      Pattern_Type ptype,
     245                 :                      Oid expr_coll,
     246                 :                      Oid opfamily,
     247                 :                      Oid indexcollation)
     248                 : {
     249                 :     List       *result;
     250                 :     Const      *patt;
     251                 :     Const      *prefix;
     252                 :     Pattern_Prefix_Status pstatus;
     253                 :     Oid         ldatatype;
     254                 :     Oid         rdatatype;
     255                 :     Oid         eqopr;
     256                 :     Oid         ltopr;
     257                 :     Oid         geopr;
     258            3534 :     Oid         preopr = InvalidOid;
     259                 :     bool        collation_aware;
     260                 :     Expr       *expr;
     261                 :     FmgrInfo    ltproc;
     262                 :     Const      *greaterstr;
     263                 : 
     264                 :     /*
     265                 :      * Can't do anything with a non-constant or NULL pattern argument.
     266                 :      *
     267                 :      * Note that since we restrict ourselves to cases with a hard constant on
     268                 :      * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
     269                 :      * about verifying that.
     270                 :      */
     271            3534 :     if (!IsA(rightop, Const) ||
     272            3484 :         ((Const *) rightop)->constisnull)
     273              50 :         return NIL;
     274            3484 :     patt = (Const *) rightop;
     275                 : 
     276                 :     /*
     277                 :      * Not supported if the expression collation is nondeterministic.  The
     278                 :      * optimized equality or prefix tests use bytewise comparisons, which is
     279                 :      * not consistent with nondeterministic collations.  The actual
     280                 :      * pattern-matching implementation functions will later error out that
     281                 :      * pattern-matching is not supported with nondeterministic collations. (We
     282                 :      * could also error out here, but by doing it later we get more precise
     283                 :      * error messages.)  (It should be possible to support at least
     284                 :      * Pattern_Prefix_Exact, but no point as long as the actual
     285                 :      * pattern-matching implementations don't support it.)
     286                 :      *
     287                 :      * expr_coll is not set for a non-collation-aware data type such as bytea.
     288                 :      */
     289            3484 :     if (expr_coll && !get_collation_isdeterministic(expr_coll))
     290               6 :         return NIL;
     291                 : 
     292                 :     /*
     293                 :      * Try to extract a fixed prefix from the pattern.
     294                 :      */
     295            3478 :     pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
     296                 :                                    &prefix, NULL);
     297                 : 
     298                 :     /* fail if no fixed prefix */
     299            3478 :     if (pstatus == Pattern_Prefix_None)
     300             155 :         return NIL;
     301                 : 
     302                 :     /*
     303                 :      * Identify the operators we want to use, based on the type of the
     304                 :      * left-hand argument.  Usually these are just the type's regular
     305                 :      * comparison operators, but if we are considering one of the semi-legacy
     306                 :      * "pattern" opclasses, use the "pattern" operators instead.  Those are
     307                 :      * not collation-sensitive but always use C collation, as we want.  The
     308                 :      * selected operators also determine the needed type of the prefix
     309                 :      * constant.
     310                 :      */
     311            3323 :     ldatatype = exprType(leftop);
     312            3323 :     switch (ldatatype)
     313                 :     {
     314              34 :         case TEXTOID:
     315              34 :             if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
     316                 :             {
     317 UBC           0 :                 eqopr = TextEqualOperator;
     318               0 :                 ltopr = TextPatternLessOperator;
     319               0 :                 geopr = TextPatternGreaterEqualOperator;
     320               0 :                 collation_aware = false;
     321                 :             }
     322 CBC          34 :             else if (opfamily == TEXT_SPGIST_FAM_OID)
     323                 :             {
     324              12 :                 eqopr = TextEqualOperator;
     325              12 :                 ltopr = TextPatternLessOperator;
     326              12 :                 geopr = TextPatternGreaterEqualOperator;
     327                 :                 /* This opfamily has direct support for prefixing */
     328              12 :                 preopr = TextPrefixOperator;
     329              12 :                 collation_aware = false;
     330                 :             }
     331                 :             else
     332                 :             {
     333              22 :                 eqopr = TextEqualOperator;
     334              22 :                 ltopr = TextLessOperator;
     335              22 :                 geopr = TextGreaterEqualOperator;
     336              22 :                 collation_aware = true;
     337                 :             }
     338              34 :             rdatatype = TEXTOID;
     339              34 :             break;
     340            3277 :         case NAMEOID:
     341                 : 
     342                 :             /*
     343                 :              * Note that here, we need the RHS type to be text, so that the
     344                 :              * comparison value isn't improperly truncated to NAMEDATALEN.
     345                 :              */
     346            3277 :             eqopr = NameEqualTextOperator;
     347            3277 :             ltopr = NameLessTextOperator;
     348            3277 :             geopr = NameGreaterEqualTextOperator;
     349            3277 :             collation_aware = true;
     350            3277 :             rdatatype = TEXTOID;
     351            3277 :             break;
     352              12 :         case BPCHAROID:
     353              12 :             if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
     354                 :             {
     355 UBC           0 :                 eqopr = BpcharEqualOperator;
     356               0 :                 ltopr = BpcharPatternLessOperator;
     357               0 :                 geopr = BpcharPatternGreaterEqualOperator;
     358               0 :                 collation_aware = false;
     359                 :             }
     360                 :             else
     361                 :             {
     362 CBC          12 :                 eqopr = BpcharEqualOperator;
     363              12 :                 ltopr = BpcharLessOperator;
     364              12 :                 geopr = BpcharGreaterEqualOperator;
     365              12 :                 collation_aware = true;
     366                 :             }
     367              12 :             rdatatype = BPCHAROID;
     368              12 :             break;
     369 UBC           0 :         case BYTEAOID:
     370               0 :             eqopr = ByteaEqualOperator;
     371               0 :             ltopr = ByteaLessOperator;
     372               0 :             geopr = ByteaGreaterEqualOperator;
     373               0 :             collation_aware = false;
     374               0 :             rdatatype = BYTEAOID;
     375               0 :             break;
     376               0 :         default:
     377                 :             /* Can't get here unless we're attached to the wrong operator */
     378               0 :             return NIL;
     379                 :     }
     380                 : 
     381                 :     /*
     382                 :      * If necessary, coerce the prefix constant to the right type.  The given
     383                 :      * prefix constant is either text or bytea type, therefore the only case
     384                 :      * where we need to do anything is when converting text to bpchar.  Those
     385                 :      * two types are binary-compatible, so relabeling the Const node is
     386                 :      * sufficient.
     387                 :      */
     388 CBC        3323 :     if (prefix->consttype != rdatatype)
     389                 :     {
     390              12 :         Assert(prefix->consttype == TEXTOID &&
     391                 :                rdatatype == BPCHAROID);
     392              12 :         prefix->consttype = rdatatype;
     393                 :     }
     394                 : 
     395                 :     /*
     396                 :      * If we found an exact-match pattern, generate an "=" indexqual.
     397                 :      *
     398                 :      * Here and below, check to see whether the desired operator is actually
     399                 :      * supported by the index opclass, and fail quietly if not.  This allows
     400                 :      * us to not be concerned with specific opclasses (except for the legacy
     401                 :      * "pattern" cases); any index that correctly implements the operators
     402                 :      * will work.
     403                 :      */
     404            3323 :     if (pstatus == Pattern_Prefix_Exact)
     405                 :     {
     406            2710 :         if (!op_in_opfamily(eqopr, opfamily))
     407               6 :             return NIL;
     408            2704 :         expr = make_opclause(eqopr, BOOLOID, false,
     409                 :                              (Expr *) leftop, (Expr *) prefix,
     410                 :                              InvalidOid, indexcollation);
     411            2704 :         result = list_make1(expr);
     412            2704 :         return result;
     413                 :     }
     414                 : 
     415                 :     /*
     416                 :      * Otherwise, we have a nonempty required prefix of the values.  Some
     417                 :      * opclasses support prefix checks directly, otherwise we'll try to
     418                 :      * generate a range constraint.
     419                 :      */
     420             613 :     if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
     421                 :     {
     422              12 :         expr = make_opclause(preopr, BOOLOID, false,
     423                 :                              (Expr *) leftop, (Expr *) prefix,
     424                 :                              InvalidOid, indexcollation);
     425              12 :         result = list_make1(expr);
     426              12 :         return result;
     427                 :     }
     428                 : 
     429                 :     /*
     430                 :      * Since we need a range constraint, it's only going to work reliably if
     431                 :      * the index is collation-insensitive or has "C" collation.  Note that
     432                 :      * here we are looking at the index's collation, not the expression's
     433                 :      * collation -- this test is *not* dependent on the LIKE/regex operator's
     434                 :      * collation.
     435                 :      */
     436             601 :     if (collation_aware &&
     437             601 :         !lc_collate_is_c(indexcollation))
     438               4 :         return NIL;
     439                 : 
     440                 :     /*
     441                 :      * We can always say "x >= prefix".
     442                 :      */
     443             597 :     if (!op_in_opfamily(geopr, opfamily))
     444               6 :         return NIL;
     445             591 :     expr = make_opclause(geopr, BOOLOID, false,
     446                 :                          (Expr *) leftop, (Expr *) prefix,
     447                 :                          InvalidOid, indexcollation);
     448             591 :     result = list_make1(expr);
     449                 : 
     450                 :     /*-------
     451                 :      * If we can create a string larger than the prefix, we can say
     452                 :      * "x < greaterstr".  NB: we rely on make_greater_string() to generate
     453                 :      * a guaranteed-greater string, not just a probably-greater string.
     454                 :      * In general this is only guaranteed in C locale, so we'd better be
     455                 :      * using a C-locale index collation.
     456                 :      *-------
     457                 :      */
     458             591 :     if (!op_in_opfamily(ltopr, opfamily))
     459 UBC           0 :         return result;
     460 CBC         591 :     fmgr_info(get_opcode(ltopr), &ltproc);
     461             591 :     greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
     462             591 :     if (greaterstr)
     463                 :     {
     464             591 :         expr = make_opclause(ltopr, BOOLOID, false,
     465                 :                              (Expr *) leftop, (Expr *) greaterstr,
     466                 :                              InvalidOid, indexcollation);
     467             591 :         result = lappend(result, expr);
     468                 :     }
     469                 : 
     470             591 :     return result;
     471                 : }
     472                 : 
     473                 : 
     474                 : /*
     475                 :  * patternsel_common - generic code for pattern-match restriction selectivity.
     476                 :  *
     477                 :  * To support using this from either the operator or function paths, caller
     478                 :  * may pass either operator OID or underlying function OID; we look up the
     479                 :  * latter from the former if needed.  (We could just have patternsel() call
     480                 :  * get_opcode(), but the work would be wasted if we don't have a need to
     481                 :  * compare a fixed prefix to the pg_statistic data.)
     482                 :  *
     483                 :  * Note that oprid and/or opfuncid should be for the positive-match operator
     484                 :  * even when negate is true.
     485                 :  */
     486                 : static double
     487            4847 : patternsel_common(PlannerInfo *root,
     488                 :                   Oid oprid,
     489                 :                   Oid opfuncid,
     490                 :                   List *args,
     491                 :                   int varRelid,
     492                 :                   Oid collation,
     493                 :                   Pattern_Type ptype,
     494                 :                   bool negate)
     495                 : {
     496                 :     VariableStatData vardata;
     497                 :     Node       *other;
     498                 :     bool        varonleft;
     499                 :     Datum       constval;
     500                 :     Oid         consttype;
     501                 :     Oid         vartype;
     502                 :     Oid         rdatatype;
     503                 :     Oid         eqopr;
     504                 :     Oid         ltopr;
     505                 :     Oid         geopr;
     506                 :     Pattern_Prefix_Status pstatus;
     507                 :     Const      *patt;
     508            4847 :     Const      *prefix = NULL;
     509            4847 :     Selectivity rest_selec = 0;
     510            4847 :     double      nullfrac = 0.0;
     511                 :     double      result;
     512                 : 
     513                 :     /*
     514                 :      * Initialize result to the appropriate default estimate depending on
     515                 :      * whether it's a match or not-match operator.
     516                 :      */
     517            4847 :     if (negate)
     518             469 :         result = 1.0 - DEFAULT_MATCH_SEL;
     519                 :     else
     520            4378 :         result = DEFAULT_MATCH_SEL;
     521                 : 
     522                 :     /*
     523                 :      * If expression is not variable op constant, then punt and return the
     524                 :      * default estimate.
     525                 :      */
     526            4847 :     if (!get_restriction_variable(root, args, varRelid,
     527                 :                                   &vardata, &other, &varonleft))
     528              68 :         return result;
     529            4779 :     if (!varonleft || !IsA(other, Const))
     530                 :     {
     531              20 :         ReleaseVariableStats(vardata);
     532              20 :         return result;
     533                 :     }
     534                 : 
     535                 :     /*
     536                 :      * If the constant is NULL, assume operator is strict and return zero, ie,
     537                 :      * operator will never return TRUE.  (It's zero even for a negator op.)
     538                 :      */
     539            4759 :     if (((Const *) other)->constisnull)
     540                 :     {
     541 UBC           0 :         ReleaseVariableStats(vardata);
     542               0 :         return 0.0;
     543                 :     }
     544 CBC        4759 :     constval = ((Const *) other)->constvalue;
     545            4759 :     consttype = ((Const *) other)->consttype;
     546                 : 
     547                 :     /*
     548                 :      * The right-hand const is type text or bytea for all supported operators.
     549                 :      * We do not expect to see binary-compatible types here, since
     550                 :      * const-folding should have relabeled the const to exactly match the
     551                 :      * operator's declared type.
     552                 :      */
     553            4759 :     if (consttype != TEXTOID && consttype != BYTEAOID)
     554                 :     {
     555              12 :         ReleaseVariableStats(vardata);
     556              12 :         return result;
     557                 :     }
     558                 : 
     559                 :     /*
     560                 :      * Similarly, the exposed type of the left-hand side should be one of
     561                 :      * those we know.  (Do not look at vardata.atttype, which might be
     562                 :      * something binary-compatible but different.)  We can use it to identify
     563                 :      * the comparison operators and the required type of the comparison
     564                 :      * constant, much as in match_pattern_prefix().
     565                 :      */
     566            4747 :     vartype = vardata.vartype;
     567                 : 
     568            4747 :     switch (vartype)
     569                 :     {
     570             953 :         case TEXTOID:
     571             953 :             eqopr = TextEqualOperator;
     572             953 :             ltopr = TextLessOperator;
     573             953 :             geopr = TextGreaterEqualOperator;
     574             953 :             rdatatype = TEXTOID;
     575             953 :             break;
     576            3747 :         case NAMEOID:
     577                 : 
     578                 :             /*
     579                 :              * Note that here, we need the RHS type to be text, so that the
     580                 :              * comparison value isn't improperly truncated to NAMEDATALEN.
     581                 :              */
     582            3747 :             eqopr = NameEqualTextOperator;
     583            3747 :             ltopr = NameLessTextOperator;
     584            3747 :             geopr = NameGreaterEqualTextOperator;
     585            3747 :             rdatatype = TEXTOID;
     586            3747 :             break;
     587              42 :         case BPCHAROID:
     588              42 :             eqopr = BpcharEqualOperator;
     589              42 :             ltopr = BpcharLessOperator;
     590              42 :             geopr = BpcharGreaterEqualOperator;
     591              42 :             rdatatype = BPCHAROID;
     592              42 :             break;
     593               3 :         case BYTEAOID:
     594               3 :             eqopr = ByteaEqualOperator;
     595               3 :             ltopr = ByteaLessOperator;
     596               3 :             geopr = ByteaGreaterEqualOperator;
     597               3 :             rdatatype = BYTEAOID;
     598               3 :             break;
     599               2 :         default:
     600                 :             /* Can't get here unless we're attached to the wrong operator */
     601               2 :             ReleaseVariableStats(vardata);
     602               2 :             return result;
     603                 :     }
     604                 : 
     605                 :     /*
     606                 :      * Grab the nullfrac for use below.
     607                 :      */
     608            4745 :     if (HeapTupleIsValid(vardata.statsTuple))
     609                 :     {
     610                 :         Form_pg_statistic stats;
     611                 : 
     612            3576 :         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
     613            3576 :         nullfrac = stats->stanullfrac;
     614                 :     }
     615                 : 
     616                 :     /*
     617                 :      * Pull out any fixed prefix implied by the pattern, and estimate the
     618                 :      * fractional selectivity of the remainder of the pattern.  Unlike many
     619                 :      * other selectivity estimators, we use the pattern operator's actual
     620                 :      * collation for this step.  This is not because we expect the collation
     621                 :      * to make a big difference in the selectivity estimate (it seldom would),
     622                 :      * but because we want to be sure we cache compiled regexps under the
     623                 :      * right cache key, so that they can be re-used at runtime.
     624                 :      */
     625            4745 :     patt = (Const *) other;
     626            4745 :     pstatus = pattern_fixed_prefix(patt, ptype, collation,
     627                 :                                    &prefix, &rest_selec);
     628                 : 
     629                 :     /*
     630                 :      * If necessary, coerce the prefix constant to the right type.  The only
     631                 :      * case where we need to do anything is when converting text to bpchar.
     632                 :      * Those two types are binary-compatible, so relabeling the Const node is
     633                 :      * sufficient.
     634                 :      */
     635            4733 :     if (prefix && prefix->consttype != rdatatype)
     636                 :     {
     637              18 :         Assert(prefix->consttype == TEXTOID &&
     638                 :                rdatatype == BPCHAROID);
     639              18 :         prefix->consttype = rdatatype;
     640                 :     }
     641                 : 
     642            4733 :     if (pstatus == Pattern_Prefix_Exact)
     643                 :     {
     644                 :         /*
     645                 :          * Pattern specifies an exact match, so estimate as for '='
     646                 :          */
     647            2796 :         result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
     648                 :                               false, true, false);
     649                 :     }
     650                 :     else
     651                 :     {
     652                 :         /*
     653                 :          * Not exact-match pattern.  If we have a sufficiently large
     654                 :          * histogram, estimate selectivity for the histogram part of the
     655                 :          * population by counting matches in the histogram.  If not, estimate
     656                 :          * selectivity of the fixed prefix and remainder of pattern
     657                 :          * separately, then combine the two to get an estimate of the
     658                 :          * selectivity for the part of the column population represented by
     659                 :          * the histogram.  (For small histograms, we combine these
     660                 :          * approaches.)
     661                 :          *
     662                 :          * We then add up data for any most-common-values values; these are
     663                 :          * not in the histogram population, and we can get exact answers for
     664                 :          * them by applying the pattern operator, so there's no reason to
     665                 :          * approximate.  (If the MCVs cover a significant part of the total
     666                 :          * population, this gives us a big leg up in accuracy.)
     667                 :          */
     668                 :         Selectivity selec;
     669                 :         int         hist_size;
     670                 :         FmgrInfo    opproc;
     671                 :         double      mcv_selec,
     672                 :                     sumcommon;
     673                 : 
     674                 :         /* Try to use the histogram entries to get selectivity */
     675            1937 :         if (!OidIsValid(opfuncid))
     676            1925 :             opfuncid = get_opcode(oprid);
     677            1937 :         fmgr_info(opfuncid, &opproc);
     678                 : 
     679            1937 :         selec = histogram_selectivity(&vardata, &opproc, collation,
     680                 :                                       constval, true,
     681                 :                                       10, 1, &hist_size);
     682                 : 
     683                 :         /* If not at least 100 entries, use the heuristic method */
     684            1937 :         if (hist_size < 100)
     685                 :         {
     686                 :             Selectivity heursel;
     687                 :             Selectivity prefixsel;
     688                 : 
     689            1298 :             if (pstatus == Pattern_Prefix_Partial)
     690             923 :                 prefixsel = prefix_selectivity(root, &vardata,
     691                 :                                                eqopr, ltopr, geopr,
     692                 :                                                collation,
     693                 :                                                prefix);
     694                 :             else
     695             375 :                 prefixsel = 1.0;
     696            1298 :             heursel = prefixsel * rest_selec;
     697                 : 
     698            1298 :             if (selec < 0)       /* fewer than 10 histogram entries? */
     699            1154 :                 selec = heursel;
     700                 :             else
     701                 :             {
     702                 :                 /*
     703                 :                  * For histogram sizes from 10 to 100, we combine the
     704                 :                  * histogram and heuristic selectivities, putting increasingly
     705                 :                  * more trust in the histogram for larger sizes.
     706                 :                  */
     707             144 :                 double      hist_weight = hist_size / 100.0;
     708                 : 
     709             144 :                 selec = selec * hist_weight + heursel * (1.0 - hist_weight);
     710                 :             }
     711                 :         }
     712                 : 
     713                 :         /* In any case, don't believe extremely small or large estimates. */
     714            1937 :         if (selec < 0.0001)
     715             663 :             selec = 0.0001;
     716            1274 :         else if (selec > 0.9999)
     717              68 :             selec = 0.9999;
     718                 : 
     719                 :         /*
     720                 :          * If we have most-common-values info, add up the fractions of the MCV
     721                 :          * entries that satisfy MCV OP PATTERN.  These fractions contribute
     722                 :          * directly to the result selectivity.  Also add up the total fraction
     723                 :          * represented by MCV entries.
     724                 :          */
     725            1937 :         mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
     726                 :                                     constval, true,
     727                 :                                     &sumcommon);
     728                 : 
     729                 :         /*
     730                 :          * Now merge the results from the MCV and histogram calculations,
     731                 :          * realizing that the histogram covers only the non-null values that
     732                 :          * are not listed in MCV.
     733                 :          */
     734            1937 :         selec *= 1.0 - nullfrac - sumcommon;
     735            1937 :         selec += mcv_selec;
     736            1937 :         result = selec;
     737                 :     }
     738                 : 
     739                 :     /* now adjust if we wanted not-match rather than match */
     740            4733 :     if (negate)
     741             413 :         result = 1.0 - result - nullfrac;
     742                 : 
     743                 :     /* result should be in range, but make sure... */
     744            4733 :     CLAMP_PROBABILITY(result);
     745                 : 
     746            4733 :     if (prefix)
     747                 :     {
     748            4433 :         pfree(DatumGetPointer(prefix->constvalue));
     749            4433 :         pfree(prefix);
     750                 :     }
     751                 : 
     752            4733 :     ReleaseVariableStats(vardata);
     753                 : 
     754            4733 :     return result;
     755                 : }
     756                 : 
     757                 : /*
     758                 :  * Fix impedance mismatch between SQL-callable functions and patternsel_common
     759                 :  */
     760                 : static double
     761            4835 : patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
     762                 : {
     763            4835 :     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
     764            4835 :     Oid         operator = PG_GETARG_OID(1);
     765            4835 :     List       *args = (List *) PG_GETARG_POINTER(2);
     766            4835 :     int         varRelid = PG_GETARG_INT32(3);
     767            4835 :     Oid         collation = PG_GET_COLLATION();
     768                 : 
     769                 :     /*
     770                 :      * If this is for a NOT LIKE or similar operator, get the corresponding
     771                 :      * positive-match operator and work with that.
     772                 :      */
     773            4835 :     if (negate)
     774                 :     {
     775             469 :         operator = get_negator(operator);
     776             469 :         if (!OidIsValid(operator))
     777 UBC           0 :             elog(ERROR, "patternsel called for operator without a negator");
     778                 :     }
     779                 : 
     780 CBC        4835 :     return patternsel_common(root,
     781                 :                              operator,
     782                 :                              InvalidOid,
     783                 :                              args,
     784                 :                              varRelid,
     785                 :                              collation,
     786                 :                              ptype,
     787                 :                              negate);
     788                 : }
     789                 : 
     790                 : /*
     791                 :  *      regexeqsel      - Selectivity of regular-expression pattern match.
     792                 :  */
     793                 : Datum
     794            3212 : regexeqsel(PG_FUNCTION_ARGS)
     795                 : {
     796            3212 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
     797                 : }
     798                 : 
     799                 : /*
     800                 :  *      icregexeqsel    - Selectivity of case-insensitive regex match.
     801                 :  */
     802                 : Datum
     803              44 : icregexeqsel(PG_FUNCTION_ARGS)
     804                 : {
     805              44 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
     806                 : }
     807                 : 
     808                 : /*
     809                 :  *      likesel         - Selectivity of LIKE pattern match.
     810                 :  */
     811                 : Datum
     812            1007 : likesel(PG_FUNCTION_ARGS)
     813                 : {
     814            1007 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
     815                 : }
     816                 : 
     817                 : /*
     818                 :  *      prefixsel           - selectivity of prefix operator
     819                 :  */
     820                 : Datum
     821              27 : prefixsel(PG_FUNCTION_ARGS)
     822                 : {
     823              27 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
     824                 : }
     825                 : 
     826                 : /*
     827                 :  *
     828                 :  *      iclikesel           - Selectivity of ILIKE pattern match.
     829                 :  */
     830                 : Datum
     831              76 : iclikesel(PG_FUNCTION_ARGS)
     832                 : {
     833              76 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
     834                 : }
     835                 : 
     836                 : /*
     837                 :  *      regexnesel      - Selectivity of regular-expression pattern non-match.
     838                 :  */
     839                 : Datum
     840             392 : regexnesel(PG_FUNCTION_ARGS)
     841                 : {
     842             392 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
     843                 : }
     844                 : 
     845                 : /*
     846                 :  *      icregexnesel    - Selectivity of case-insensitive regex non-match.
     847                 :  */
     848                 : Datum
     849               8 : icregexnesel(PG_FUNCTION_ARGS)
     850                 : {
     851               8 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
     852                 : }
     853                 : 
     854                 : /*
     855                 :  *      nlikesel        - Selectivity of LIKE pattern non-match.
     856                 :  */
     857                 : Datum
     858              65 : nlikesel(PG_FUNCTION_ARGS)
     859                 : {
     860              65 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
     861                 : }
     862                 : 
     863                 : /*
     864                 :  *      icnlikesel      - Selectivity of ILIKE pattern non-match.
     865                 :  */
     866                 : Datum
     867               4 : icnlikesel(PG_FUNCTION_ARGS)
     868                 : {
     869               4 :     PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
     870                 : }
     871                 : 
     872                 : /*
     873                 :  * patternjoinsel       - Generic code for pattern-match join selectivity.
     874                 :  */
     875                 : static double
     876              91 : patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
     877                 : {
     878                 :     /* For the moment we just punt. */
     879              91 :     return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
     880                 : }
     881                 : 
     882                 : /*
     883                 :  *      regexeqjoinsel  - Join selectivity of regular-expression pattern match.
     884                 :  */
     885                 : Datum
     886              91 : regexeqjoinsel(PG_FUNCTION_ARGS)
     887                 : {
     888              91 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
     889                 : }
     890                 : 
     891                 : /*
     892                 :  *      icregexeqjoinsel    - Join selectivity of case-insensitive regex match.
     893                 :  */
     894                 : Datum
     895 UBC           0 : icregexeqjoinsel(PG_FUNCTION_ARGS)
     896                 : {
     897               0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
     898                 : }
     899                 : 
     900                 : /*
     901                 :  *      likejoinsel         - Join selectivity of LIKE pattern match.
     902                 :  */
     903                 : Datum
     904               0 : likejoinsel(PG_FUNCTION_ARGS)
     905                 : {
     906               0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
     907                 : }
     908                 : 
     909                 : /*
     910                 :  *      prefixjoinsel           - Join selectivity of prefix operator
     911                 :  */
     912                 : Datum
     913               0 : prefixjoinsel(PG_FUNCTION_ARGS)
     914                 : {
     915               0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
     916                 : }
     917                 : 
     918                 : /*
     919                 :  *      iclikejoinsel           - Join selectivity of ILIKE pattern match.
     920                 :  */
     921                 : Datum
     922               0 : iclikejoinsel(PG_FUNCTION_ARGS)
     923                 : {
     924               0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
     925                 : }
     926                 : 
     927                 : /*
     928                 :  *      regexnejoinsel  - Join selectivity of regex non-match.
     929                 :  */
     930                 : Datum
     931               0 : regexnejoinsel(PG_FUNCTION_ARGS)
     932                 : {
     933               0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
     934                 : }
     935                 : 
     936                 : /*
     937                 :  *      icregexnejoinsel    - Join selectivity of case-insensitive regex non-match.
     938                 :  */
     939                 : Datum
     940               0 : icregexnejoinsel(PG_FUNCTION_ARGS)
     941                 : {
     942               0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
     943                 : }
     944                 : 
     945                 : /*
     946                 :  *      nlikejoinsel        - Join selectivity of LIKE pattern non-match.
     947                 :  */
     948                 : Datum
     949               0 : nlikejoinsel(PG_FUNCTION_ARGS)
     950                 : {
     951               0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
     952                 : }
     953                 : 
     954                 : /*
     955                 :  *      icnlikejoinsel      - Join selectivity of ILIKE pattern non-match.
     956                 :  */
     957                 : Datum
     958               0 : icnlikejoinsel(PG_FUNCTION_ARGS)
     959                 : {
     960               0 :     PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
     961                 : }
     962                 : 
     963                 : 
     964                 : /*-------------------------------------------------------------------------
     965                 :  *
     966                 :  * Pattern analysis functions
     967                 :  *
     968                 :  * These routines support analysis of LIKE and regular-expression patterns
     969                 :  * by the planner/optimizer.  It's important that they agree with the
     970                 :  * regular-expression code in backend/regex/ and the LIKE code in
     971                 :  * backend/utils/adt/like.c.  Also, the computation of the fixed prefix
     972                 :  * must be conservative: if we report a string longer than the true fixed
     973                 :  * prefix, the query may produce actually wrong answers, rather than just
     974                 :  * getting a bad selectivity estimate!
     975                 :  *
     976                 :  *-------------------------------------------------------------------------
     977                 :  */
     978                 : 
     979                 : /*
     980                 :  * Extract the fixed prefix, if any, for a pattern.
     981                 :  *
     982                 :  * *prefix is set to a palloc'd prefix string (in the form of a Const node),
     983                 :  *  or to NULL if no fixed prefix exists for the pattern.
     984                 :  * If rest_selec is not NULL, *rest_selec is set to an estimate of the
     985                 :  *  selectivity of the remainder of the pattern (without any fixed prefix).
     986                 :  * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
     987                 :  *
     988                 :  * The return value distinguishes no fixed prefix, a partial prefix,
     989                 :  * or an exact-match-only pattern.
     990                 :  */
     991                 : 
     992                 : static Pattern_Prefix_Status
     993 CBC        1711 : like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
     994                 :                   Const **prefix_const, Selectivity *rest_selec)
     995                 : {
     996                 :     char       *match;
     997                 :     char       *patt;
     998                 :     int         pattlen;
     999            1711 :     Oid         typeid = patt_const->consttype;
    1000                 :     int         pos,
    1001                 :                 match_pos;
    1002            1711 :     bool        is_multibyte = (pg_database_encoding_max_length() > 1);
    1003            1711 :     pg_locale_t locale = 0;
    1004            1711 :     bool        locale_is_c = false;
    1005                 : 
    1006                 :     /* the right-hand const is type text or bytea */
    1007            1711 :     Assert(typeid == BYTEAOID || typeid == TEXTOID);
    1008                 : 
    1009            1711 :     if (case_insensitive)
    1010                 :     {
    1011             134 :         if (typeid == BYTEAOID)
    1012 UBC           0 :             ereport(ERROR,
    1013                 :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1014                 :                      errmsg("case insensitive matching not supported on type bytea")));
    1015                 : 
    1016 CBC         134 :         if (!OidIsValid(collation))
    1017                 :         {
    1018                 :             /*
    1019                 :              * This typically means that the parser could not resolve a
    1020                 :              * conflict of implicit collations, so report it that way.
    1021                 :              */
    1022 UBC           0 :             ereport(ERROR,
    1023                 :                     (errcode(ERRCODE_INDETERMINATE_COLLATION),
    1024                 :                      errmsg("could not determine which collation to use for ILIKE"),
    1025                 :                      errhint("Use the COLLATE clause to set the collation explicitly.")));
    1026                 :         }
    1027                 : 
    1028                 :         /* If case-insensitive, we need locale info */
    1029 CBC         134 :         if (lc_ctype_is_c(collation))
    1030              58 :             locale_is_c = true;
    1031                 :         else
    1032              76 :             locale = pg_newlocale_from_collation(collation);
    1033                 :     }
    1034                 : 
    1035            1711 :     if (typeid != BYTEAOID)
    1036                 :     {
    1037            1705 :         patt = TextDatumGetCString(patt_const->constvalue);
    1038            1705 :         pattlen = strlen(patt);
    1039                 :     }
    1040                 :     else
    1041                 :     {
    1042               6 :         bytea      *bstr = DatumGetByteaPP(patt_const->constvalue);
    1043                 : 
    1044               6 :         pattlen = VARSIZE_ANY_EXHDR(bstr);
    1045               6 :         patt = (char *) palloc(pattlen);
    1046               6 :         memcpy(patt, VARDATA_ANY(bstr), pattlen);
    1047               6 :         Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
    1048                 :     }
    1049                 : 
    1050            1711 :     match = palloc(pattlen + 1);
    1051            1711 :     match_pos = 0;
    1052           10611 :     for (pos = 0; pos < pattlen; pos++)
    1053                 :     {
    1054                 :         /* % and _ are wildcard characters in LIKE */
    1055           10564 :         if (patt[pos] == '%' ||
    1056            9489 :             patt[pos] == '_')
    1057                 :             break;
    1058                 : 
    1059                 :         /* Backslash escapes the next character */
    1060            8999 :         if (patt[pos] == '\\')
    1061                 :         {
    1062             131 :             pos++;
    1063             131 :             if (pos >= pattlen)
    1064 UBC           0 :                 break;
    1065                 :         }
    1066                 : 
    1067                 :         /* Stop if case-varying character (it's sort of a wildcard) */
    1068 CBC        9146 :         if (case_insensitive &&
    1069             147 :             pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
    1070              99 :             break;
    1071                 : 
    1072            8900 :         match[match_pos++] = patt[pos];
    1073                 :     }
    1074                 : 
    1075            1711 :     match[match_pos] = '\0';
    1076                 : 
    1077            1711 :     if (typeid != BYTEAOID)
    1078            1705 :         *prefix_const = string_to_const(match, typeid);
    1079                 :     else
    1080               6 :         *prefix_const = string_to_bytea_const(match, match_pos);
    1081                 : 
    1082            1711 :     if (rest_selec != NULL)
    1083            1144 :         *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
    1084                 :                                        case_insensitive);
    1085                 : 
    1086            1711 :     pfree(patt);
    1087            1711 :     pfree(match);
    1088                 : 
    1089                 :     /* in LIKE, an empty pattern is an exact match! */
    1090            1711 :     if (pos == pattlen)
    1091              47 :         return Pattern_Prefix_Exact;    /* reached end of pattern, so exact */
    1092                 : 
    1093            1664 :     if (match_pos > 0)
    1094            1387 :         return Pattern_Prefix_Partial;
    1095                 : 
    1096             277 :     return Pattern_Prefix_None;
    1097                 : }
    1098                 : 
    1099                 : static Pattern_Prefix_Status
    1100            6461 : regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
    1101                 :                    Const **prefix_const, Selectivity *rest_selec)
    1102                 : {
    1103            6461 :     Oid         typeid = patt_const->consttype;
    1104                 :     char       *prefix;
    1105                 :     bool        exact;
    1106                 : 
    1107                 :     /*
    1108                 :      * Should be unnecessary, there are no bytea regex operators defined. As
    1109                 :      * such, it should be noted that the rest of this function has *not* been
    1110                 :      * made safe for binary (possibly NULL containing) strings.
    1111                 :      */
    1112            6461 :     if (typeid == BYTEAOID)
    1113 UBC           0 :         ereport(ERROR,
    1114                 :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1115                 :                  errmsg("regular-expression matching not supported on type bytea")));
    1116                 : 
    1117                 :     /* Use the regexp machinery to extract the prefix, if any */
    1118 CBC        6461 :     prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
    1119                 :                                  case_insensitive, collation,
    1120                 :                                  &exact);
    1121                 : 
    1122            6449 :     if (prefix == NULL)
    1123                 :     {
    1124             364 :         *prefix_const = NULL;
    1125                 : 
    1126             364 :         if (rest_selec != NULL)
    1127                 :         {
    1128             300 :             char       *patt = TextDatumGetCString(patt_const->constvalue);
    1129                 : 
    1130             300 :             *rest_selec = regex_selectivity(patt, strlen(patt),
    1131                 :                                             case_insensitive,
    1132                 :                                             0);
    1133             300 :             pfree(patt);
    1134                 :         }
    1135                 : 
    1136             364 :         return Pattern_Prefix_None;
    1137                 :     }
    1138                 : 
    1139            6085 :     *prefix_const = string_to_const(prefix, typeid);
    1140                 : 
    1141            6085 :     if (rest_selec != NULL)
    1142                 :     {
    1143            3250 :         if (exact)
    1144                 :         {
    1145                 :             /* Exact match, so there's no additional selectivity */
    1146            2767 :             *rest_selec = 1.0;
    1147                 :         }
    1148                 :         else
    1149                 :         {
    1150             483 :             char       *patt = TextDatumGetCString(patt_const->constvalue);
    1151                 : 
    1152             966 :             *rest_selec = regex_selectivity(patt, strlen(patt),
    1153                 :                                             case_insensitive,
    1154             483 :                                             strlen(prefix));
    1155             483 :             pfree(patt);
    1156                 :         }
    1157                 :     }
    1158                 : 
    1159            6085 :     pfree(prefix);
    1160                 : 
    1161            6085 :     if (exact)
    1162            5459 :         return Pattern_Prefix_Exact;    /* pattern specifies exact match */
    1163                 :     else
    1164             626 :         return Pattern_Prefix_Partial;
    1165                 : }
    1166                 : 
    1167                 : static Pattern_Prefix_Status
    1168            8223 : pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
    1169                 :                      Const **prefix, Selectivity *rest_selec)
    1170                 : {
    1171                 :     Pattern_Prefix_Status result;
    1172                 : 
    1173            8223 :     switch (ptype)
    1174                 :     {
    1175            1577 :         case Pattern_Type_Like:
    1176            1577 :             result = like_fixed_prefix(patt, false, collation,
    1177                 :                                        prefix, rest_selec);
    1178            1577 :             break;
    1179             134 :         case Pattern_Type_Like_IC:
    1180             134 :             result = like_fixed_prefix(patt, true, collation,
    1181                 :                                        prefix, rest_selec);
    1182             134 :             break;
    1183            6415 :         case Pattern_Type_Regex:
    1184            6415 :             result = regex_fixed_prefix(patt, false, collation,
    1185                 :                                         prefix, rest_selec);
    1186            6403 :             break;
    1187              46 :         case Pattern_Type_Regex_IC:
    1188              46 :             result = regex_fixed_prefix(patt, true, collation,
    1189                 :                                         prefix, rest_selec);
    1190              46 :             break;
    1191              51 :         case Pattern_Type_Prefix:
    1192                 :             /* Prefix type work is trivial.  */
    1193              51 :             result = Pattern_Prefix_Partial;
    1194              51 :             *prefix = makeConst(patt->consttype,
    1195                 :                                 patt->consttypmod,
    1196                 :                                 patt->constcollid,
    1197                 :                                 patt->constlen,
    1198                 :                                 datumCopy(patt->constvalue,
    1199              51 :                                           patt->constbyval,
    1200                 :                                           patt->constlen),
    1201              51 :                                 patt->constisnull,
    1202              51 :                                 patt->constbyval);
    1203              51 :             if (rest_selec != NULL)
    1204              39 :                 *rest_selec = 1.0;  /* all */
    1205              51 :             break;
    1206 UBC           0 :         default:
    1207               0 :             elog(ERROR, "unrecognized ptype: %d", (int) ptype);
    1208                 :             result = Pattern_Prefix_None;   /* keep compiler quiet */
    1209                 :             break;
    1210                 :     }
    1211 CBC        8211 :     return result;
    1212                 : }
    1213                 : 
    1214                 : /*
    1215                 :  * Estimate the selectivity of a fixed prefix for a pattern match.
    1216                 :  *
    1217                 :  * A fixed prefix "foo" is estimated as the selectivity of the expression
    1218                 :  * "variable >= 'foo' AND variable < 'fop'".
    1219                 :  *
    1220                 :  * The selectivity estimate is with respect to the portion of the column
    1221                 :  * population represented by the histogram --- the caller must fold this
    1222                 :  * together with info about MCVs and NULLs.
    1223                 :  *
    1224                 :  * We use the given comparison operators and collation to do the estimation.
    1225                 :  * The given variable and Const must be of the associated datatype(s).
    1226                 :  *
    1227                 :  * XXX Note: we make use of the upper bound to estimate operator selectivity
    1228                 :  * even if the locale is such that we cannot rely on the upper-bound string.
    1229                 :  * The selectivity only needs to be approximately right anyway, so it seems
    1230                 :  * more useful to use the upper-bound code than not.
    1231                 :  */
    1232                 : static Selectivity
    1233             923 : prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
    1234                 :                    Oid eqopr, Oid ltopr, Oid geopr,
    1235                 :                    Oid collation,
    1236                 :                    Const *prefixcon)
    1237                 : {
    1238                 :     Selectivity prefixsel;
    1239                 :     FmgrInfo    opproc;
    1240                 :     Const      *greaterstrcon;
    1241                 :     Selectivity eq_sel;
    1242                 : 
    1243                 :     /* Estimate the selectivity of "x >= prefix" */
    1244             923 :     fmgr_info(get_opcode(geopr), &opproc);
    1245                 : 
    1246             923 :     prefixsel = ineq_histogram_selectivity(root, vardata,
    1247                 :                                            geopr, &opproc, true, true,
    1248                 :                                            collation,
    1249                 :                                            prefixcon->constvalue,
    1250                 :                                            prefixcon->consttype);
    1251                 : 
    1252             923 :     if (prefixsel < 0.0)
    1253                 :     {
    1254                 :         /* No histogram is present ... return a suitable default estimate */
    1255             534 :         return DEFAULT_MATCH_SEL;
    1256                 :     }
    1257                 : 
    1258                 :     /*
    1259                 :      * If we can create a string larger than the prefix, say "x < greaterstr".
    1260                 :      */
    1261             389 :     fmgr_info(get_opcode(ltopr), &opproc);
    1262             389 :     greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
    1263             389 :     if (greaterstrcon)
    1264                 :     {
    1265                 :         Selectivity topsel;
    1266                 : 
    1267             389 :         topsel = ineq_histogram_selectivity(root, vardata,
    1268                 :                                             ltopr, &opproc, false, false,
    1269                 :                                             collation,
    1270                 :                                             greaterstrcon->constvalue,
    1271                 :                                             greaterstrcon->consttype);
    1272                 : 
    1273                 :         /* ineq_histogram_selectivity worked before, it shouldn't fail now */
    1274             389 :         Assert(topsel >= 0.0);
    1275                 : 
    1276                 :         /*
    1277                 :          * Merge the two selectivities in the same way as for a range query
    1278                 :          * (see clauselist_selectivity()).  Note that we don't need to worry
    1279                 :          * about double-exclusion of nulls, since ineq_histogram_selectivity
    1280                 :          * doesn't count those anyway.
    1281                 :          */
    1282             389 :         prefixsel = topsel + prefixsel - 1.0;
    1283                 :     }
    1284                 : 
    1285                 :     /*
    1286                 :      * If the prefix is long then the two bounding values might be too close
    1287                 :      * together for the histogram to distinguish them usefully, resulting in a
    1288                 :      * zero estimate (plus or minus roundoff error). To avoid returning a
    1289                 :      * ridiculously small estimate, compute the estimated selectivity for
    1290                 :      * "variable = 'foo'", and clamp to that. (Obviously, the resultant
    1291                 :      * estimate should be at least that.)
    1292                 :      *
    1293                 :      * We apply this even if we couldn't make a greater string.  That case
    1294                 :      * suggests that the prefix is near the maximum possible, and thus
    1295                 :      * probably off the end of the histogram, and thus we probably got a very
    1296                 :      * small estimate from the >= condition; so we still need to clamp.
    1297                 :      */
    1298             389 :     eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
    1299                 :                           false, true, false);
    1300                 : 
    1301             389 :     prefixsel = Max(prefixsel, eq_sel);
    1302                 : 
    1303             389 :     return prefixsel;
    1304                 : }
    1305                 : 
    1306                 : 
    1307                 : /*
    1308                 :  * Estimate the selectivity of a pattern of the specified type.
    1309                 :  * Note that any fixed prefix of the pattern will have been removed already,
    1310                 :  * so actually we may be looking at just a fragment of the pattern.
    1311                 :  *
    1312                 :  * For now, we use a very simplistic approach: fixed characters reduce the
    1313                 :  * selectivity a good deal, character ranges reduce it a little,
    1314                 :  * wildcards (such as % for LIKE or .* for regex) increase it.
    1315                 :  */
    1316                 : 
    1317                 : #define FIXED_CHAR_SEL  0.20    /* about 1/5 */
    1318                 : #define CHAR_RANGE_SEL  0.25
    1319                 : #define ANY_CHAR_SEL    0.9     /* not 1, since it won't match end-of-string */
    1320                 : #define FULL_WILDCARD_SEL 5.0
    1321                 : #define PARTIAL_WILDCARD_SEL 2.0
    1322                 : 
    1323                 : static Selectivity
    1324            1144 : like_selectivity(const char *patt, int pattlen, bool case_insensitive)
    1325                 : {
    1326            1144 :     Selectivity sel = 1.0;
    1327                 :     int         pos;
    1328                 : 
    1329                 :     /* Skip any leading wildcard; it's already factored into initial sel */
    1330            2219 :     for (pos = 0; pos < pattlen; pos++)
    1331                 :     {
    1332            1482 :         if (patt[pos] != '%' && patt[pos] != '_')
    1333             407 :             break;
    1334                 :     }
    1335                 : 
    1336            3363 :     for (; pos < pattlen; pos++)
    1337                 :     {
    1338                 :         /* % and _ are wildcard characters in LIKE */
    1339            2219 :         if (patt[pos] == '%')
    1340             355 :             sel *= FULL_WILDCARD_SEL;
    1341            1864 :         else if (patt[pos] == '_')
    1342              76 :             sel *= ANY_CHAR_SEL;
    1343            1788 :         else if (patt[pos] == '\\')
    1344                 :         {
    1345                 :             /* Backslash quotes the next character */
    1346              20 :             pos++;
    1347              20 :             if (pos >= pattlen)
    1348 UBC           0 :                 break;
    1349 CBC          20 :             sel *= FIXED_CHAR_SEL;
    1350                 :         }
    1351                 :         else
    1352            1768 :             sel *= FIXED_CHAR_SEL;
    1353                 :     }
    1354                 :     /* Could get sel > 1 if multiple wildcards */
    1355            1144 :     if (sel > 1.0)
    1356 UBC           0 :         sel = 1.0;
    1357 CBC        1144 :     return sel;
    1358                 : }
    1359                 : 
    1360                 : static Selectivity
    1361             926 : regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
    1362                 : {
    1363             926 :     Selectivity sel = 1.0;
    1364             926 :     int         paren_depth = 0;
    1365             926 :     int         paren_pos = 0;  /* dummy init to keep compiler quiet */
    1366                 :     int         pos;
    1367                 : 
    1368                 :     /* since this function recurses, it could be driven to stack overflow */
    1369             926 :     check_stack_depth();
    1370                 : 
    1371            8978 :     for (pos = 0; pos < pattlen; pos++)
    1372                 :     {
    1373            8061 :         if (patt[pos] == '(')
    1374                 :         {
    1375             137 :             if (paren_depth == 0)
    1376             134 :                 paren_pos = pos;    /* remember start of parenthesized item */
    1377             137 :             paren_depth++;
    1378                 :         }
    1379            7924 :         else if (patt[pos] == ')' && paren_depth > 0)
    1380                 :         {
    1381             137 :             paren_depth--;
    1382             137 :             if (paren_depth == 0)
    1383             134 :                 sel *= regex_selectivity_sub(patt + (paren_pos + 1),
    1384             134 :                                              pos - (paren_pos + 1),
    1385                 :                                              case_insensitive);
    1386                 :         }
    1387            7787 :         else if (patt[pos] == '|' && paren_depth == 0)
    1388                 :         {
    1389                 :             /*
    1390                 :              * If unquoted | is present at paren level 0 in pattern, we have
    1391                 :              * multiple alternatives; sum their probabilities.
    1392                 :              */
    1393              18 :             sel += regex_selectivity_sub(patt + (pos + 1),
    1394               9 :                                          pattlen - (pos + 1),
    1395                 :                                          case_insensitive);
    1396               9 :             break;              /* rest of pattern is now processed */
    1397                 :         }
    1398            7778 :         else if (patt[pos] == '[')
    1399                 :         {
    1400              38 :             bool        negclass = false;
    1401                 : 
    1402              38 :             if (patt[++pos] == '^')
    1403                 :             {
    1404 UBC           0 :                 negclass = true;
    1405               0 :                 pos++;
    1406                 :             }
    1407 CBC          38 :             if (patt[pos] == ']')   /* ']' at start of class is not special */
    1408 UBC           0 :                 pos++;
    1409 CBC         186 :             while (pos < pattlen && patt[pos] != ']')
    1410             148 :                 pos++;
    1411              38 :             if (paren_depth == 0)
    1412              38 :                 sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
    1413                 :         }
    1414            7740 :         else if (patt[pos] == '.')
    1415                 :         {
    1416             323 :             if (paren_depth == 0)
    1417             193 :                 sel *= ANY_CHAR_SEL;
    1418                 :         }
    1419            7417 :         else if (patt[pos] == '*' ||
    1420            7134 :                  patt[pos] == '?' ||
    1421            7108 :                  patt[pos] == '+')
    1422                 :         {
    1423                 :             /* Ought to be smarter about quantifiers... */
    1424             316 :             if (paren_depth == 0)
    1425             181 :                 sel *= PARTIAL_WILDCARD_SEL;
    1426                 :         }
    1427            7101 :         else if (patt[pos] == '{')
    1428                 :         {
    1429             132 :             while (pos < pattlen && patt[pos] != '}')
    1430              94 :                 pos++;
    1431              38 :             if (paren_depth == 0)
    1432              32 :                 sel *= PARTIAL_WILDCARD_SEL;
    1433                 :         }
    1434            7063 :         else if (patt[pos] == '\\')
    1435                 :         {
    1436                 :             /* backslash quotes the next character */
    1437             100 :             pos++;
    1438             100 :             if (pos >= pattlen)
    1439 UBC           0 :                 break;
    1440 CBC         100 :             if (paren_depth == 0)
    1441              52 :                 sel *= FIXED_CHAR_SEL;
    1442                 :         }
    1443                 :         else
    1444                 :         {
    1445            6963 :             if (paren_depth == 0)
    1446            6056 :                 sel *= FIXED_CHAR_SEL;
    1447                 :         }
    1448                 :     }
    1449                 :     /* Could get sel > 1 if multiple wildcards */
    1450             926 :     if (sel > 1.0)
    1451              13 :         sel = 1.0;
    1452             926 :     return sel;
    1453                 : }
    1454                 : 
    1455                 : static Selectivity
    1456             783 : regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
    1457                 :                   int fixed_prefix_len)
    1458                 : {
    1459                 :     Selectivity sel;
    1460                 : 
    1461                 :     /* If patt doesn't end with $, consider it to have a trailing wildcard */
    1462             783 :     if (pattlen > 0 && patt[pattlen - 1] == '$' &&
    1463             137 :         (pattlen == 1 || patt[pattlen - 2] != '\\'))
    1464                 :     {
    1465                 :         /* has trailing $ */
    1466             137 :         sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
    1467                 :     }
    1468                 :     else
    1469                 :     {
    1470                 :         /* no trailing $ */
    1471             646 :         sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
    1472             646 :         sel *= FULL_WILDCARD_SEL;
    1473                 :     }
    1474                 : 
    1475                 :     /*
    1476                 :      * If there's a fixed prefix, discount its selectivity.  We have to be
    1477                 :      * careful here since a very long prefix could result in pow's result
    1478                 :      * underflowing to zero (in which case "sel" probably has as well).
    1479                 :      */
    1480             783 :     if (fixed_prefix_len > 0)
    1481                 :     {
    1482             483 :         double      prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
    1483                 : 
    1484             483 :         if (prefixsel > 0.0)
    1485             483 :             sel /= prefixsel;
    1486                 :     }
    1487                 : 
    1488                 :     /* Make sure result stays in range */
    1489             783 :     CLAMP_PROBABILITY(sel);
    1490             783 :     return sel;
    1491                 : }
    1492                 : 
    1493                 : /*
    1494                 :  * Check whether char is a letter (and, hence, subject to case-folding)
    1495                 :  *
    1496                 :  * In multibyte character sets or with ICU, we can't use isalpha, and it does
    1497                 :  * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
    1498                 :  * Instead, just assume any non-ASCII char is potentially case-varying, and
    1499                 :  * hard-wire knowledge of which ASCII chars are letters.
    1500                 :  */
    1501                 : static int
    1502             147 : pattern_char_isalpha(char c, bool is_multibyte,
    1503                 :                      pg_locale_t locale, bool locale_is_c)
    1504                 : {
    1505             147 :     if (locale_is_c)
    1506              78 :         return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    1507              69 :     else if (is_multibyte && IS_HIGHBIT_SET(c))
    1508 UBC           0 :         return true;
    1509 CBC          69 :     else if (locale && locale->provider == COLLPROVIDER_ICU)
    1510              63 :         return IS_HIGHBIT_SET(c) ||
    1511             126 :             (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    1512                 : #ifdef HAVE_LOCALE_T
    1513               6 :     else if (locale && locale->provider == COLLPROVIDER_LIBC)
    1514               6 :         return isalpha_l((unsigned char) c, locale->info.lt);
    1515                 : #endif
    1516                 :     else
    1517 UBC           0 :         return isalpha((unsigned char) c);
    1518                 : }
    1519                 : 
    1520                 : 
    1521                 : /*
    1522                 :  * For bytea, the increment function need only increment the current byte
    1523                 :  * (there are no multibyte characters to worry about).
    1524                 :  */
    1525                 : static bool
    1526               0 : byte_increment(unsigned char *ptr, int len)
    1527                 : {
    1528               0 :     if (*ptr >= 255)
    1529               0 :         return false;
    1530               0 :     (*ptr)++;
    1531               0 :     return true;
    1532                 : }
    1533                 : 
    1534                 : /*
    1535                 :  * Try to generate a string greater than the given string or any
    1536                 :  * string it is a prefix of.  If successful, return a palloc'd string
    1537                 :  * in the form of a Const node; else return NULL.
    1538                 :  *
    1539                 :  * The caller must provide the appropriate "less than" comparison function
    1540                 :  * for testing the strings, along with the collation to use.
    1541                 :  *
    1542                 :  * The key requirement here is that given a prefix string, say "foo",
    1543                 :  * we must be able to generate another string "fop" that is greater than
    1544                 :  * all strings "foobar" starting with "foo".  We can test that we have
    1545                 :  * generated a string greater than the prefix string, but in non-C collations
    1546                 :  * that is not a bulletproof guarantee that an extension of the string might
    1547                 :  * not sort after it; an example is that "foo " is less than "foo!", but it
    1548                 :  * is not clear that a "dictionary" sort ordering will consider "foo!" less
    1549                 :  * than "foo bar".  CAUTION: Therefore, this function should be used only for
    1550                 :  * estimation purposes when working in a non-C collation.
    1551                 :  *
    1552                 :  * To try to catch most cases where an extended string might otherwise sort
    1553                 :  * before the result value, we determine which of the strings "Z", "z", "y",
    1554                 :  * and "9" is seen as largest by the collation, and append that to the given
    1555                 :  * prefix before trying to find a string that compares as larger.
    1556                 :  *
    1557                 :  * To search for a greater string, we repeatedly "increment" the rightmost
    1558                 :  * character, using an encoding-specific character incrementer function.
    1559                 :  * When it's no longer possible to increment the last character, we truncate
    1560                 :  * off that character and start incrementing the next-to-rightmost.
    1561                 :  * For example, if "z" were the last character in the sort order, then we
    1562                 :  * could produce "foo" as a string greater than "fonz".
    1563                 :  *
    1564                 :  * This could be rather slow in the worst case, but in most cases we
    1565                 :  * won't have to try more than one or two strings before succeeding.
    1566                 :  *
    1567                 :  * Note that it's important for the character incrementer not to be too anal
    1568                 :  * about producing every possible character code, since in some cases the only
    1569                 :  * way to get a larger string is to increment a previous character position.
    1570                 :  * So we don't want to spend too much time trying every possible character
    1571                 :  * code at the last position.  A good rule of thumb is to be sure that we
    1572                 :  * don't try more than 256*K values for a K-byte character (and definitely
    1573                 :  * not 256^K, which is what an exhaustive search would approach).
    1574                 :  */
    1575                 : static Const *
    1576 CBC         980 : make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
    1577                 : {
    1578             980 :     Oid         datatype = str_const->consttype;
    1579                 :     char       *workstr;
    1580                 :     int         len;
    1581                 :     Datum       cmpstr;
    1582             980 :     char       *cmptxt = NULL;
    1583                 :     mbcharacter_incrementer charinc;
    1584                 : 
    1585                 :     /*
    1586                 :      * Get a modifiable copy of the prefix string in C-string format, and set
    1587                 :      * up the string we will compare to as a Datum.  In C locale this can just
    1588                 :      * be the given prefix string, otherwise we need to add a suffix.  Type
    1589                 :      * BYTEA sorts bytewise so it never needs a suffix either.
    1590                 :      */
    1591             980 :     if (datatype == BYTEAOID)
    1592                 :     {
    1593 UBC           0 :         bytea      *bstr = DatumGetByteaPP(str_const->constvalue);
    1594                 : 
    1595               0 :         len = VARSIZE_ANY_EXHDR(bstr);
    1596               0 :         workstr = (char *) palloc(len);
    1597               0 :         memcpy(workstr, VARDATA_ANY(bstr), len);
    1598               0 :         Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
    1599               0 :         cmpstr = str_const->constvalue;
    1600                 :     }
    1601                 :     else
    1602                 :     {
    1603 CBC         980 :         if (datatype == NAMEOID)
    1604 UBC           0 :             workstr = DatumGetCString(DirectFunctionCall1(nameout,
    1605                 :                                                           str_const->constvalue));
    1606                 :         else
    1607 CBC         980 :             workstr = TextDatumGetCString(str_const->constvalue);
    1608             980 :         len = strlen(workstr);
    1609             980 :         if (lc_collate_is_c(collation) || len == 0)
    1610             967 :             cmpstr = str_const->constvalue;
    1611                 :         else
    1612                 :         {
    1613                 :             /* If first time through, determine the suffix to use */
    1614                 :             static char suffixchar = 0;
    1615                 :             static Oid  suffixcollation = 0;
    1616                 : 
    1617              13 :             if (!suffixchar || suffixcollation != collation)
    1618                 :             {
    1619                 :                 char       *best;
    1620                 : 
    1621               4 :                 best = "Z";
    1622               4 :                 if (varstr_cmp(best, 1, "z", 1, collation) < 0)
    1623               3 :                     best = "z";
    1624               4 :                 if (varstr_cmp(best, 1, "y", 1, collation) < 0)
    1625 UBC           0 :                     best = "y";
    1626 CBC           4 :                 if (varstr_cmp(best, 1, "9", 1, collation) < 0)
    1627 UBC           0 :                     best = "9";
    1628 CBC           4 :                 suffixchar = *best;
    1629               4 :                 suffixcollation = collation;
    1630                 :             }
    1631                 : 
    1632                 :             /* And build the string to compare to */
    1633              13 :             if (datatype == NAMEOID)
    1634                 :             {
    1635 UBC           0 :                 cmptxt = palloc(len + 2);
    1636               0 :                 memcpy(cmptxt, workstr, len);
    1637               0 :                 cmptxt[len] = suffixchar;
    1638               0 :                 cmptxt[len + 1] = '\0';
    1639               0 :                 cmpstr = PointerGetDatum(cmptxt);
    1640                 :             }
    1641                 :             else
    1642                 :             {
    1643 CBC          13 :                 cmptxt = palloc(VARHDRSZ + len + 1);
    1644              13 :                 SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
    1645              13 :                 memcpy(VARDATA(cmptxt), workstr, len);
    1646              13 :                 *(VARDATA(cmptxt) + len) = suffixchar;
    1647              13 :                 cmpstr = PointerGetDatum(cmptxt);
    1648                 :             }
    1649                 :         }
    1650                 :     }
    1651                 : 
    1652                 :     /* Select appropriate character-incrementer function */
    1653             980 :     if (datatype == BYTEAOID)
    1654 UBC           0 :         charinc = byte_increment;
    1655                 :     else
    1656 CBC         980 :         charinc = pg_database_encoding_character_incrementer();
    1657                 : 
    1658                 :     /* And search ... */
    1659             980 :     while (len > 0)
    1660                 :     {
    1661                 :         int         charlen;
    1662                 :         unsigned char *lastchar;
    1663                 : 
    1664                 :         /* Identify the last character --- for bytea, just the last byte */
    1665             980 :         if (datatype == BYTEAOID)
    1666 UBC           0 :             charlen = 1;
    1667                 :         else
    1668 CBC         980 :             charlen = len - pg_mbcliplen(workstr, len, len - 1);
    1669             980 :         lastchar = (unsigned char *) (workstr + len - charlen);
    1670                 : 
    1671                 :         /*
    1672                 :          * Try to generate a larger string by incrementing the last character
    1673                 :          * (for BYTEA, we treat each byte as a character).
    1674                 :          *
    1675                 :          * Note: the incrementer function is expected to return true if it's
    1676                 :          * generated a valid-per-the-encoding new character, otherwise false.
    1677                 :          * The contents of the character on false return are unspecified.
    1678                 :          */
    1679             980 :         while (charinc(lastchar, charlen))
    1680                 :         {
    1681                 :             Const      *workstr_const;
    1682                 : 
    1683             980 :             if (datatype == BYTEAOID)
    1684 UBC           0 :                 workstr_const = string_to_bytea_const(workstr, len);
    1685                 :             else
    1686 CBC         980 :                 workstr_const = string_to_const(workstr, datatype);
    1687                 : 
    1688             980 :             if (DatumGetBool(FunctionCall2Coll(ltproc,
    1689                 :                                                collation,
    1690                 :                                                cmpstr,
    1691                 :                                                workstr_const->constvalue)))
    1692                 :             {
    1693                 :                 /* Successfully made a string larger than cmpstr */
    1694             980 :                 if (cmptxt)
    1695              13 :                     pfree(cmptxt);
    1696             980 :                 pfree(workstr);
    1697             980 :                 return workstr_const;
    1698                 :             }
    1699                 : 
    1700                 :             /* No good, release unusable value and try again */
    1701 UBC           0 :             pfree(DatumGetPointer(workstr_const->constvalue));
    1702               0 :             pfree(workstr_const);
    1703                 :         }
    1704                 : 
    1705                 :         /*
    1706                 :          * No luck here, so truncate off the last character and try to
    1707                 :          * increment the next one.
    1708                 :          */
    1709               0 :         len -= charlen;
    1710               0 :         workstr[len] = '\0';
    1711                 :     }
    1712                 : 
    1713                 :     /* Failed... */
    1714               0 :     if (cmptxt)
    1715               0 :         pfree(cmptxt);
    1716               0 :     pfree(workstr);
    1717                 : 
    1718               0 :     return NULL;
    1719                 : }
    1720                 : 
    1721                 : /*
    1722                 :  * Generate a Datum of the appropriate type from a C string.
    1723                 :  * Note that all of the supported types are pass-by-ref, so the
    1724                 :  * returned value should be pfree'd if no longer needed.
    1725                 :  */
    1726                 : static Datum
    1727 CBC        8770 : string_to_datum(const char *str, Oid datatype)
    1728                 : {
    1729            8770 :     Assert(str != NULL);
    1730                 : 
    1731                 :     /*
    1732                 :      * We cheat a little by assuming that CStringGetTextDatum() will do for
    1733                 :      * bpchar and varchar constants too...
    1734                 :      */
    1735            8770 :     if (datatype == NAMEOID)
    1736 UBC           0 :         return DirectFunctionCall1(namein, CStringGetDatum(str));
    1737 CBC        8770 :     else if (datatype == BYTEAOID)
    1738 UBC           0 :         return DirectFunctionCall1(byteain, CStringGetDatum(str));
    1739                 :     else
    1740 CBC        8770 :         return CStringGetTextDatum(str);
    1741                 : }
    1742                 : 
    1743                 : /*
    1744                 :  * Generate a Const node of the appropriate type from a C string.
    1745                 :  */
    1746                 : static Const *
    1747            8770 : string_to_const(const char *str, Oid datatype)
    1748                 : {
    1749            8770 :     Datum       conval = string_to_datum(str, datatype);
    1750                 :     Oid         collation;
    1751                 :     int         constlen;
    1752                 : 
    1753                 :     /*
    1754                 :      * We only need to support a few datatypes here, so hard-wire properties
    1755                 :      * instead of incurring the expense of catalog lookups.
    1756                 :      */
    1757            8770 :     switch (datatype)
    1758                 :     {
    1759            8770 :         case TEXTOID:
    1760                 :         case VARCHAROID:
    1761                 :         case BPCHAROID:
    1762            8770 :             collation = DEFAULT_COLLATION_OID;
    1763            8770 :             constlen = -1;
    1764            8770 :             break;
    1765                 : 
    1766 UBC           0 :         case NAMEOID:
    1767               0 :             collation = C_COLLATION_OID;
    1768               0 :             constlen = NAMEDATALEN;
    1769               0 :             break;
    1770                 : 
    1771               0 :         case BYTEAOID:
    1772               0 :             collation = InvalidOid;
    1773               0 :             constlen = -1;
    1774               0 :             break;
    1775                 : 
    1776               0 :         default:
    1777               0 :             elog(ERROR, "unexpected datatype in string_to_const: %u",
    1778                 :                  datatype);
    1779                 :             return NULL;
    1780                 :     }
    1781                 : 
    1782 CBC        8770 :     return makeConst(datatype, -1, collation, constlen,
    1783                 :                      conval, false, false);
    1784                 : }
    1785                 : 
    1786                 : /*
    1787                 :  * Generate a Const node of bytea type from a binary C string and a length.
    1788                 :  */
    1789                 : static Const *
    1790               6 : string_to_bytea_const(const char *str, size_t str_len)
    1791                 : {
    1792               6 :     bytea      *bstr = palloc(VARHDRSZ + str_len);
    1793                 :     Datum       conval;
    1794                 : 
    1795               6 :     memcpy(VARDATA(bstr), str, str_len);
    1796               6 :     SET_VARSIZE(bstr, VARHDRSZ + str_len);
    1797               6 :     conval = PointerGetDatum(bstr);
    1798                 : 
    1799               6 :     return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
    1800                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a