LCOV - differential code coverage report
Current view: top level - src/backend/optimizer/path - clausesel.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 95.0 % 241 229 2 6 4 2 67 5 155 6 64 8
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 8 8 3 1 4 2 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * clausesel.c
       4                 :  *    Routines to compute clause selectivities
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/optimizer/path/clausesel.c
      12                 :  *
      13                 :  *-------------------------------------------------------------------------
      14                 :  */
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include "nodes/makefuncs.h"
      18                 : #include "nodes/nodeFuncs.h"
      19                 : #include "optimizer/clauses.h"
      20                 : #include "optimizer/cost.h"
      21                 : #include "optimizer/optimizer.h"
      22                 : #include "optimizer/pathnode.h"
      23                 : #include "optimizer/plancat.h"
      24                 : #include "statistics/statistics.h"
      25                 : #include "utils/fmgroids.h"
      26                 : #include "utils/lsyscache.h"
      27                 : #include "utils/selfuncs.h"
      28                 : 
      29                 : /*
      30                 :  * Data structure for accumulating info about possible range-query
      31                 :  * clause pairs in clauselist_selectivity.
      32                 :  */
      33                 : typedef struct RangeQueryClause
      34                 : {
      35                 :     struct RangeQueryClause *next;  /* next in linked list */
      36                 :     Node       *var;            /* The common variable of the clauses */
      37                 :     bool        have_lobound;   /* found a low-bound clause yet? */
      38                 :     bool        have_hibound;   /* found a high-bound clause yet? */
      39                 :     Selectivity lobound;        /* Selectivity of a var > something clause */
      40                 :     Selectivity hibound;        /* Selectivity of a var < something clause */
      41                 : } RangeQueryClause;
      42                 : 
      43                 : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
      44                 :                            bool varonleft, bool isLTsel, Selectivity s2);
      45                 : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
      46                 :                                                List *clauses);
      47                 : static Selectivity clauselist_selectivity_or(PlannerInfo *root,
      48                 :                                              List *clauses,
      49                 :                                              int varRelid,
      50                 :                                              JoinType jointype,
      51                 :                                              SpecialJoinInfo *sjinfo,
      52                 :                                              bool use_extended_stats);
      53                 : 
      54                 : /****************************************************************************
      55                 :  *      ROUTINES TO COMPUTE SELECTIVITIES
      56                 :  ****************************************************************************/
      57                 : 
      58                 : /*
      59                 :  * clauselist_selectivity -
      60                 :  *    Compute the selectivity of an implicitly-ANDed list of boolean
      61                 :  *    expression clauses.  The list can be empty, in which case 1.0
      62                 :  *    must be returned.  List elements may be either RestrictInfos
      63                 :  *    or bare expression clauses --- the former is preferred since
      64                 :  *    it allows caching of results.
      65                 :  *
      66                 :  * See clause_selectivity() for the meaning of the additional parameters.
      67                 :  *
      68                 :  * The basic approach is to apply extended statistics first, on as many
      69                 :  * clauses as possible, in order to capture cross-column dependencies etc.
      70                 :  * The remaining clauses are then estimated by taking the product of their
      71                 :  * selectivities, but that's only right if they have independent
      72                 :  * probabilities, and in reality they are often NOT independent even if they
      73                 :  * only refer to a single column.  So, we want to be smarter where we can.
      74                 :  *
      75                 :  * We also recognize "range queries", such as "x > 34 AND x < 42".  Clauses
      76                 :  * are recognized as possible range query components if they are restriction
      77                 :  * opclauses whose operators have scalarltsel or a related function as their
      78                 :  * restriction selectivity estimator.  We pair up clauses of this form that
      79                 :  * refer to the same variable.  An unpairable clause of this kind is simply
      80                 :  * multiplied into the selectivity product in the normal way.  But when we
      81                 :  * find a pair, we know that the selectivities represent the relative
      82                 :  * positions of the low and high bounds within the column's range, so instead
      83                 :  * of figuring the selectivity as hisel * losel, we can figure it as hisel +
      84                 :  * losel - 1.  (To visualize this, see that hisel is the fraction of the range
      85                 :  * below the high bound, while losel is the fraction above the low bound; so
      86                 :  * hisel can be interpreted directly as a 0..1 value but we need to convert
      87                 :  * losel to 1-losel before interpreting it as a value.  Then the available
      88                 :  * range is 1-losel to hisel.  However, this calculation double-excludes
      89                 :  * nulls, so really we need hisel + losel + null_frac - 1.)
      90                 :  *
      91                 :  * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
      92                 :  * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
      93                 :  * yields an impossible (negative) result.
      94                 :  *
      95                 :  * A free side-effect is that we can recognize redundant inequalities such
      96                 :  * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
      97                 :  *
      98                 :  * Of course this is all very dependent on the behavior of the inequality
      99                 :  * selectivity functions; perhaps some day we can generalize the approach.
     100                 :  */
     101                 : Selectivity
     102 CBC      960274 : clauselist_selectivity(PlannerInfo *root,
     103                 :                        List *clauses,
     104                 :                        int varRelid,
     105                 :                        JoinType jointype,
     106                 :                        SpecialJoinInfo *sjinfo)
     107                 : {
     108          960274 :     return clauselist_selectivity_ext(root, clauses, varRelid,
     109                 :                                       jointype, sjinfo, true);
     110                 : }
     111                 : 
     112                 : /*
     113                 :  * clauselist_selectivity_ext -
     114                 :  *    Extended version of clauselist_selectivity().  If "use_extended_stats"
     115                 :  *    is false, all extended statistics will be ignored, and only per-column
     116                 :  *    statistics will be used.
     117                 :  */
     118                 : Selectivity
     119          963049 : clauselist_selectivity_ext(PlannerInfo *root,
     120                 :                            List *clauses,
     121                 :                            int varRelid,
     122                 :                            JoinType jointype,
     123                 :                            SpecialJoinInfo *sjinfo,
     124                 :                            bool use_extended_stats)
     125                 : {
     126          963049 :     Selectivity s1 = 1.0;
     127                 :     RelOptInfo *rel;
     128          963049 :     Bitmapset  *estimatedclauses = NULL;
     129          963049 :     RangeQueryClause *rqlist = NULL;
     130                 :     ListCell   *l;
     131                 :     int         listidx;
     132                 : 
     133                 :     /*
     134                 :      * If there's exactly one clause, just go directly to
     135                 :      * clause_selectivity_ext(). None of what we might do below is relevant.
     136                 :      */
     137          963049 :     if (list_length(clauses) == 1)
     138          508393 :         return clause_selectivity_ext(root, (Node *) linitial(clauses),
     139                 :                                       varRelid, jointype, sjinfo,
     140                 :                                       use_extended_stats);
     141                 : 
     142                 :     /*
     143                 :      * Determine if these clauses reference a single relation.  If so, and if
     144                 :      * it has extended statistics, try to apply those.
     145                 :      */
     146          454656 :     rel = find_single_rel_for_clauses(root, clauses);
     147          454656 :     if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
     148                 :     {
     149                 :         /*
     150                 :          * Estimate as many clauses as possible using extended statistics.
     151                 :          *
     152                 :          * 'estimatedclauses' is populated with the 0-based list position
     153                 :          * index of clauses estimated here, and that should be ignored below.
     154                 :          */
     155             894 :         s1 = statext_clauselist_selectivity(root, clauses, varRelid,
     156                 :                                             jointype, sjinfo, rel,
     157                 :                                             &estimatedclauses, false);
     158                 :     }
     159                 : 
     160                 :     /*
     161                 :      * Apply normal selectivity estimates for remaining clauses. We'll be
     162                 :      * careful to skip any clauses which were already estimated above.
     163                 :      *
     164                 :      * Anything that doesn't look like a potential rangequery clause gets
     165                 :      * multiplied into s1 and forgotten. Anything that does gets inserted into
     166                 :      * an rqlist entry.
     167                 :      */
     168          454656 :     listidx = -1;
     169          727446 :     foreach(l, clauses)
     170                 :     {
     171          272790 :         Node       *clause = (Node *) lfirst(l);
     172                 :         RestrictInfo *rinfo;
     173                 :         Selectivity s2;
     174                 : 
     175          272790 :         listidx++;
     176                 : 
     177                 :         /*
     178                 :          * Skip this clause if it's already been estimated by some other
     179                 :          * statistics above.
     180                 :          */
     181          272790 :         if (bms_is_member(listidx, estimatedclauses))
     182            1737 :             continue;
     183                 : 
     184                 :         /* Compute the selectivity of this clause in isolation */
     185          271053 :         s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
     186                 :                                     use_extended_stats);
     187                 : 
     188                 :         /*
     189                 :          * Check for being passed a RestrictInfo.
     190                 :          *
     191                 :          * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
     192                 :          * 0.0; just use that rather than looking for range pairs.
     193                 :          */
     194          271053 :         if (IsA(clause, RestrictInfo))
     195                 :         {
     196          270463 :             rinfo = (RestrictInfo *) clause;
     197          270463 :             if (rinfo->pseudoconstant)
     198                 :             {
     199           10483 :                 s1 = s1 * s2;
     200           10483 :                 continue;
     201                 :             }
     202          259980 :             clause = (Node *) rinfo->clause;
     203                 :         }
     204                 :         else
     205             590 :             rinfo = NULL;
     206                 : 
     207                 :         /*
     208                 :          * See if it looks like a restriction clause with a pseudoconstant on
     209                 :          * one side.  (Anything more complicated than that might not behave in
     210                 :          * the simple way we are expecting.)  Most of the tests here can be
     211                 :          * done more efficiently with rinfo than without.
     212                 :          */
     213          260570 :         if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
     214                 :         {
     215          222425 :             OpExpr     *expr = (OpExpr *) clause;
     216          222425 :             bool        varonleft = true;
     217                 :             bool        ok;
     218                 : 
     219          222425 :             if (rinfo)
     220                 :             {
     221 GNC      362272 :                 ok = (rinfo->num_base_rels == 1) &&
     222 CBC      138196 :                     (is_pseudo_constant_clause_relids(lsecond(expr->args),
     223            2233 :                                                       rinfo->right_relids) ||
     224            2233 :                      (varonleft = false,
     225            2233 :                       is_pseudo_constant_clause_relids(linitial(expr->args),
     226                 :                                                        rinfo->left_relids)));
     227                 :             }
     228                 :             else
     229                 :             {
     230            1164 :                 ok = (NumRelids(root, clause) == 1) &&
     231             582 :                     (is_pseudo_constant_clause(lsecond(expr->args)) ||
     232 UBC           0 :                      (varonleft = false,
     233               0 :                       is_pseudo_constant_clause(linitial(expr->args))));
     234                 :             }
     235                 : 
     236 CBC      222425 :             if (ok)
     237                 :             {
     238                 :                 /*
     239                 :                  * If it's not a "<"/"<="/">"/">=" operator, just merge the
     240                 :                  * selectivity in generically.  But if it's the right oprrest,
     241                 :                  * add the clause to rqlist for later processing.
     242                 :                  */
     243          138623 :                 switch (get_oprrest(expr->opno))
     244                 :                 {
     245            6255 :                     case F_SCALARLTSEL:
     246                 :                     case F_SCALARLESEL:
     247            6255 :                         addRangeClause(&rqlist, clause,
     248                 :                                        varonleft, true, s2);
     249            6255 :                         break;
     250           15408 :                     case F_SCALARGTSEL:
     251                 :                     case F_SCALARGESEL:
     252           15408 :                         addRangeClause(&rqlist, clause,
     253                 :                                        varonleft, false, s2);
     254           15408 :                         break;
     255          116960 :                     default:
     256                 :                         /* Just merge the selectivity in generically */
     257          116960 :                         s1 = s1 * s2;
     258          116960 :                         break;
     259                 :                 }
     260          138623 :                 continue;       /* drop to loop bottom */
     261                 :             }
     262                 :         }
     263                 : 
     264                 :         /* Not the right form, so treat it generically. */
     265          121947 :         s1 = s1 * s2;
     266                 :     }
     267                 : 
     268                 :     /*
     269                 :      * Now scan the rangequery pair list.
     270                 :      */
     271          472054 :     while (rqlist != NULL)
     272                 :     {
     273                 :         RangeQueryClause *rqnext;
     274                 : 
     275           17398 :         if (rqlist->have_lobound && rqlist->have_hibound)
     276            4106 :         {
     277                 :             /* Successfully matched a pair of range clauses */
     278                 :             Selectivity s2;
     279                 : 
     280                 :             /*
     281                 :              * Exact equality to the default value probably means the
     282                 :              * selectivity function punted.  This is not airtight but should
     283                 :              * be good enough.
     284                 :              */
     285            4106 :             if (rqlist->hibound == DEFAULT_INEQ_SEL ||
     286            3094 :                 rqlist->lobound == DEFAULT_INEQ_SEL)
     287                 :             {
     288            1012 :                 s2 = DEFAULT_RANGE_INEQ_SEL;
     289                 :             }
     290                 :             else
     291                 :             {
     292            3094 :                 s2 = rqlist->hibound + rqlist->lobound - 1.0;
     293                 : 
     294                 :                 /* Adjust for double-exclusion of NULLs */
     295            3094 :                 s2 += nulltestsel(root, IS_NULL, rqlist->var,
     296                 :                                   varRelid, jointype, sjinfo);
     297                 : 
     298                 :                 /*
     299                 :                  * A zero or slightly negative s2 should be converted into a
     300                 :                  * small positive value; we probably are dealing with a very
     301                 :                  * tight range and got a bogus result due to roundoff errors.
     302                 :                  * However, if s2 is very negative, then we probably have
     303                 :                  * default selectivity estimates on one or both sides of the
     304                 :                  * range that we failed to recognize above for some reason.
     305                 :                  */
     306            3094 :                 if (s2 <= 0.0)
     307                 :                 {
     308             431 :                     if (s2 < -0.01)
     309                 :                     {
     310                 :                         /*
     311                 :                          * No data available --- use a default estimate that
     312                 :                          * is small, but not real small.
     313                 :                          */
     314               6 :                         s2 = DEFAULT_RANGE_INEQ_SEL;
     315                 :                     }
     316                 :                     else
     317                 :                     {
     318                 :                         /*
     319                 :                          * It's just roundoff error; use a small positive
     320                 :                          * value
     321                 :                          */
     322             425 :                         s2 = 1.0e-10;
     323                 :                     }
     324                 :                 }
     325                 :             }
     326                 :             /* Merge in the selectivity of the pair of clauses */
     327            4106 :             s1 *= s2;
     328                 :         }
     329                 :         else
     330                 :         {
     331                 :             /* Only found one of a pair, merge it in generically */
     332           13292 :             if (rqlist->have_lobound)
     333           11125 :                 s1 *= rqlist->lobound;
     334                 :             else
     335            2167 :                 s1 *= rqlist->hibound;
     336                 :         }
     337                 :         /* release storage and advance */
     338           17398 :         rqnext = rqlist->next;
     339           17398 :         pfree(rqlist);
     340           17398 :         rqlist = rqnext;
     341                 :     }
     342                 : 
     343          454656 :     return s1;
     344                 : }
     345                 : 
     346                 : /*
     347                 :  * clauselist_selectivity_or -
     348                 :  *    Compute the selectivity of an implicitly-ORed list of boolean
     349                 :  *    expression clauses.  The list can be empty, in which case 0.0
     350                 :  *    must be returned.  List elements may be either RestrictInfos
     351                 :  *    or bare expression clauses --- the former is preferred since
     352                 :  *    it allows caching of results.
     353                 :  *
     354                 :  * See clause_selectivity() for the meaning of the additional parameters.
     355                 :  *
     356                 :  * The basic approach is to apply extended statistics first, on as many
     357                 :  * clauses as possible, in order to capture cross-column dependencies etc.
     358                 :  * The remaining clauses are then estimated as if they were independent.
     359                 :  */
     360                 : static Selectivity
     361            5132 : clauselist_selectivity_or(PlannerInfo *root,
     362                 :                           List *clauses,
     363                 :                           int varRelid,
     364                 :                           JoinType jointype,
     365                 :                           SpecialJoinInfo *sjinfo,
     366                 :                           bool use_extended_stats)
     367                 : {
     368            5132 :     Selectivity s1 = 0.0;
     369                 :     RelOptInfo *rel;
     370            5132 :     Bitmapset  *estimatedclauses = NULL;
     371                 :     ListCell   *lc;
     372                 :     int         listidx;
     373                 : 
     374                 :     /*
     375                 :      * Determine if these clauses reference a single relation.  If so, and if
     376                 :      * it has extended statistics, try to apply those.
     377                 :      */
     378            5132 :     rel = find_single_rel_for_clauses(root, clauses);
     379            5132 :     if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
     380                 :     {
     381                 :         /*
     382                 :          * Estimate as many clauses as possible using extended statistics.
     383                 :          *
     384                 :          * 'estimatedclauses' is populated with the 0-based list position
     385                 :          * index of clauses estimated here, and that should be ignored below.
     386                 :          */
     387              54 :         s1 = statext_clauselist_selectivity(root, clauses, varRelid,
     388                 :                                             jointype, sjinfo, rel,
     389                 :                                             &estimatedclauses, true);
     390                 :     }
     391                 : 
     392                 :     /*
     393                 :      * Estimate the remaining clauses as if they were independent.
     394                 :      *
     395                 :      * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
     396                 :      * for the probable overlap of selected tuple sets.
     397                 :      *
     398                 :      * XXX is this too conservative?
     399                 :      */
     400            5132 :     listidx = -1;
     401           17065 :     foreach(lc, clauses)
     402                 :     {
     403                 :         Selectivity s2;
     404                 : 
     405           11933 :         listidx++;
     406                 : 
     407                 :         /*
     408                 :          * Skip this clause if it's already been estimated by some other
     409                 :          * statistics above.
     410                 :          */
     411           11933 :         if (bms_is_member(listidx, estimatedclauses))
     412             120 :             continue;
     413                 : 
     414           11813 :         s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
     415                 :                                     jointype, sjinfo, use_extended_stats);
     416                 : 
     417           11813 :         s1 = s1 + s2 - s1 * s2;
     418                 :     }
     419                 : 
     420            5132 :     return s1;
     421                 : }
     422                 : 
     423                 : /*
     424                 :  * addRangeClause --- add a new range clause for clauselist_selectivity
     425                 :  *
     426                 :  * Here is where we try to match up pairs of range-query clauses
     427                 :  */
     428                 : static void
     429           21663 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
     430                 :                bool varonleft, bool isLTsel, Selectivity s2)
     431                 : {
     432                 :     RangeQueryClause *rqelem;
     433                 :     Node       *var;
     434                 :     bool        is_lobound;
     435                 : 
     436           21663 :     if (varonleft)
     437                 :     {
     438           21558 :         var = get_leftop((Expr *) clause);
     439           21558 :         is_lobound = !isLTsel;  /* x < something is high bound */
     440                 :     }
     441                 :     else
     442                 :     {
     443             105 :         var = get_rightop((Expr *) clause);
     444             105 :         is_lobound = isLTsel;   /* something < x is low bound */
     445                 :     }
     446                 : 
     447           22281 :     for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
     448                 :     {
     449                 :         /*
     450                 :          * We use full equal() here because the "var" might be a function of
     451                 :          * one or more attributes of the same relation...
     452                 :          */
     453            4883 :         if (!equal(var, rqelem->var))
     454             618 :             continue;
     455                 :         /* Found the right group to put this clause in */
     456            4265 :         if (is_lobound)
     457                 :         {
     458             156 :             if (!rqelem->have_lobound)
     459                 :             {
     460              66 :                 rqelem->have_lobound = true;
     461              66 :                 rqelem->lobound = s2;
     462                 :             }
     463                 :             else
     464                 :             {
     465                 : 
     466                 :                 /*------
     467                 :                  * We have found two similar clauses, such as
     468                 :                  * x < y AND x <= z.
     469                 :                  * Keep only the more restrictive one.
     470                 :                  *------
     471                 :                  */
     472              90 :                 if (rqelem->lobound > s2)
     473 UBC           0 :                     rqelem->lobound = s2;
     474                 :             }
     475                 :         }
     476                 :         else
     477                 :         {
     478 CBC        4109 :             if (!rqelem->have_hibound)
     479                 :             {
     480            4040 :                 rqelem->have_hibound = true;
     481            4040 :                 rqelem->hibound = s2;
     482                 :             }
     483                 :             else
     484                 :             {
     485                 : 
     486                 :                 /*------
     487                 :                  * We have found two similar clauses, such as
     488                 :                  * x > y AND x >= z.
     489                 :                  * Keep only the more restrictive one.
     490                 :                  *------
     491                 :                  */
     492              69 :                 if (rqelem->hibound > s2)
     493              18 :                     rqelem->hibound = s2;
     494                 :             }
     495                 :         }
     496            4265 :         return;
     497                 :     }
     498                 : 
     499                 :     /* No matching var found, so make a new clause-pair data structure */
     500           17398 :     rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
     501           17398 :     rqelem->var = var;
     502           17398 :     if (is_lobound)
     503                 :     {
     504           15165 :         rqelem->have_lobound = true;
     505           15165 :         rqelem->have_hibound = false;
     506           15165 :         rqelem->lobound = s2;
     507                 :     }
     508                 :     else
     509                 :     {
     510            2233 :         rqelem->have_lobound = false;
     511            2233 :         rqelem->have_hibound = true;
     512            2233 :         rqelem->hibound = s2;
     513                 :     }
     514           17398 :     rqelem->next = *rqlist;
     515           17398 :     *rqlist = rqelem;
     516                 : }
     517                 : 
     518                 : /*
     519                 :  * find_single_rel_for_clauses
     520                 :  *      Examine each clause in 'clauses' and determine if all clauses
     521                 :  *      reference only a single relation.  If so return that relation,
     522                 :  *      otherwise return NULL.
     523                 :  */
     524                 : static RelOptInfo *
     525          460840 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
     526                 : {
     527          460840 :     int         lastrelid = 0;
     528                 :     ListCell   *l;
     529                 : 
     530          617942 :     foreach(l, clauses)
     531                 :     {
     532          226345 :         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
     533                 :         int         relid;
     534                 : 
     535                 :         /*
     536                 :          * If we have a list of bare clauses rather than RestrictInfos, we
     537                 :          * could pull out their relids the hard way with pull_varnos().
     538                 :          * However, currently the extended-stats machinery won't do anything
     539                 :          * with non-RestrictInfo clauses anyway, so there's no point in
     540                 :          * spending extra cycles; just fail if that's what we have.
     541                 :          *
     542                 :          * An exception to that rule is if we have a bare BoolExpr AND clause.
     543                 :          * We treat this as a special case because the restrictinfo machinery
     544                 :          * doesn't build RestrictInfos on top of AND clauses.
     545                 :          */
     546          226345 :         if (is_andclause(rinfo))
     547             894 :         {
     548                 :             RelOptInfo *rel;
     549                 : 
     550            1052 :             rel = find_single_rel_for_clauses(root,
     551                 :                                               ((BoolExpr *) rinfo)->args);
     552                 : 
     553            1052 :             if (rel == NULL)
     554           69243 :                 return NULL;
     555             894 :             if (lastrelid == 0)
     556             433 :                 lastrelid = rel->relid;
     557             461 :             else if (rel->relid != lastrelid)
     558 UBC           0 :                 return NULL;
     559                 : 
     560 CBC       11482 :             continue;
     561                 :         }
     562                 : 
     563          225293 :         if (!IsA(rinfo, RestrictInfo))
     564             511 :             return NULL;
     565                 : 
     566          224782 :         if (bms_is_empty(rinfo->clause_relids))
     567           10588 :             continue;           /* we can ignore variable-free clauses */
     568          214194 :         if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
     569           67412 :             return NULL;        /* multiple relations in this clause */
     570          146782 :         if (lastrelid == 0)
     571           72125 :             lastrelid = relid;  /* first clause referencing a relation */
     572           74657 :         else if (relid != lastrelid)
     573            1162 :             return NULL;        /* relation not same as last one */
     574                 :     }
     575                 : 
     576          391597 :     if (lastrelid != 0)
     577           58638 :         return find_base_rel(root, lastrelid);
     578                 : 
     579          332959 :     return NULL;                /* no clauses */
     580                 : }
     581                 : 
     582                 : /*
     583                 :  * treat_as_join_clause -
     584                 :  *    Decide whether an operator clause is to be handled by the
     585                 :  *    restriction or join estimator.  Subroutine for clause_selectivity().
     586                 :  */
     587                 : static inline bool
     588 GIC      348021 : treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
     589                 :                      int varRelid, SpecialJoinInfo *sjinfo)
     590                 : {
     591          348021 :     if (varRelid != 0)
     592                 :     {
     593                 :         /*
     594                 :          * Caller is forcing restriction mode (eg, because we are examining an
     595                 :          * inner indexscan qual).
     596                 :          */
     597          127699 :         return false;
     598 ECB             :     }
     599 CBC      220322 :     else if (sjinfo == NULL)
     600                 :     {
     601 ECB             :         /*
     602                 :          * It must be a restriction clause, since it's being evaluated at a
     603                 :          * scan node.
     604                 :          */
     605 GIC      132976 :         return false;
     606                 :     }
     607                 :     else
     608                 :     {
     609                 :         /*
     610                 :          * Otherwise, it's a join if there's more than one base relation used.
     611                 :          * We can optimize this calculation if an rinfo was passed.
     612                 :          *
     613                 :          * XXX  Since we know the clause is being evaluated at a join, the
     614                 :          * only way it could be single-relation is if it was delayed by outer
     615                 :          * joins.  We intentionally count only baserels here, not OJs that
     616                 :          * might be present in rinfo->clause_relids, so that we direct such
     617                 :          * cases to the restriction qual estimators not join estimators.
     618                 :          * Eventually some notice should be taken of the possibility of
     619                 :          * injected nulls, but we'll likely want to do that in the restriction
     620                 :          * estimators rather than starting to treat such cases as join quals.
     621                 :          */
     622           87346 :         if (rinfo)
     623 GNC       87118 :             return (rinfo->num_base_rels > 1);
     624                 :         else
     625 GIC         228 :             return (NumRelids(root, clause) > 1);
     626                 :     }
     627                 : }
     628                 : 
     629                 : 
     630                 : /*
     631                 :  * clause_selectivity -
     632                 :  *    Compute the selectivity of a general boolean expression clause.
     633                 :  *
     634                 :  * The clause can be either a RestrictInfo or a plain expression.  If it's
     635                 :  * a RestrictInfo, we try to cache the selectivity for possible re-use,
     636                 :  * so passing RestrictInfos is preferred.
     637                 :  *
     638                 :  * varRelid is either 0 or a rangetable index.
     639                 :  *
     640                 :  * When varRelid is not 0, only variables belonging to that relation are
     641                 :  * considered in computing selectivity; other vars are treated as constants
     642                 :  * of unknown values.  This is appropriate for estimating the selectivity of
     643                 :  * a join clause that is being used as a restriction clause in a scan of a
     644                 :  * nestloop join's inner relation --- varRelid should then be the ID of the
     645                 :  * inner relation.
     646                 :  *
     647                 :  * When varRelid is 0, all variables are treated as variables.  This
     648 ECB             :  * is appropriate for ordinary join clauses and restriction clauses.
     649                 :  *
     650                 :  * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
     651                 :  * if the clause isn't a join clause.
     652                 :  *
     653                 :  * sjinfo is NULL for a non-join clause, otherwise it provides additional
     654                 :  * context information about the join being performed.  There are some
     655                 :  * special cases:
     656                 :  *  1. For a special (not INNER) join, sjinfo is always a member of
     657                 :  *     root->join_info_list.
     658                 :  *  2. For an INNER join, sjinfo is just a transient struct, and only the
     659                 :  *     relids and jointype fields in it can be trusted.
     660                 :  * It is possible for jointype to be different from sjinfo->jointype.
     661                 :  * This indicates we are considering a variant join: either with
     662                 :  * the LHS and RHS switched, or with one input unique-ified.
     663                 :  *
     664                 :  * Note: when passing nonzero varRelid, it's normally appropriate to set
     665                 :  * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
     666                 :  * join clause; because we aren't treating it as a join clause.
     667                 :  */
     668                 : Selectivity
     669 GIC      186281 : clause_selectivity(PlannerInfo *root,
     670                 :                    Node *clause,
     671                 :                    int varRelid,
     672 ECB             :                    JoinType jointype,
     673                 :                    SpecialJoinInfo *sjinfo)
     674                 : {
     675 GIC      186281 :     return clause_selectivity_ext(root, clause, varRelid,
     676 ECB             :                                   jointype, sjinfo, true);
     677 EUB             : }
     678                 : 
     679 ECB             : /*
     680                 :  * clause_selectivity_ext -
     681                 :  *    Extended version of clause_selectivity().  If "use_extended_stats" is
     682                 :  *    false, all extended statistics will be ignored, and only per-column
     683                 :  *    statistics will be used.
     684                 :  */
     685                 : Selectivity
     686 GIC      980847 : clause_selectivity_ext(PlannerInfo *root,
     687                 :                        Node *clause,
     688                 :                        int varRelid,
     689                 :                        JoinType jointype,
     690                 :                        SpecialJoinInfo *sjinfo,
     691 ECB             :                        bool use_extended_stats)
     692                 : {
     693 CBC      980847 :     Selectivity s1 = 0.5;       /* default for any unhandled clause type */
     694          980847 :     RestrictInfo *rinfo = NULL;
     695 GIC      980847 :     bool        cacheable = false;
     696                 : 
     697          980847 :     if (clause == NULL)         /* can this still happen? */
     698 UIC           0 :         return s1;
     699                 : 
     700 GIC      980847 :     if (IsA(clause, RestrictInfo))
     701                 :     {
     702          973443 :         rinfo = (RestrictInfo *) clause;
     703                 : 
     704                 :         /*
     705                 :          * If the clause is marked pseudoconstant, then it will be used as a
     706                 :          * gating qual and should not affect selectivity estimates; hence
     707                 :          * return 1.0.  The only exception is that a constant FALSE may be
     708 ECB             :          * taken as having selectivity 0.0, since it will surely mean no rows
     709                 :          * out of the plan.  This case is simple enough that we need not
     710                 :          * bother caching the result.
     711                 :          */
     712 GIC      973443 :         if (rinfo->pseudoconstant)
     713                 :         {
     714 CBC       10635 :             if (!IsA(rinfo->clause, Const))
     715 GIC       10529 :                 return (Selectivity) 1.0;
     716 ECB             :         }
     717                 : 
     718                 :         /*
     719                 :          * If possible, cache the result of the selectivity calculation for
     720                 :          * the clause.  We can cache if varRelid is zero or the clause
     721                 :          * contains only vars of that relid --- otherwise varRelid will affect
     722                 :          * the result, so mustn't cache.  Outer join quals might be examined
     723                 :          * with either their join's actual jointype or JOIN_INNER, so we need
     724                 :          * two cache variables to remember both cases.  Note: we assume the
     725                 :          * result won't change if we are switching the input relations or
     726                 :          * considering a unique-ified case, so we only need one cache variable
     727                 :          * for all non-JOIN_INNER cases.
     728                 :          */
     729 CBC      962914 :         if (varRelid == 0 ||
     730 GNC      374621 :             rinfo->num_base_rels == 0 ||
     731          626825 :             (rinfo->num_base_rels == 1 &&
     732          252204 :              bms_is_member(varRelid, rinfo->clause_relids)))
     733                 :         {
     734 ECB             :             /* Cacheable --- do we already have the result? */
     735 GIC      839629 :             if (jointype == JOIN_INNER)
     736 ECB             :             {
     737 GIC      713070 :                 if (rinfo->norm_selec >= 0)
     738          507456 :                     return rinfo->norm_selec;
     739                 :             }
     740                 :             else
     741                 :             {
     742 CBC      126559 :                 if (rinfo->outer_selec >= 0)
     743           81084 :                     return rinfo->outer_selec;
     744                 :             }
     745 GIC      251089 :             cacheable = true;
     746 ECB             :         }
     747                 : 
     748                 :         /*
     749                 :          * Proceed with examination of contained clause.  If the clause is an
     750                 :          * OR-clause, we want to look at the variant with sub-RestrictInfos,
     751                 :          * so that per-subclause selectivities can be cached.
     752                 :          */
     753 GIC      374374 :         if (rinfo->orclause)
     754 CBC        5112 :             clause = (Node *) rinfo->orclause;
     755 ECB             :         else
     756 GIC      369262 :             clause = (Node *) rinfo->clause;
     757 ECB             :     }
     758                 : 
     759 GIC      381778 :     if (IsA(clause, Var))
     760 ECB             :     {
     761 GIC       15206 :         Var        *var = (Var *) clause;
     762 ECB             : 
     763                 :         /*
     764                 :          * We probably shouldn't ever see an uplevel Var here, but if we do,
     765 EUB             :          * return the default selectivity...
     766                 :          */
     767 GBC       15206 :         if (var->varlevelsup == 0 &&
     768             266 :             (varRelid == 0 || varRelid == (int) var->varno))
     769                 :         {
     770                 :             /* Use the restriction selectivity function for a bool Var */
     771 GIC       15085 :             s1 = boolvarsel(root, (Node *) var, varRelid);
     772                 :         }
     773                 :     }
     774          366572 :     else if (IsA(clause, Const))
     775 ECB             :     {
     776                 :         /* bool constant is pretty easy... */
     777 GIC         859 :         Const      *con = (Const *) clause;
     778 ECB             : 
     779 CBC        1663 :         s1 = con->constisnull ? 0.0 :
     780 GIC         804 :             DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     781                 :     }
     782          365713 :     else if (IsA(clause, Param))
     783                 :     {
     784                 :         /* see if we can replace the Param */
     785 CBC          12 :         Node       *subst = estimate_expression_value(root, clause);
     786                 : 
     787 GIC          12 :         if (IsA(subst, Const))
     788 ECB             :         {
     789                 :             /* bool constant is pretty easy... */
     790 UIC           0 :             Const      *con = (Const *) subst;
     791                 : 
     792               0 :             s1 = con->constisnull ? 0.0 :
     793               0 :                 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
     794                 :         }
     795 ECB             :         else
     796                 :         {
     797                 :             /* XXX any way to do better than default? */
     798                 :         }
     799                 :     }
     800 GIC      365701 :     else if (is_notclause(clause))
     801 ECB             :     {
     802                 :         /* inverse of the selectivity of the underlying clause */
     803 GIC        3187 :         s1 = 1.0 - clause_selectivity_ext(root,
     804            3187 :                                           (Node *) get_notclausearg((Expr *) clause),
     805                 :                                           varRelid,
     806                 :                                           jointype,
     807                 :                                           sjinfo,
     808 ECB             :                                           use_extended_stats);
     809                 :     }
     810 CBC      362514 :     else if (is_andclause(clause))
     811 ECB             :     {
     812                 :         /* share code with clauselist_selectivity() */
     813 CBC        1302 :         s1 = clauselist_selectivity_ext(root,
     814                 :                                         ((BoolExpr *) clause)->args,
     815                 :                                         varRelid,
     816 ECB             :                                         jointype,
     817                 :                                         sjinfo,
     818                 :                                         use_extended_stats);
     819                 :     }
     820 GIC      361212 :     else if (is_orclause(clause))
     821                 :     {
     822                 :         /*
     823                 :          * Almost the same thing as clauselist_selectivity, but with the
     824                 :          * clauses connected by OR.
     825 ECB             :          */
     826 GIC        5132 :         s1 = clauselist_selectivity_or(root,
     827                 :                                        ((BoolExpr *) clause)->args,
     828                 :                                        varRelid,
     829                 :                                        jointype,
     830                 :                                        sjinfo,
     831                 :                                        use_extended_stats);
     832                 :     }
     833          356080 :     else if (is_opclause(clause) || IsA(clause, DistinctExpr))
     834          335455 :     {
     835          335467 :         OpExpr     *opclause = (OpExpr *) clause;
     836          335467 :         Oid         opno = opclause->opno;
     837 ECB             : 
     838 CBC      335467 :         if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
     839                 :         {
     840 ECB             :             /* Estimate selectivity for a join clause. */
     841 GIC       83942 :             s1 = join_selectivity(root, opno,
     842 ECB             :                                   opclause->args,
     843                 :                                   opclause->inputcollid,
     844                 :                                   jointype,
     845                 :                                   sjinfo);
     846                 :         }
     847                 :         else
     848                 :         {
     849                 :             /* Estimate selectivity for a restriction clause. */
     850 GIC      251525 :             s1 = restriction_selectivity(root, opno,
     851                 :                                          opclause->args,
     852                 :                                          opclause->inputcollid,
     853                 :                                          varRelid);
     854                 :         }
     855 ECB             : 
     856                 :         /*
     857                 :          * DistinctExpr has the same representation as OpExpr, but the
     858                 :          * contained operator is "=" not "<>", so we must negate the result.
     859                 :          * This estimation method doesn't give the right behavior for nulls,
     860                 :          * but it's better than doing nothing.
     861                 :          */
     862 GIC      335455 :         if (IsA(clause, DistinctExpr))
     863             321 :             s1 = 1.0 - s1;
     864                 :     }
     865           20613 :     else if (is_funcclause(clause))
     866 ECB             :     {
     867 GIC        4672 :         FuncExpr   *funcclause = (FuncExpr *) clause;
     868                 : 
     869 ECB             :         /* Try to get an estimate from the support function, if any */
     870 GIC        4672 :         s1 = function_selectivity(root,
     871                 :                                   funcclause->funcid,
     872                 :                                   funcclause->args,
     873                 :                                   funcclause->inputcollid,
     874            4672 :                                   treat_as_join_clause(root, clause, rinfo,
     875 ECB             :                                                        varRelid, sjinfo),
     876                 :                                   varRelid,
     877                 :                                   jointype,
     878                 :                                   sjinfo);
     879                 :     }
     880 CBC       15941 :     else if (IsA(clause, ScalarArrayOpExpr))
     881                 :     {
     882                 :         /* Use node specific selectivity calculation function */
     883 GIC        7882 :         s1 = scalararraysel(root,
     884                 :                             (ScalarArrayOpExpr *) clause,
     885 CBC        7882 :                             treat_as_join_clause(root, clause, rinfo,
     886                 :                                                  varRelid, sjinfo),
     887                 :                             varRelid,
     888 ECB             :                             jointype,
     889                 :                             sjinfo);
     890                 :     }
     891 GIC        8059 :     else if (IsA(clause, RowCompareExpr))
     892                 :     {
     893                 :         /* Use node specific selectivity calculation function */
     894              78 :         s1 = rowcomparesel(root,
     895 ECB             :                            (RowCompareExpr *) clause,
     896                 :                            varRelid,
     897                 :                            jointype,
     898                 :                            sjinfo);
     899                 :     }
     900 GIC        7981 :     else if (IsA(clause, NullTest))
     901 ECB             :     {
     902                 :         /* Use node specific selectivity calculation function */
     903 GIC        6732 :         s1 = nulltestsel(root,
     904 ECB             :                          ((NullTest *) clause)->nulltesttype,
     905 GIC        6732 :                          (Node *) ((NullTest *) clause)->arg,
     906                 :                          varRelid,
     907 EUB             :                          jointype,
     908                 :                          sjinfo);
     909                 :     }
     910 GIC        1249 :     else if (IsA(clause, BooleanTest))
     911                 :     {
     912                 :         /* Use node specific selectivity calculation function */
     913              96 :         s1 = booltestsel(root,
     914 ECB             :                          ((BooleanTest *) clause)->booltesttype,
     915 GIC          96 :                          (Node *) ((BooleanTest *) clause)->arg,
     916                 :                          varRelid,
     917 EUB             :                          jointype,
     918                 :                          sjinfo);
     919                 :     }
     920 GIC        1153 :     else if (IsA(clause, CurrentOfExpr))
     921                 :     {
     922                 :         /* CURRENT OF selects at most one row of its table */
     923             197 :         CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
     924             197 :         RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
     925                 : 
     926             197 :         if (crel->tuples > 0)
     927             190 :             s1 = 1.0 / crel->tuples;
     928                 :     }
     929             956 :     else if (IsA(clause, RelabelType))
     930                 :     {
     931                 :         /* Not sure this case is needed, but it can't hurt */
     932 UIC           0 :         s1 = clause_selectivity_ext(root,
     933 LBC           0 :                                     (Node *) ((RelabelType *) clause)->arg,
     934                 :                                     varRelid,
     935                 :                                     jointype,
     936                 :                                     sjinfo,
     937 ECB             :                                     use_extended_stats);
     938                 :     }
     939 CBC         956 :     else if (IsA(clause, CoerceToDomain))
     940 ECB             :     {
     941                 :         /* Not sure this case is needed, but it can't hurt */
     942 LBC           0 :         s1 = clause_selectivity_ext(root,
     943 UIC           0 :                                     (Node *) ((CoerceToDomain *) clause)->arg,
     944                 :                                     varRelid,
     945                 :                                     jointype,
     946                 :                                     sjinfo,
     947                 :                                     use_extended_stats);
     948                 :     }
     949 ECB             :     else
     950                 :     {
     951                 :         /*
     952                 :          * For anything else, see if we can consider it as a boolean variable.
     953                 :          * This only works if it's an immutable expression in Vars of a single
     954                 :          * relation; but there's no point in us checking that here because
     955                 :          * boolvarsel() will do it internally, and return a suitable default
     956                 :          * selectivity if not.
     957                 :          */
     958 GIC         956 :         s1 = boolvarsel(root, clause, varRelid);
     959                 :     }
     960                 : 
     961                 :     /* Cache the result if possible */
     962          381766 :     if (cacheable)
     963                 :     {
     964          251077 :         if (jointype == JOIN_INNER)
     965          205602 :             rinfo->norm_selec = s1;
     966                 :         else
     967           45475 :             rinfo->outer_selec = s1;
     968                 :     }
     969                 : 
     970                 : #ifdef SELECTIVITY_DEBUG
     971                 :     elog(DEBUG4, "clause_selectivity: s1 %f", s1);
     972                 : #endif                          /* SELECTIVITY_DEBUG */
     973                 : 
     974          381766 :     return s1;
     975                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a