LCOV - differential code coverage report
Current view: top level - src/backend/optimizer/util - orclauses.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 92.7 % 82 76 1 4 1 1 44 5 26 4 46 3
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 4 4 3 1 3
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * orclauses.c
       4                 :  *    Routines to extract restriction OR clauses from join OR clauses
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/optimizer/util/orclauses.c
      12                 :  *
      13                 :  *-------------------------------------------------------------------------
      14                 :  */
      15                 : 
      16                 : #include "postgres.h"
      17                 : 
      18                 : #include "nodes/makefuncs.h"
      19                 : #include "nodes/nodeFuncs.h"
      20                 : #include "optimizer/clauses.h"
      21                 : #include "optimizer/cost.h"
      22                 : #include "optimizer/optimizer.h"
      23                 : #include "optimizer/orclauses.h"
      24                 : #include "optimizer/restrictinfo.h"
      25                 : 
      26                 : 
      27                 : static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
      28                 : static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
      29                 : static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
      30                 :                                    Expr *orclause, RestrictInfo *join_or_rinfo);
      31                 : 
      32                 : 
      33                 : /*
      34                 :  * extract_restriction_or_clauses
      35                 :  *    Examine join OR-of-AND clauses to see if any useful restriction OR
      36                 :  *    clauses can be extracted.  If so, add them to the query.
      37                 :  *
      38                 :  * Although a join clause must reference multiple relations overall,
      39                 :  * an OR of ANDs clause might contain sub-clauses that reference just one
      40                 :  * relation and can be used to build a restriction clause for that rel.
      41                 :  * For example consider
      42                 :  *      WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
      43                 :  * We can transform this into
      44                 :  *      WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
      45                 :  *          AND (a.x = 42 OR a.x = 44)
      46                 :  *          AND (b.y = 43 OR b.z = 45);
      47                 :  * which allows the latter clauses to be applied during the scans of a and b,
      48                 :  * perhaps as index qualifications, and in any case reducing the number of
      49                 :  * rows arriving at the join.  In essence this is a partial transformation to
      50                 :  * CNF (AND of ORs format).  It is not complete, however, because we do not
      51                 :  * unravel the original OR --- doing so would usually bloat the qualification
      52                 :  * expression to little gain.
      53                 :  *
      54                 :  * The added quals are partially redundant with the original OR, and therefore
      55                 :  * would cause the size of the joinrel to be underestimated when it is finally
      56                 :  * formed.  (This would be true of a full transformation to CNF as well; the
      57                 :  * fault is not really in the transformation, but in clauselist_selectivity's
      58                 :  * inability to recognize redundant conditions.)  We can compensate for this
      59                 :  * redundancy by changing the cached selectivity of the original OR clause,
      60                 :  * canceling out the (valid) reduction in the estimated sizes of the base
      61                 :  * relations so that the estimated joinrel size remains the same.  This is
      62                 :  * a MAJOR HACK: it depends on the fact that clause selectivities are cached
      63                 :  * and on the fact that the same RestrictInfo node will appear in every
      64                 :  * joininfo list that might be used when the joinrel is formed.
      65                 :  * And it doesn't work in cases where the size estimation is nonlinear
      66                 :  * (i.e., outer and IN joins).  But it beats not doing anything.
      67                 :  *
      68                 :  * We examine each base relation to see if join clauses associated with it
      69                 :  * contain extractable restriction conditions.  If so, add those conditions
      70                 :  * to the rel's baserestrictinfo and update the cached selectivities of the
      71                 :  * join clauses.  Note that the same join clause will be examined afresh
      72                 :  * from the point of view of each baserel that participates in it, so its
      73                 :  * cached selectivity may get updated multiple times.
      74                 :  */
      75                 : void
      76 CBC      128145 : extract_restriction_or_clauses(PlannerInfo *root)
      77                 : {
      78                 :     Index       rti;
      79                 : 
      80                 :     /* Examine each baserel for potential join OR clauses */
      81          366747 :     for (rti = 1; rti < root->simple_rel_array_size; rti++)
      82                 :     {
      83          238602 :         RelOptInfo *rel = root->simple_rel_array[rti];
      84                 :         ListCell   *lc;
      85                 : 
      86                 :         /* there may be empty slots corresponding to non-baserel RTEs */
      87          238602 :         if (rel == NULL)
      88           62468 :             continue;
      89                 : 
      90          176134 :         Assert(rel->relid == rti);   /* sanity check on array */
      91                 : 
      92                 :         /* ignore RTEs that are "other rels" */
      93          176134 :         if (rel->reloptkind != RELOPT_BASEREL)
      94 UBC           0 :             continue;
      95                 : 
      96                 :         /*
      97                 :          * Find potentially interesting OR joinclauses.  We can use any
      98                 :          * joinclause that is considered safe to move to this rel by the
      99                 :          * parameterized-path machinery, even though what we are going to do
     100                 :          * with it is not exactly a parameterized path.
     101                 :          */
     102 CBC      224855 :         foreach(lc, rel->joininfo)
     103 ECB             :         {
     104 GIC       48721 :             RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
     105                 : 
     106 CBC       51736 :             if (restriction_is_or_clause(rinfo) &&
     107 GNC        3015 :                 join_clause_is_movable_to(rinfo, rel))
     108                 :             {
     109                 :                 /* Try to extract a qual for this rel only */
     110 GIC        2029 :                 Expr       *orclause = extract_or_clause(rinfo, rel);
     111 ECB             : 
     112                 :                 /*
     113                 :                  * If successful, decide whether we want to use the clause,
     114                 :                  * and insert it into the rel's restrictinfo list if so.
     115                 :                  */
     116 CBC        2029 :                 if (orclause)
     117 GIC          65 :                     consider_new_or_clause(root, rel, orclause, rinfo);
     118                 :             }
     119                 :         }
     120                 :     }
     121          128145 : }
     122 ECB             : 
     123                 : /*
     124                 :  * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
     125                 :  */
     126                 : static bool
     127 GIC        3868 : is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
     128                 : {
     129 ECB             :     /*
     130 EUB             :      * We want clauses that mention the rel, and only the rel.  So in
     131 ECB             :      * particular pseudoconstant clauses can be rejected quickly.  Then check
     132                 :      * the clause's Var membership.
     133                 :      */
     134 GIC        3868 :     if (rinfo->pseudoconstant)
     135 LBC           0 :         return false;
     136 GBC        3868 :     if (!bms_equal(rinfo->clause_relids, rel->relids))
     137 GIC        2153 :         return false;
     138 ECB             : 
     139                 :     /* We don't want extra evaluations of any volatile functions */
     140 GIC        1715 :     if (contain_volatile_functions((Node *) rinfo->clause))
     141 UIC           0 :         return false;
     142                 : 
     143 GIC        1715 :     return true;
     144                 : }
     145                 : 
     146                 : /*
     147                 :  * Try to extract a restriction clause mentioning only "rel" from the given
     148                 :  * join OR-clause.
     149                 :  *
     150                 :  * We must be able to extract at least one qual for this rel from each of
     151                 :  * the arms of the OR, else we can't use it.
     152 ECB             :  *
     153                 :  * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
     154                 :  * if no OR clause could be extracted.
     155                 :  */
     156                 : static Expr *
     157 GIC        2047 : extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
     158                 : {
     159            2047 :     List       *clauselist = NIL;
     160                 :     ListCell   *lc;
     161                 : 
     162                 :     /*
     163                 :      * Scan each arm of the input OR clause.  Notice we descend into
     164                 :      * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
     165                 :      * toplevel OR/AND structure.  This is useful because we can use the info
     166                 :      * in those nodes to make is_safe_restriction_clause_for()'s checks
     167                 :      * cheaper.  We'll strip those nodes from the returned tree, though,
     168                 :      * meaning that fresh ones will be built if the clause is accepted as a
     169 ECB             :      * restriction clause.  This might seem wasteful --- couldn't we re-use
     170                 :      * the existing RestrictInfos?  But that'd require assuming that
     171                 :      * selectivity and other cached data is computed exactly the same way for
     172                 :      * a restriction clause as for a join clause, which seems undesirable.
     173                 :      */
     174 GIC        2047 :     Assert(is_orclause(or_rinfo->orclause));
     175            3603 :     foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
     176                 :     {
     177 CBC        3535 :         Node       *orarg = (Node *) lfirst(lc);
     178 GIC        3535 :         List       *subclauses = NIL;
     179 ECB             :         Node       *subclause;
     180                 : 
     181                 :         /* OR arguments should be ANDs or sub-RestrictInfos */
     182 CBC        3535 :         if (is_andclause(orarg))
     183                 :         {
     184             263 :             List       *andargs = ((BoolExpr *) orarg)->args;
     185                 :             ListCell   *lc2;
     186 ECB             : 
     187 GIC         877 :             foreach(lc2, andargs)
     188                 :             {
     189             614 :                 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
     190                 : 
     191             614 :                 if (restriction_is_or_clause(rinfo))
     192                 :                 {
     193                 :                     /*
     194                 :                      * Recurse to deal with nested OR.  Note we *must* recurse
     195                 :                      * here, this isn't just overly-tense optimization: we
     196 ECB             :                      * have to descend far enough to find and strip all
     197                 :                      * RestrictInfos in the expression.
     198                 :                      */
     199                 :                     Expr       *suborclause;
     200                 : 
     201 CBC          18 :                     suborclause = extract_or_clause(rinfo, rel);
     202 GIC          18 :                     if (suborclause)
     203               3 :                         subclauses = lappend(subclauses, suborclause);
     204                 :                 }
     205             596 :                 else if (is_safe_restriction_clause_for(rinfo, rel))
     206 CBC         407 :                     subclauses = lappend(subclauses, rinfo->clause);
     207                 :             }
     208 ECB             :         }
     209                 :         else
     210                 :         {
     211 GIC        3272 :             RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
     212                 : 
     213            3272 :             Assert(!restriction_is_or_clause(rinfo));
     214            3272 :             if (is_safe_restriction_clause_for(rinfo, rel))
     215            1308 :                 subclauses = lappend(subclauses, rinfo->clause);
     216                 :         }
     217 ECB             : 
     218                 :         /*
     219                 :          * If nothing could be extracted from this arm, we can't do anything
     220                 :          * with this OR clause.
     221                 :          */
     222 GIC        3535 :         if (subclauses == NIL)
     223            1979 :             return NULL;
     224                 : 
     225                 :         /*
     226 ECB             :          * OK, add subclause(s) to the result OR.  If we found more than one,
     227                 :          * we need an AND node.  But if we found only one, and it is itself an
     228                 :          * OR node, add its subclauses to the result instead; this is needed
     229                 :          * to preserve AND/OR flatness (ie, no OR directly underneath OR).
     230                 :          */
     231 CBC        1556 :         subclause = (Node *) make_ands_explicit(subclauses);
     232 GIC        1556 :         if (is_orclause(subclause))
     233               3 :             clauselist = list_concat(clauselist,
     234               3 :                                      ((BoolExpr *) subclause)->args);
     235                 :         else
     236            1553 :             clauselist = lappend(clauselist, subclause);
     237                 :     }
     238                 : 
     239 ECB             :     /*
     240                 :      * If we got a restriction clause from every arm, wrap them up in an OR
     241 EUB             :      * node.  (In theory the OR node might be unnecessary, if there was only
     242                 :      * one arm --- but then the input OR node was also redundant.)
     243                 :      */
     244 GIC          68 :     if (clauselist != NIL)
     245              68 :         return make_orclause(clauselist);
     246 UIC           0 :     return NULL;
     247                 : }
     248                 : 
     249                 : /*
     250 ECB             :  * Consider whether a successfully-extracted restriction OR clause is
     251                 :  * actually worth using.  If so, add it to the planner's data structures,
     252                 :  * and adjust the original join clause (join_or_rinfo) to compensate.
     253                 :  */
     254                 : static void
     255 GIC          65 : consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
     256                 :                        Expr *orclause, RestrictInfo *join_or_rinfo)
     257                 : {
     258                 :     RestrictInfo *or_rinfo;
     259                 :     Selectivity or_selec,
     260                 :                 orig_selec;
     261 ECB             : 
     262                 :     /*
     263                 :      * Build a RestrictInfo from the new OR clause.  We can assume it's valid
     264                 :      * as a base restriction clause.
     265                 :      */
     266 GIC          65 :     or_rinfo = make_restrictinfo(root,
     267                 :                                  orclause,
     268                 :                                  true,
     269                 :                                  false,
     270                 :                                  join_or_rinfo->security_level,
     271                 :                                  NULL,
     272 ECB             :                                  NULL);
     273                 : 
     274                 :     /*
     275                 :      * Estimate its selectivity.  (We could have done this earlier, but doing
     276                 :      * it on the RestrictInfo representation allows the result to get cached,
     277                 :      * saving work later.)
     278                 :      */
     279 GIC          65 :     or_selec = clause_selectivity(root, (Node *) or_rinfo,
     280                 :                                   0, JOIN_INNER, NULL);
     281                 : 
     282 ECB             :     /*
     283 EUB             :      * The clause is only worth adding to the query if it rejects a useful
     284                 :      * fraction of the base relation's rows; otherwise, it's just going to
     285                 :      * cause duplicate computation (since we will still have to check the
     286                 :      * original OR clause when the join is formed).  Somewhat arbitrarily, we
     287                 :      * set the selectivity threshold at 0.9.
     288 ECB             :      */
     289 CBC          65 :     if (or_selec > 0.9)
     290 UIC           0 :         return;                 /* forget it */
     291                 : 
     292                 :     /*
     293                 :      * OK, add it to the rel's restriction-clause list.
     294                 :      */
     295 GIC          65 :     rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
     296              65 :     rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
     297                 :                                          or_rinfo->security_level);
     298                 : 
     299                 :     /*
     300                 :      * Adjust the original join OR clause's cached selectivity to compensate
     301                 :      * for the selectivity of the added (but redundant) lower-level qual. This
     302                 :      * should result in the join rel getting approximately the same rows
     303                 :      * estimate as it would have gotten without all these shenanigans.
     304                 :      *
     305                 :      * XXX major hack alert: this depends on the assumption that the
     306                 :      * selectivity will stay cached.
     307                 :      *
     308                 :      * XXX another major hack: we adjust only norm_selec, the cached
     309                 :      * selectivity for JOIN_INNER semantics, even though the join clause
     310                 :      * might've been an outer-join clause.  This is partly because we can't
     311                 :      * easily identify the relevant SpecialJoinInfo here, and partly because
     312 ECB             :      * the linearity assumption we're making would fail anyway.  (If it is an
     313                 :      * outer-join clause, "rel" must be on the nullable side, else we'd not
     314                 :      * have gotten here.  So the computation of the join size is going to be
     315                 :      * quite nonlinear with respect to the size of "rel", so it's not clear
     316                 :      * how we ought to adjust outer_selec even if we could compute its
     317                 :      * original value correctly.)
     318                 :      */
     319 GIC          65 :     if (or_selec > 0)
     320 ECB             :     {
     321                 :         SpecialJoinInfo sjinfo;
     322                 : 
     323                 :         /*
     324                 :          * Make up a SpecialJoinInfo for JOIN_INNER semantics.  (Compare
     325                 :          * approx_tuple_count() in costsize.c.)
     326                 :          */
     327 CBC          65 :         sjinfo.type = T_SpecialJoinInfo;
     328             130 :         sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids,
     329              65 :                                              rel->relids);
     330              65 :         sjinfo.min_righthand = rel->relids;
     331 GIC          65 :         sjinfo.syn_lefthand = sjinfo.min_lefthand;
     332 CBC          65 :         sjinfo.syn_righthand = sjinfo.min_righthand;
     333              65 :         sjinfo.jointype = JOIN_INNER;
     334 GNC          65 :         sjinfo.ojrelid = 0;
     335              65 :         sjinfo.commute_above_l = NULL;
     336              65 :         sjinfo.commute_above_r = NULL;
     337              65 :         sjinfo.commute_below = NULL;
     338 ECB             :         /* we don't bother trying to make the remaining fields valid */
     339 CBC          65 :         sjinfo.lhs_strict = false;
     340 GIC          65 :         sjinfo.semi_can_btree = false;
     341              65 :         sjinfo.semi_can_hash = false;
     342 CBC          65 :         sjinfo.semi_operators = NIL;
     343 GIC          65 :         sjinfo.semi_rhs_exprs = NIL;
     344                 : 
     345                 :         /* Compute inner-join size */
     346 CBC          65 :         orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
     347                 :                                         0, JOIN_INNER, &sjinfo);
     348 ECB             : 
     349 EUB             :         /* And hack cached selectivity so join size remains the same */
     350 GIC          65 :         join_or_rinfo->norm_selec = orig_selec / or_selec;
     351                 :         /* ensure result stays in sane range */
     352              65 :         if (join_or_rinfo->norm_selec > 1)
     353 UIC           0 :             join_or_rinfo->norm_selec = 1;
     354                 :         /* as explained above, we don't touch outer_selec */
     355                 :     }
     356                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a