Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * orclauses.c
4 : * Routines to extract restriction OR clauses from join OR clauses
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/util/orclauses.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "nodes/makefuncs.h"
19 : #include "nodes/nodeFuncs.h"
20 : #include "optimizer/clauses.h"
21 : #include "optimizer/cost.h"
22 : #include "optimizer/optimizer.h"
23 : #include "optimizer/orclauses.h"
24 : #include "optimizer/restrictinfo.h"
25 :
26 :
27 : static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
28 : static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
29 : static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
30 : Expr *orclause, RestrictInfo *join_or_rinfo);
31 :
32 :
33 : /*
34 : * extract_restriction_or_clauses
35 : * Examine join OR-of-AND clauses to see if any useful restriction OR
36 : * clauses can be extracted. If so, add them to the query.
37 : *
38 : * Although a join clause must reference multiple relations overall,
39 : * an OR of ANDs clause might contain sub-clauses that reference just one
40 : * relation and can be used to build a restriction clause for that rel.
41 : * For example consider
42 : * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
43 : * We can transform this into
44 : * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
45 : * AND (a.x = 42 OR a.x = 44)
46 : * AND (b.y = 43 OR b.z = 45);
47 : * which allows the latter clauses to be applied during the scans of a and b,
48 : * perhaps as index qualifications, and in any case reducing the number of
49 : * rows arriving at the join. In essence this is a partial transformation to
50 : * CNF (AND of ORs format). It is not complete, however, because we do not
51 : * unravel the original OR --- doing so would usually bloat the qualification
52 : * expression to little gain.
53 : *
54 : * The added quals are partially redundant with the original OR, and therefore
55 : * would cause the size of the joinrel to be underestimated when it is finally
56 : * formed. (This would be true of a full transformation to CNF as well; the
57 : * fault is not really in the transformation, but in clauselist_selectivity's
58 : * inability to recognize redundant conditions.) We can compensate for this
59 : * redundancy by changing the cached selectivity of the original OR clause,
60 : * canceling out the (valid) reduction in the estimated sizes of the base
61 : * relations so that the estimated joinrel size remains the same. This is
62 : * a MAJOR HACK: it depends on the fact that clause selectivities are cached
63 : * and on the fact that the same RestrictInfo node will appear in every
64 : * joininfo list that might be used when the joinrel is formed.
65 : * And it doesn't work in cases where the size estimation is nonlinear
66 : * (i.e., outer and IN joins). But it beats not doing anything.
67 : *
68 : * We examine each base relation to see if join clauses associated with it
69 : * contain extractable restriction conditions. If so, add those conditions
70 : * to the rel's baserestrictinfo and update the cached selectivities of the
71 : * join clauses. Note that the same join clause will be examined afresh
72 : * from the point of view of each baserel that participates in it, so its
73 : * cached selectivity may get updated multiple times.
74 : */
75 : void
3387 tgl 76 CBC 128145 : extract_restriction_or_clauses(PlannerInfo *root)
77 : {
78 : Index rti;
79 :
80 : /* Examine each baserel for potential join OR clauses */
81 366747 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
82 : {
83 238602 : RelOptInfo *rel = root->simple_rel_array[rti];
84 : ListCell *lc;
85 :
86 : /* there may be empty slots corresponding to non-baserel RTEs */
87 238602 : if (rel == NULL)
88 62468 : continue;
89 :
2118 90 176134 : Assert(rel->relid == rti); /* sanity check on array */
91 :
92 : /* ignore RTEs that are "other rels" */
3387 93 176134 : if (rel->reloptkind != RELOPT_BASEREL)
3387 tgl 94 UBC 0 : continue;
95 :
96 : /*
97 : * Find potentially interesting OR joinclauses. We can use any
98 : * joinclause that is considered safe to move to this rel by the
99 : * parameterized-path machinery, even though what we are going to do
100 : * with it is not exactly a parameterized path.
101 : */
3387 tgl 102 CBC 224855 : foreach(lc, rel->joininfo)
3387 tgl 103 ECB : {
3387 tgl 104 GIC 48721 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
105 :
3387 tgl 106 CBC 51736 : if (restriction_is_or_clause(rinfo) &&
69 tgl 107 GNC 3015 : join_clause_is_movable_to(rinfo, rel))
108 : {
109 : /* Try to extract a qual for this rel only */
3387 tgl 110 GIC 2029 : Expr *orclause = extract_or_clause(rinfo, rel);
3387 tgl 111 ECB :
112 : /*
113 : * If successful, decide whether we want to use the clause,
114 : * and insert it into the rel's restrictinfo list if so.
115 : */
3387 tgl 116 CBC 2029 : if (orclause)
3387 tgl 117 GIC 65 : consider_new_or_clause(root, rel, orclause, rinfo);
118 : }
119 : }
120 : }
121 128145 : }
3387 tgl 122 ECB :
123 : /*
124 : * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
125 : */
126 : static bool
3387 tgl 127 GIC 3868 : is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
128 : {
3387 tgl 129 ECB : /*
3260 bruce 130 EUB : * We want clauses that mention the rel, and only the rel. So in
3387 tgl 131 ECB : * particular pseudoconstant clauses can be rejected quickly. Then check
132 : * the clause's Var membership.
133 : */
3387 tgl 134 GIC 3868 : if (rinfo->pseudoconstant)
3387 tgl 135 LBC 0 : return false;
3387 tgl 136 GBC 3868 : if (!bms_equal(rinfo->clause_relids, rel->relids))
3387 tgl 137 GIC 2153 : return false;
3387 tgl 138 ECB :
139 : /* We don't want extra evaluations of any volatile functions */
3387 tgl 140 GIC 1715 : if (contain_volatile_functions((Node *) rinfo->clause))
3387 tgl 141 UIC 0 : return false;
142 :
3387 tgl 143 GIC 1715 : return true;
144 : }
145 :
146 : /*
147 : * Try to extract a restriction clause mentioning only "rel" from the given
148 : * join OR-clause.
149 : *
150 : * We must be able to extract at least one qual for this rel from each of
151 : * the arms of the OR, else we can't use it.
3387 tgl 152 ECB : *
153 : * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
154 : * if no OR clause could be extracted.
155 : */
156 : static Expr *
3387 tgl 157 GIC 2047 : extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
158 : {
159 2047 : List *clauselist = NIL;
160 : ListCell *lc;
161 :
162 : /*
163 : * Scan each arm of the input OR clause. Notice we descend into
164 : * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
165 : * toplevel OR/AND structure. This is useful because we can use the info
166 : * in those nodes to make is_safe_restriction_clause_for()'s checks
167 : * cheaper. We'll strip those nodes from the returned tree, though,
168 : * meaning that fresh ones will be built if the clause is accepted as a
3260 bruce 169 ECB : * restriction clause. This might seem wasteful --- couldn't we re-use
3387 tgl 170 : * the existing RestrictInfos? But that'd require assuming that
171 : * selectivity and other cached data is computed exactly the same way for
172 : * a restriction clause as for a join clause, which seems undesirable.
173 : */
1531 tgl 174 GIC 2047 : Assert(is_orclause(or_rinfo->orclause));
3387 175 3603 : foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
176 : {
3387 tgl 177 CBC 3535 : Node *orarg = (Node *) lfirst(lc);
3387 tgl 178 GIC 3535 : List *subclauses = NIL;
3134 tgl 179 ECB : Node *subclause;
180 :
181 : /* OR arguments should be ANDs or sub-RestrictInfos */
1531 tgl 182 CBC 3535 : if (is_andclause(orarg))
183 : {
3387 184 263 : List *andargs = ((BoolExpr *) orarg)->args;
185 : ListCell *lc2;
3387 tgl 186 ECB :
3387 tgl 187 GIC 877 : foreach(lc2, andargs)
188 : {
2190 189 614 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
190 :
3387 191 614 : if (restriction_is_or_clause(rinfo))
192 : {
193 : /*
194 : * Recurse to deal with nested OR. Note we *must* recurse
195 : * here, this isn't just overly-tense optimization: we
3387 tgl 196 ECB : * have to descend far enough to find and strip all
197 : * RestrictInfos in the expression.
198 : */
199 : Expr *suborclause;
200 :
3387 tgl 201 CBC 18 : suborclause = extract_or_clause(rinfo, rel);
3387 tgl 202 GIC 18 : if (suborclause)
203 3 : subclauses = lappend(subclauses, suborclause);
204 : }
205 596 : else if (is_safe_restriction_clause_for(rinfo, rel))
3387 tgl 206 CBC 407 : subclauses = lappend(subclauses, rinfo->clause);
207 : }
3387 tgl 208 ECB : }
209 : else
210 : {
2238 peter_e 211 GIC 3272 : RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
212 :
213 3272 : Assert(!restriction_is_or_clause(rinfo));
214 3272 : if (is_safe_restriction_clause_for(rinfo, rel))
215 1308 : subclauses = lappend(subclauses, rinfo->clause);
216 : }
3387 tgl 217 ECB :
218 : /*
219 : * If nothing could be extracted from this arm, we can't do anything
220 : * with this OR clause.
221 : */
3387 tgl 222 GIC 3535 : if (subclauses == NIL)
223 1979 : return NULL;
224 :
225 : /*
3387 tgl 226 ECB : * OK, add subclause(s) to the result OR. If we found more than one,
3134 227 : * we need an AND node. But if we found only one, and it is itself an
228 : * OR node, add its subclauses to the result instead; this is needed
229 : * to preserve AND/OR flatness (ie, no OR directly underneath OR).
230 : */
3134 tgl 231 CBC 1556 : subclause = (Node *) make_ands_explicit(subclauses);
1531 tgl 232 GIC 1556 : if (is_orclause(subclause))
3134 233 3 : clauselist = list_concat(clauselist,
1336 234 3 : ((BoolExpr *) subclause)->args);
235 : else
3134 236 1553 : clauselist = lappend(clauselist, subclause);
237 : }
238 :
3387 tgl 239 ECB : /*
240 : * If we got a restriction clause from every arm, wrap them up in an OR
3387 tgl 241 EUB : * node. (In theory the OR node might be unnecessary, if there was only
242 : * one arm --- but then the input OR node was also redundant.)
243 : */
3387 tgl 244 GIC 68 : if (clauselist != NIL)
245 68 : return make_orclause(clauselist);
3387 tgl 246 UIC 0 : return NULL;
247 : }
248 :
249 : /*
3387 tgl 250 ECB : * Consider whether a successfully-extracted restriction OR clause is
251 : * actually worth using. If so, add it to the planner's data structures,
252 : * and adjust the original join clause (join_or_rinfo) to compensate.
253 : */
254 : static void
3387 tgl 255 GIC 65 : consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
256 : Expr *orclause, RestrictInfo *join_or_rinfo)
257 : {
258 : RestrictInfo *or_rinfo;
259 : Selectivity or_selec,
260 : orig_selec;
3387 tgl 261 ECB :
262 : /*
263 : * Build a RestrictInfo from the new OR clause. We can assume it's valid
264 : * as a base restriction clause.
265 : */
808 tgl 266 GIC 65 : or_rinfo = make_restrictinfo(root,
267 : orclause,
268 : true,
269 : false,
270 : join_or_rinfo->security_level,
271 : NULL,
3387 tgl 272 ECB : NULL);
273 :
274 : /*
275 : * Estimate its selectivity. (We could have done this earlier, but doing
276 : * it on the RestrictInfo representation allows the result to get cached,
277 : * saving work later.)
278 : */
3387 tgl 279 GIC 65 : or_selec = clause_selectivity(root, (Node *) or_rinfo,
280 : 0, JOIN_INNER, NULL);
281 :
3387 tgl 282 ECB : /*
3387 tgl 283 EUB : * The clause is only worth adding to the query if it rejects a useful
284 : * fraction of the base relation's rows; otherwise, it's just going to
285 : * cause duplicate computation (since we will still have to check the
286 : * original OR clause when the join is formed). Somewhat arbitrarily, we
287 : * set the selectivity threshold at 0.9.
3387 tgl 288 ECB : */
3387 tgl 289 CBC 65 : if (or_selec > 0.9)
3387 tgl 290 UIC 0 : return; /* forget it */
291 :
292 : /*
293 : * OK, add it to the rel's restriction-clause list.
294 : */
3387 tgl 295 GIC 65 : rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
2272 296 65 : rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
297 : or_rinfo->security_level);
298 :
299 : /*
300 : * Adjust the original join OR clause's cached selectivity to compensate
301 : * for the selectivity of the added (but redundant) lower-level qual. This
302 : * should result in the join rel getting approximately the same rows
303 : * estimate as it would have gotten without all these shenanigans.
304 : *
305 : * XXX major hack alert: this depends on the assumption that the
306 : * selectivity will stay cached.
307 : *
308 : * XXX another major hack: we adjust only norm_selec, the cached
309 : * selectivity for JOIN_INNER semantics, even though the join clause
310 : * might've been an outer-join clause. This is partly because we can't
311 : * easily identify the relevant SpecialJoinInfo here, and partly because
3387 tgl 312 ECB : * the linearity assumption we're making would fail anyway. (If it is an
313 : * outer-join clause, "rel" must be on the nullable side, else we'd not
314 : * have gotten here. So the computation of the join size is going to be
315 : * quite nonlinear with respect to the size of "rel", so it's not clear
316 : * how we ought to adjust outer_selec even if we could compute its
317 : * original value correctly.)
318 : */
3387 tgl 319 GIC 65 : if (or_selec > 0)
3387 tgl 320 ECB : {
321 : SpecialJoinInfo sjinfo;
322 :
323 : /*
3260 bruce 324 : * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare
3387 tgl 325 : * approx_tuple_count() in costsize.c.)
326 : */
3387 tgl 327 CBC 65 : sjinfo.type = T_SpecialJoinInfo;
328 130 : sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids,
329 65 : rel->relids);
330 65 : sjinfo.min_righthand = rel->relids;
3387 tgl 331 GIC 65 : sjinfo.syn_lefthand = sjinfo.min_lefthand;
3387 tgl 332 CBC 65 : sjinfo.syn_righthand = sjinfo.min_righthand;
333 65 : sjinfo.jointype = JOIN_INNER;
69 tgl 334 GNC 65 : sjinfo.ojrelid = 0;
335 65 : sjinfo.commute_above_l = NULL;
336 65 : sjinfo.commute_above_r = NULL;
337 65 : sjinfo.commute_below = NULL;
3387 tgl 338 ECB : /* we don't bother trying to make the remaining fields valid */
3387 tgl 339 CBC 65 : sjinfo.lhs_strict = false;
2951 tgl 340 GIC 65 : sjinfo.semi_can_btree = false;
341 65 : sjinfo.semi_can_hash = false;
2951 tgl 342 CBC 65 : sjinfo.semi_operators = NIL;
2951 tgl 343 GIC 65 : sjinfo.semi_rhs_exprs = NIL;
344 :
345 : /* Compute inner-join size */
3387 tgl 346 CBC 65 : orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
347 : 0, JOIN_INNER, &sjinfo);
3387 tgl 348 ECB :
3387 tgl 349 EUB : /* And hack cached selectivity so join size remains the same */
3387 tgl 350 GIC 65 : join_or_rinfo->norm_selec = orig_selec / or_selec;
351 : /* ensure result stays in sane range */
352 65 : if (join_or_rinfo->norm_selec > 1)
3387 tgl 353 UIC 0 : join_or_rinfo->norm_selec = 1;
354 : /* as explained above, we don't touch outer_selec */
355 : }
356 : }
|