Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * orclauses.c
4 : : * Routines to extract restriction OR clauses from join OR clauses
5 : : *
6 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/optimizer/util/orclauses.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : :
16 : : #include "postgres.h"
17 : :
18 : : #include "nodes/makefuncs.h"
19 : : #include "nodes/nodeFuncs.h"
20 : : #include "optimizer/optimizer.h"
21 : : #include "optimizer/orclauses.h"
22 : : #include "optimizer/paths.h"
23 : : #include "optimizer/restrictinfo.h"
24 : :
25 : :
26 : : static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
27 : : static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
28 : : static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
29 : : Expr *orclause, RestrictInfo *join_or_rinfo);
30 : :
31 : :
32 : : /*
33 : : * extract_restriction_or_clauses
34 : : * Examine join OR-of-AND clauses to see if any useful restriction OR
35 : : * clauses can be extracted. If so, add them to the query.
36 : : *
37 : : * Although a join clause must reference multiple relations overall,
38 : : * an OR of ANDs clause might contain sub-clauses that reference just one
39 : : * relation and can be used to build a restriction clause for that rel.
40 : : * For example consider
41 : : * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
42 : : * We can transform this into
43 : : * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
44 : : * AND (a.x = 42 OR a.x = 44)
45 : : * AND (b.y = 43 OR b.z = 45);
46 : : * which allows the latter clauses to be applied during the scans of a and b,
47 : : * perhaps as index qualifications, and in any case reducing the number of
48 : : * rows arriving at the join. In essence this is a partial transformation to
49 : : * CNF (AND of ORs format). It is not complete, however, because we do not
50 : : * unravel the original OR --- doing so would usually bloat the qualification
51 : : * expression to little gain.
52 : : *
53 : : * The added quals are partially redundant with the original OR, and therefore
54 : : * would cause the size of the joinrel to be underestimated when it is finally
55 : : * formed. (This would be true of a full transformation to CNF as well; the
56 : : * fault is not really in the transformation, but in clauselist_selectivity's
57 : : * inability to recognize redundant conditions.) We can compensate for this
58 : : * redundancy by changing the cached selectivity of the original OR clause,
59 : : * canceling out the (valid) reduction in the estimated sizes of the base
60 : : * relations so that the estimated joinrel size remains the same. This is
61 : : * a MAJOR HACK: it depends on the fact that clause selectivities are cached
62 : : * and on the fact that the same RestrictInfo node will appear in every
63 : : * joininfo list that might be used when the joinrel is formed.
64 : : * And it doesn't work in cases where the size estimation is nonlinear
65 : : * (i.e., outer and IN joins). But it beats not doing anything.
66 : : *
67 : : * We examine each base relation to see if join clauses associated with it
68 : : * contain extractable restriction conditions. If so, add those conditions
69 : : * to the rel's baserestrictinfo and update the cached selectivities of the
70 : : * join clauses. Note that the same join clause will be examined afresh
71 : : * from the point of view of each baserel that participates in it, so its
72 : : * cached selectivity may get updated multiple times.
73 : : */
74 : : void
3758 tgl@sss.pgh.pa.us 75 :CBC 139537 : extract_restriction_or_clauses(PlannerInfo *root)
76 : : {
77 : : Index rti;
78 : :
79 : : /* Examine each baserel for potential join OR clauses */
80 [ + + ]: 403311 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
81 : : {
82 : 263774 : RelOptInfo *rel = root->simple_rel_array[rti];
83 : : ListCell *lc;
84 : :
85 : : /* there may be empty slots corresponding to non-baserel RTEs */
86 [ + + ]: 263774 : if (rel == NULL)
87 : 67629 : continue;
88 : :
2489 89 [ - + ]: 196145 : Assert(rel->relid == rti); /* sanity check on array */
90 : :
91 : : /* ignore RTEs that are "other rels" */
3758 92 [ - + ]: 196145 : if (rel->reloptkind != RELOPT_BASEREL)
3758 tgl@sss.pgh.pa.us 93 :UBC 0 : continue;
94 : :
95 : : /*
96 : : * Find potentially interesting OR joinclauses. We can use any
97 : : * joinclause that is considered safe to move to this rel by the
98 : : * parameterized-path machinery, even though what we are going to do
99 : : * with it is not exactly a parameterized path.
100 : : */
3758 tgl@sss.pgh.pa.us 101 [ + + + + :CBC 252986 : foreach(lc, rel->joininfo)
+ + ]
102 : : {
103 : 56841 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
104 : :
105 [ + + + + ]: 60593 : if (restriction_is_or_clause(rinfo) &&
440 106 : 3752 : join_clause_is_movable_to(rinfo, rel))
107 : : {
108 : : /* Try to extract a qual for this rel only */
3758 109 : 2520 : Expr *orclause = extract_or_clause(rinfo, rel);
110 : :
111 : : /*
112 : : * If successful, decide whether we want to use the clause,
113 : : * and insert it into the rel's restrictinfo list if so.
114 : : */
115 [ + + ]: 2520 : if (orclause)
116 : 125 : consider_new_or_clause(root, rel, orclause, rinfo);
117 : : }
118 : : }
119 : : }
120 : 139537 : }
121 : :
122 : : /*
123 : : * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
124 : : */
125 : : static bool
126 : 5070 : is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
127 : : {
128 : : /*
129 : : * We want clauses that mention the rel, and only the rel. So in
130 : : * particular pseudoconstant clauses can be rejected quickly. Then check
131 : : * the clause's Var membership.
132 : : */
133 [ - + ]: 5070 : if (rinfo->pseudoconstant)
3758 tgl@sss.pgh.pa.us 134 :UBC 0 : return false;
3758 tgl@sss.pgh.pa.us 135 [ + + ]:CBC 5070 : if (!bms_equal(rinfo->clause_relids, rel->relids))
136 : 2764 : return false;
137 : :
138 : : /* We don't want extra evaluations of any volatile functions */
139 [ - + ]: 2306 : if (contain_volatile_functions((Node *) rinfo->clause))
3758 tgl@sss.pgh.pa.us 140 :UBC 0 : return false;
141 : :
3758 tgl@sss.pgh.pa.us 142 :CBC 2306 : return true;
143 : : }
144 : :
145 : : /*
146 : : * Try to extract a restriction clause mentioning only "rel" from the given
147 : : * join OR-clause.
148 : : *
149 : : * We must be able to extract at least one qual for this rel from each of
150 : : * the arms of the OR, else we can't use it.
151 : : *
152 : : * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
153 : : * if no OR clause could be extracted.
154 : : */
155 : : static Expr *
156 : 2538 : extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
157 : : {
158 : 2538 : List *clauselist = NIL;
159 : : ListCell *lc;
160 : :
161 : : /*
162 : : * Scan each arm of the input OR clause. Notice we descend into
163 : : * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
164 : : * toplevel OR/AND structure. This is useful because we can use the info
165 : : * in those nodes to make is_safe_restriction_clause_for()'s checks
166 : : * cheaper. We'll strip those nodes from the returned tree, though,
167 : : * meaning that fresh ones will be built if the clause is accepted as a
168 : : * restriction clause. This might seem wasteful --- couldn't we re-use
169 : : * the existing RestrictInfos? But that'd require assuming that
170 : : * selectivity and other cached data is computed exactly the same way for
171 : : * a restriction clause as for a join clause, which seems undesirable.
172 : : */
1902 173 [ - + ]: 2538 : Assert(is_orclause(or_rinfo->orclause));
3758 174 [ + - + + : 4589 : foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
+ + ]
175 : : {
176 : 4461 : Node *orarg = (Node *) lfirst(lc);
177 : 4461 : List *subclauses = NIL;
178 : : Node *subclause;
179 : :
180 : : /* OR arguments should be ANDs or sub-RestrictInfos */
1902 181 [ + + ]: 4461 : if (is_andclause(orarg))
182 : : {
3758 183 : 419 : List *andargs = ((BoolExpr *) orarg)->args;
184 : : ListCell *lc2;
185 : :
186 [ + - + + : 1465 : foreach(lc2, andargs)
+ + ]
187 : : {
2561 188 : 1046 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
189 : :
3758 190 [ + + ]: 1046 : if (restriction_is_or_clause(rinfo))
191 : : {
192 : : /*
193 : : * Recurse to deal with nested OR. Note we *must* recurse
194 : : * here, this isn't just overly-tense optimization: we
195 : : * have to descend far enough to find and strip all
196 : : * RestrictInfos in the expression.
197 : : */
198 : : Expr *suborclause;
199 : :
200 : 18 : suborclause = extract_or_clause(rinfo, rel);
201 [ + + ]: 18 : if (suborclause)
202 : 3 : subclauses = lappend(subclauses, suborclause);
203 : : }
204 [ + + ]: 1028 : else if (is_safe_restriction_clause_for(rinfo, rel))
205 : 659 : subclauses = lappend(subclauses, rinfo->clause);
206 : : }
207 : : }
208 : : else
209 : : {
2609 peter_e@gmx.net 210 : 4042 : RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
211 : :
212 [ - + ]: 4042 : Assert(!restriction_is_or_clause(rinfo));
213 [ + + ]: 4042 : if (is_safe_restriction_clause_for(rinfo, rel))
214 : 1647 : subclauses = lappend(subclauses, rinfo->clause);
215 : : }
216 : :
217 : : /*
218 : : * If nothing could be extracted from this arm, we can't do anything
219 : : * with this OR clause.
220 : : */
3758 tgl@sss.pgh.pa.us 221 [ + + ]: 4461 : if (subclauses == NIL)
222 : 2410 : return NULL;
223 : :
224 : : /*
225 : : * OK, add subclause(s) to the result OR. If we found more than one,
226 : : * we need an AND node. But if we found only one, and it is itself an
227 : : * OR node, add its subclauses to the result instead; this is needed
228 : : * to preserve AND/OR flatness (ie, no OR directly underneath OR).
229 : : */
3505 230 : 2051 : subclause = (Node *) make_ands_explicit(subclauses);
1902 231 [ + + ]: 2051 : if (is_orclause(subclause))
3505 232 : 3 : clauselist = list_concat(clauselist,
1707 233 : 3 : ((BoolExpr *) subclause)->args);
234 : : else
3505 235 : 2048 : clauselist = lappend(clauselist, subclause);
236 : : }
237 : :
238 : : /*
239 : : * If we got a restriction clause from every arm, wrap them up in an OR
240 : : * node. (In theory the OR node might be unnecessary, if there was only
241 : : * one arm --- but then the input OR node was also redundant.)
242 : : */
3758 243 [ + - ]: 128 : if (clauselist != NIL)
244 : 128 : return make_orclause(clauselist);
3758 tgl@sss.pgh.pa.us 245 :UBC 0 : return NULL;
246 : : }
247 : :
248 : : /*
249 : : * Consider whether a successfully-extracted restriction OR clause is
250 : : * actually worth using. If so, add it to the planner's data structures,
251 : : * and adjust the original join clause (join_or_rinfo) to compensate.
252 : : */
253 : : static void
3758 tgl@sss.pgh.pa.us 254 :CBC 125 : consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
255 : : Expr *orclause, RestrictInfo *join_or_rinfo)
256 : : {
257 : : RestrictInfo *or_rinfo;
258 : : Selectivity or_selec,
259 : : orig_selec;
260 : :
261 : : /*
262 : : * Build a RestrictInfo from the new OR clause. We can assume it's valid
263 : : * as a base restriction clause.
264 : : */
1179 265 : 125 : or_rinfo = make_restrictinfo(root,
266 : : orclause,
267 : : true,
268 : : false,
269 : : false,
270 : : false,
271 : : join_or_rinfo->security_level,
272 : : NULL,
273 : : NULL,
274 : : NULL);
275 : :
276 : : /*
277 : : * Estimate its selectivity. (We could have done this earlier, but doing
278 : : * it on the RestrictInfo representation allows the result to get cached,
279 : : * saving work later.)
280 : : */
3758 281 : 125 : or_selec = clause_selectivity(root, (Node *) or_rinfo,
282 : : 0, JOIN_INNER, NULL);
283 : :
284 : : /*
285 : : * The clause is only worth adding to the query if it rejects a useful
286 : : * fraction of the base relation's rows; otherwise, it's just going to
287 : : * cause duplicate computation (since we will still have to check the
288 : : * original OR clause when the join is formed). Somewhat arbitrarily, we
289 : : * set the selectivity threshold at 0.9.
290 : : */
291 [ - + ]: 125 : if (or_selec > 0.9)
3758 tgl@sss.pgh.pa.us 292 :UBC 0 : return; /* forget it */
293 : :
294 : : /*
295 : : * OK, add it to the rel's restriction-clause list.
296 : : */
3758 tgl@sss.pgh.pa.us 297 :CBC 125 : rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
2643 298 : 125 : rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
299 : : or_rinfo->security_level);
300 : :
301 : : /*
302 : : * Adjust the original join OR clause's cached selectivity to compensate
303 : : * for the selectivity of the added (but redundant) lower-level qual. This
304 : : * should result in the join rel getting approximately the same rows
305 : : * estimate as it would have gotten without all these shenanigans.
306 : : *
307 : : * XXX major hack alert: this depends on the assumption that the
308 : : * selectivity will stay cached.
309 : : *
310 : : * XXX another major hack: we adjust only norm_selec, the cached
311 : : * selectivity for JOIN_INNER semantics, even though the join clause
312 : : * might've been an outer-join clause. This is partly because we can't
313 : : * easily identify the relevant SpecialJoinInfo here, and partly because
314 : : * the linearity assumption we're making would fail anyway. (If it is an
315 : : * outer-join clause, "rel" must be on the nullable side, else we'd not
316 : : * have gotten here. So the computation of the join size is going to be
317 : : * quite nonlinear with respect to the size of "rel", so it's not clear
318 : : * how we ought to adjust outer_selec even if we could compute its
319 : : * original value correctly.)
320 : : */
3758 321 [ + - ]: 125 : if (or_selec > 0)
322 : : {
323 : : SpecialJoinInfo sjinfo;
324 : :
325 : : /*
326 : : * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare
327 : : * approx_tuple_count() in costsize.c.)
328 : : */
20 amitlan@postgresql.o 329 :GNC 125 : init_dummy_sjinfo(&sjinfo,
330 : 125 : bms_difference(join_or_rinfo->clause_relids,
331 : 125 : rel->relids),
332 : : rel->relids);
333 : :
334 : : /* Compute inner-join size */
3758 tgl@sss.pgh.pa.us 335 :CBC 125 : orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
336 : : 0, JOIN_INNER, &sjinfo);
337 : :
338 : : /* And hack cached selectivity so join size remains the same */
339 : 125 : join_or_rinfo->norm_selec = orig_selec / or_selec;
340 : : /* ensure result stays in sane range */
341 [ - + ]: 125 : if (join_or_rinfo->norm_selec > 1)
3758 tgl@sss.pgh.pa.us 342 :UBC 0 : join_or_rinfo->norm_selec = 1;
343 : : /* as explained above, we don't touch outer_selec */
344 : : }
345 : : }
|