Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * clausesel.c
4 : : * Routines to compute clause selectivities
5 : : *
6 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/optimizer/path/clausesel.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include "nodes/nodeFuncs.h"
18 : : #include "optimizer/clauses.h"
19 : : #include "optimizer/optimizer.h"
20 : : #include "optimizer/pathnode.h"
21 : : #include "optimizer/plancat.h"
22 : : #include "statistics/statistics.h"
23 : : #include "utils/fmgroids.h"
24 : : #include "utils/lsyscache.h"
25 : : #include "utils/selfuncs.h"
26 : :
27 : : /*
28 : : * Data structure for accumulating info about possible range-query
29 : : * clause pairs in clauselist_selectivity.
30 : : */
31 : : typedef struct RangeQueryClause
32 : : {
33 : : struct RangeQueryClause *next; /* next in linked list */
34 : : Node *var; /* The common variable of the clauses */
35 : : bool have_lobound; /* found a low-bound clause yet? */
36 : : bool have_hibound; /* found a high-bound clause yet? */
37 : : Selectivity lobound; /* Selectivity of a var > something clause */
38 : : Selectivity hibound; /* Selectivity of a var < something clause */
39 : : } RangeQueryClause;
40 : :
41 : : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
42 : : bool varonleft, bool isLTsel, Selectivity s2);
43 : : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
44 : : List *clauses);
45 : : static Selectivity clauselist_selectivity_or(PlannerInfo *root,
46 : : List *clauses,
47 : : int varRelid,
48 : : JoinType jointype,
49 : : SpecialJoinInfo *sjinfo,
50 : : bool use_extended_stats);
51 : :
52 : : /****************************************************************************
53 : : * ROUTINES TO COMPUTE SELECTIVITIES
54 : : ****************************************************************************/
55 : :
56 : : /*
57 : : * clauselist_selectivity -
58 : : * Compute the selectivity of an implicitly-ANDed list of boolean
59 : : * expression clauses. The list can be empty, in which case 1.0
60 : : * must be returned. List elements may be either RestrictInfos
61 : : * or bare expression clauses --- the former is preferred since
62 : : * it allows caching of results.
63 : : *
64 : : * See clause_selectivity() for the meaning of the additional parameters.
65 : : *
66 : : * The basic approach is to apply extended statistics first, on as many
67 : : * clauses as possible, in order to capture cross-column dependencies etc.
68 : : * The remaining clauses are then estimated by taking the product of their
69 : : * selectivities, but that's only right if they have independent
70 : : * probabilities, and in reality they are often NOT independent even if they
71 : : * only refer to a single column. So, we want to be smarter where we can.
72 : : *
73 : : * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
74 : : * are recognized as possible range query components if they are restriction
75 : : * opclauses whose operators have scalarltsel or a related function as their
76 : : * restriction selectivity estimator. We pair up clauses of this form that
77 : : * refer to the same variable. An unpairable clause of this kind is simply
78 : : * multiplied into the selectivity product in the normal way. But when we
79 : : * find a pair, we know that the selectivities represent the relative
80 : : * positions of the low and high bounds within the column's range, so instead
81 : : * of figuring the selectivity as hisel * losel, we can figure it as hisel +
82 : : * losel - 1. (To visualize this, see that hisel is the fraction of the range
83 : : * below the high bound, while losel is the fraction above the low bound; so
84 : : * hisel can be interpreted directly as a 0..1 value but we need to convert
85 : : * losel to 1-losel before interpreting it as a value. Then the available
86 : : * range is 1-losel to hisel. However, this calculation double-excludes
87 : : * nulls, so really we need hisel + losel + null_frac - 1.)
88 : : *
89 : : * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
90 : : * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
91 : : * yields an impossible (negative) result.
92 : : *
93 : : * A free side-effect is that we can recognize redundant inequalities such
94 : : * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
95 : : *
96 : : * Of course this is all very dependent on the behavior of the inequality
97 : : * selectivity functions; perhaps some day we can generalize the approach.
98 : : */
99 : : Selectivity
1228 dean.a.rasheed@gmail 100 :CBC 1137901 : clauselist_selectivity(PlannerInfo *root,
101 : : List *clauses,
102 : : int varRelid,
103 : : JoinType jointype,
104 : : SpecialJoinInfo *sjinfo)
105 : : {
106 : 1137901 : return clauselist_selectivity_ext(root, clauses, varRelid,
107 : : jointype, sjinfo, true);
108 : : }
109 : :
110 : : /*
111 : : * clauselist_selectivity_ext -
112 : : * Extended version of clauselist_selectivity(). If "use_extended_stats"
113 : : * is false, all extended statistics will be ignored, and only per-column
114 : : * statistics will be used.
115 : : */
116 : : Selectivity
117 : 1140979 : clauselist_selectivity_ext(PlannerInfo *root,
118 : : List *clauses,
119 : : int varRelid,
120 : : JoinType jointype,
121 : : SpecialJoinInfo *sjinfo,
122 : : bool use_extended_stats)
123 : : {
8768 bruce@momjian.us 124 : 1140979 : Selectivity s1 = 1.0;
125 : : RelOptInfo *rel;
1228 dean.a.rasheed@gmail 126 : 1140979 : Bitmapset *estimatedclauses = NULL;
8768 bruce@momjian.us 127 : 1140979 : RangeQueryClause *rqlist = NULL;
128 : : ListCell *l;
129 : : int listidx;
130 : :
131 : : /*
132 : : * If there's exactly one clause, just go directly to
133 : : * clause_selectivity_ext(). None of what we might do below is relevant.
134 : : */
1228 dean.a.rasheed@gmail 135 [ + + ]: 1140979 : if (list_length(clauses) == 1)
136 : 580957 : return clause_selectivity_ext(root, (Node *) linitial(clauses),
137 : : varRelid, jointype, sjinfo,
138 : : use_extended_stats);
139 : :
140 : : /*
141 : : * Determine if these clauses reference a single relation. If so, and if
142 : : * it has extended statistics, try to apply those.
143 : : */
144 : 560022 : rel = find_single_rel_for_clauses(root, clauses);
145 [ + + + + : 560022 : if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+ + + + ]
146 : : {
147 : : /*
148 : : * Estimate as many clauses as possible using extended statistics.
149 : : *
150 : : * 'estimatedclauses' is populated with the 0-based list position
151 : : * index of clauses estimated here, and that should be ignored below.
152 : : */
153 : 867 : s1 = statext_clauselist_selectivity(root, clauses, varRelid,
154 : : jointype, sjinfo, rel,
155 : : &estimatedclauses, false);
156 : : }
157 : :
158 : : /*
159 : : * Apply normal selectivity estimates for remaining clauses. We'll be
160 : : * careful to skip any clauses which were already estimated above.
161 : : *
162 : : * Anything that doesn't look like a potential rangequery clause gets
163 : : * multiplied into s1 and forgotten. Anything that does gets inserted into
164 : : * an rqlist entry.
165 : : */
2566 simon@2ndQuadrant.co 166 : 560022 : listidx = -1;
7263 neilc@samurai.com 167 [ + + + + : 898251 : foreach(l, clauses)
+ + ]
168 : : {
169 : 338229 : Node *clause = (Node *) lfirst(l);
170 : : RestrictInfo *rinfo;
171 : : Selectivity s2;
172 : :
2566 simon@2ndQuadrant.co 173 : 338229 : listidx++;
174 : :
175 : : /*
176 : : * Skip this clause if it's already been estimated by some other
177 : : * statistics above.
178 : : */
179 [ + + ]: 338229 : if (bms_is_member(listidx, estimatedclauses))
180 : 1701 : continue;
181 : :
182 : : /* Compute the selectivity of this clause in isolation */
1228 dean.a.rasheed@gmail 183 : 336528 : s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
184 : : use_extended_stats);
185 : :
186 : : /*
187 : : * Check for being passed a RestrictInfo.
188 : : *
189 : : * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
190 : : * 0.0; just use that rather than looking for range pairs.
191 : : */
7406 tgl@sss.pgh.pa.us 192 [ + + ]: 336528 : if (IsA(clause, RestrictInfo))
193 : : {
194 : 335914 : rinfo = (RestrictInfo *) clause;
6497 195 [ + + ]: 335914 : if (rinfo->pseudoconstant)
196 : : {
197 : 12054 : s1 = s1 * s2;
198 : 12054 : continue;
199 : : }
7406 200 : 323860 : clause = (Node *) rinfo->clause;
201 : : }
202 : : else
203 : 614 : rinfo = NULL;
204 : :
205 : : /*
206 : : * See if it looks like a restriction clause with a pseudoconstant on
207 : : * one side. (Anything more complicated than that might not behave in
208 : : * the simple way we are expecting.) Most of the tests here can be
209 : : * done more efficiently with rinfo than without.
210 : : */
7259 neilc@samurai.com 211 [ + + + - ]: 324474 : if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
212 : : {
7794 tgl@sss.pgh.pa.us 213 : 275847 : OpExpr *expr = (OpExpr *) clause;
7406 214 : 275847 : bool varonleft = true;
215 : : bool ok;
216 : :
217 [ + + ]: 275847 : if (rinfo)
218 : : {
440 219 [ + + + + ]: 451508 : ok = (rinfo->num_base_rels == 1) &&
7406 220 : 173296 : (is_pseudo_constant_clause_relids(lsecond(expr->args),
6756 bruce@momjian.us 221 [ + + ]: 2971 : rinfo->right_relids) ||
7406 tgl@sss.pgh.pa.us 222 : 2971 : (varonleft = false,
6756 bruce@momjian.us 223 : 2971 : is_pseudo_constant_clause_relids(linitial(expr->args),
224 : : rinfo->left_relids)));
225 : : }
226 : : else
227 : : {
1179 tgl@sss.pgh.pa.us 228 [ + - - + ]: 1212 : ok = (NumRelids(root, clause) == 1) &&
7406 229 [ - - ]: 606 : (is_pseudo_constant_clause(lsecond(expr->args)) ||
7406 tgl@sss.pgh.pa.us 230 :UBC 0 : (varonleft = false,
7263 neilc@samurai.com 231 : 0 : is_pseudo_constant_clause(linitial(expr->args))));
232 : : }
233 : :
7406 tgl@sss.pgh.pa.us 234 [ + + ]:CBC 275847 : if (ok)
235 : : {
236 : : /*
237 : : * If it's not a "<"/"<="/">"/">=" operator, just merge the
238 : : * selectivity in generically. But if it's the right oprrest,
239 : : * add the clause to rqlist for later processing.
240 : : */
241 [ + + + ]: 173606 : switch (get_oprrest(expr->opno))
242 : : {
243 : 6456 : case F_SCALARLTSEL:
244 : : case F_SCALARLESEL:
245 : 6456 : addRangeClause(&rqlist, clause,
246 : : varonleft, true, s2);
247 : 6456 : break;
248 : 17289 : case F_SCALARGTSEL:
249 : : case F_SCALARGESEL:
250 : 17289 : addRangeClause(&rqlist, clause,
251 : : varonleft, false, s2);
252 : 17289 : break;
253 : 149861 : default:
254 : : /* Just merge the selectivity in generically */
255 : 149861 : s1 = s1 * s2;
256 : 149861 : break;
257 : : }
258 : 173606 : continue; /* drop to loop bottom */
259 : : }
260 : : }
261 : :
262 : : /* Not the right form, so treat it generically. */
8862 263 : 150868 : s1 = s1 * s2;
264 : : }
265 : :
266 : : /*
267 : : * Now scan the rangequery pair list.
268 : : */
8847 269 [ + + ]: 579288 : while (rqlist != NULL)
270 : : {
271 : : RangeQueryClause *rqnext;
272 : :
273 [ + + + + ]: 19266 : if (rqlist->have_lobound && rqlist->have_hibound)
274 : 4230 : {
275 : : /* Successfully matched a pair of range clauses */
276 : : Selectivity s2;
277 : :
278 : : /*
279 : : * Exact equality to the default value probably means the
280 : : * selectivity function punted. This is not airtight but should
281 : : * be good enough.
282 : : */
7096 283 [ + + ]: 4230 : if (rqlist->hibound == DEFAULT_INEQ_SEL ||
284 [ - + ]: 3162 : rqlist->lobound == DEFAULT_INEQ_SEL)
285 : : {
286 : 1068 : s2 = DEFAULT_RANGE_INEQ_SEL;
287 : : }
288 : : else
289 : : {
290 : 3162 : s2 = rqlist->hibound + rqlist->lobound - 1.0;
291 : :
292 : : /* Adjust for double-exclusion of NULLs */
6071 293 : 3162 : s2 += nulltestsel(root, IS_NULL, rqlist->var,
294 : : varRelid, jointype, sjinfo);
295 : :
296 : : /*
297 : : * A zero or slightly negative s2 should be converted into a
298 : : * small positive value; we probably are dealing with a very
299 : : * tight range and got a bogus result due to roundoff errors.
300 : : * However, if s2 is very negative, then we probably have
301 : : * default selectivity estimates on one or both sides of the
302 : : * range that we failed to recognize above for some reason.
303 : : */
7096 304 [ + + ]: 3162 : if (s2 <= 0.0)
305 : : {
306 [ + + ]: 415 : if (s2 < -0.01)
307 : : {
308 : : /*
309 : : * No data available --- use a default estimate that
310 : : * is small, but not real small.
311 : : */
312 : 6 : s2 = DEFAULT_RANGE_INEQ_SEL;
313 : : }
314 : : else
315 : : {
316 : : /*
317 : : * It's just roundoff error; use a small positive
318 : : * value
319 : : */
320 : 409 : s2 = 1.0e-10;
321 : : }
322 : : }
323 : : }
324 : : /* Merge in the selectivity of the pair of clauses */
8788 325 : 4230 : s1 *= s2;
326 : : }
327 : : else
328 : : {
329 : : /* Only found one of a pair, merge it in generically */
8847 330 [ + + ]: 15036 : if (rqlist->have_lobound)
331 : 12855 : s1 *= rqlist->lobound;
332 : : else
333 : 2181 : s1 *= rqlist->hibound;
334 : : }
335 : : /* release storage and advance */
336 : 19266 : rqnext = rqlist->next;
337 : 19266 : pfree(rqlist);
338 : 19266 : rqlist = rqnext;
339 : : }
340 : :
8862 341 : 560022 : return s1;
342 : : }
343 : :
344 : : /*
345 : : * clauselist_selectivity_or -
346 : : * Compute the selectivity of an implicitly-ORed list of boolean
347 : : * expression clauses. The list can be empty, in which case 0.0
348 : : * must be returned. List elements may be either RestrictInfos
349 : : * or bare expression clauses --- the former is preferred since
350 : : * it allows caching of results.
351 : : *
352 : : * See clause_selectivity() for the meaning of the additional parameters.
353 : : *
354 : : * The basic approach is to apply extended statistics first, on as many
355 : : * clauses as possible, in order to capture cross-column dependencies etc.
356 : : * The remaining clauses are then estimated as if they were independent.
357 : : */
358 : : static Selectivity
1228 dean.a.rasheed@gmail 359 : 6109 : clauselist_selectivity_or(PlannerInfo *root,
360 : : List *clauses,
361 : : int varRelid,
362 : : JoinType jointype,
363 : : SpecialJoinInfo *sjinfo,
364 : : bool use_extended_stats)
365 : : {
366 : 6109 : Selectivity s1 = 0.0;
367 : : RelOptInfo *rel;
368 : 6109 : Bitmapset *estimatedclauses = NULL;
369 : : ListCell *lc;
370 : : int listidx;
371 : :
372 : : /*
373 : : * Determine if these clauses reference a single relation. If so, and if
374 : : * it has extended statistics, try to apply those.
375 : : */
376 : 6109 : rel = find_single_rel_for_clauses(root, clauses);
377 [ + + + + : 6109 : if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+ + + + ]
378 : : {
379 : : /*
380 : : * Estimate as many clauses as possible using extended statistics.
381 : : *
382 : : * 'estimatedclauses' is populated with the 0-based list position
383 : : * index of clauses estimated here, and that should be ignored below.
384 : : */
385 : 54 : s1 = statext_clauselist_selectivity(root, clauses, varRelid,
386 : : jointype, sjinfo, rel,
387 : : &estimatedclauses, true);
388 : : }
389 : :
390 : : /*
391 : : * Estimate the remaining clauses as if they were independent.
392 : : *
393 : : * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
394 : : * for the probable overlap of selected tuple sets.
395 : : *
396 : : * XXX is this too conservative?
397 : : */
398 : 6109 : listidx = -1;
399 [ + - + + : 20081 : foreach(lc, clauses)
+ + ]
400 : : {
401 : : Selectivity s2;
402 : :
403 : 13972 : listidx++;
404 : :
405 : : /*
406 : : * Skip this clause if it's already been estimated by some other
407 : : * statistics above.
408 : : */
409 [ + + ]: 13972 : if (bms_is_member(listidx, estimatedclauses))
410 : 120 : continue;
411 : :
412 : 13852 : s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
413 : : jointype, sjinfo, use_extended_stats);
414 : :
415 : 13852 : s1 = s1 + s2 - s1 * s2;
416 : : }
417 : :
418 : 6109 : return s1;
419 : : }
420 : :
421 : : /*
422 : : * addRangeClause --- add a new range clause for clauselist_selectivity
423 : : *
424 : : * Here is where we try to match up pairs of range-query clauses
425 : : */
426 : : static void
8847 tgl@sss.pgh.pa.us 427 : 23745 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
428 : : bool varonleft, bool isLTsel, Selectivity s2)
429 : : {
430 : : RangeQueryClause *rqelem;
431 : : Node *var;
432 : : bool is_lobound;
433 : :
8365 434 [ + + ]: 23745 : if (varonleft)
435 : : {
7760 436 : 23637 : var = get_leftop((Expr *) clause);
8768 bruce@momjian.us 437 : 23637 : is_lobound = !isLTsel; /* x < something is high bound */
438 : : }
439 : : else
440 : : {
7760 tgl@sss.pgh.pa.us 441 : 108 : var = get_rightop((Expr *) clause);
8847 442 : 108 : is_lobound = isLTsel; /* something < x is low bound */
443 : : }
444 : :
445 [ + + ]: 24362 : for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
446 : : {
447 : : /*
448 : : * We use full equal() here because the "var" might be a function of
449 : : * one or more attributes of the same relation...
450 : : */
8768 bruce@momjian.us 451 [ + + ]: 5096 : if (!equal(var, rqelem->var))
8847 tgl@sss.pgh.pa.us 452 : 617 : continue;
453 : : /* Found the right group to put this clause in */
454 [ + + ]: 4479 : if (is_lobound)
455 : : {
8768 bruce@momjian.us 456 [ + + ]: 210 : if (!rqelem->have_lobound)
457 : : {
8847 tgl@sss.pgh.pa.us 458 : 90 : rqelem->have_lobound = true;
459 : 90 : rqelem->lobound = s2;
460 : : }
461 : : else
462 : : {
463 : :
464 : : /*------
465 : : * We have found two similar clauses, such as
466 : : * x < y AND x <= z.
467 : : * Keep only the more restrictive one.
468 : : *------
469 : : */
470 [ + + ]: 120 : if (rqelem->lobound > s2)
8847 tgl@sss.pgh.pa.us 471 :GBC 30 : rqelem->lobound = s2;
472 : : }
473 : : }
474 : : else
475 : : {
8768 bruce@momjian.us 476 [ + + ]:CBC 4269 : if (!rqelem->have_hibound)
477 : : {
8847 tgl@sss.pgh.pa.us 478 : 4140 : rqelem->have_hibound = true;
479 : 4140 : rqelem->hibound = s2;
480 : : }
481 : : else
482 : : {
483 : :
484 : : /*------
485 : : * We have found two similar clauses, such as
486 : : * x > y AND x >= z.
487 : : * Keep only the more restrictive one.
488 : : *------
489 : : */
490 [ + + ]: 129 : if (rqelem->hibound > s2)
491 : 48 : rqelem->hibound = s2;
492 : : }
493 : : }
494 : 4479 : return;
495 : : }
496 : :
497 : : /* No matching var found, so make a new clause-pair data structure */
498 : 19266 : rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
499 : 19266 : rqelem->var = var;
500 [ + + ]: 19266 : if (is_lobound)
501 : : {
502 : 16995 : rqelem->have_lobound = true;
503 : 16995 : rqelem->have_hibound = false;
504 : 16995 : rqelem->lobound = s2;
505 : : }
506 : : else
507 : : {
508 : 2271 : rqelem->have_lobound = false;
509 : 2271 : rqelem->have_hibound = true;
510 : 2271 : rqelem->hibound = s2;
511 : : }
512 : 19266 : rqelem->next = *rqlist;
513 : 19266 : *rqlist = rqelem;
514 : : }
515 : :
516 : : /*
517 : : * find_single_rel_for_clauses
518 : : * Examine each clause in 'clauses' and determine if all clauses
519 : : * reference only a single relation. If so return that relation,
520 : : * otherwise return NULL.
521 : : */
522 : : static RelOptInfo *
2563 523 : 567402 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
524 : : {
525 : 567402 : int lastrelid = 0;
526 : : ListCell *l;
527 : :
2565 simon@2ndQuadrant.co 528 [ + + + + : 766932 : foreach(l, clauses)
+ + ]
529 : : {
530 : 285359 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
531 : : int relid;
532 : :
533 : : /*
534 : : * If we have a list of bare clauses rather than RestrictInfos, we
535 : : * could pull out their relids the hard way with pull_varnos().
536 : : * However, currently the extended-stats machinery won't do anything
537 : : * with non-RestrictInfo clauses anyway, so there's no point in
538 : : * spending extra cycles; just fail if that's what we have.
539 : : *
540 : : * An exception to that rule is if we have a bare BoolExpr AND clause.
541 : : * We treat this as a special case because the restrictinfo machinery
542 : : * doesn't build RestrictInfos on top of AND clauses.
543 : : */
1223 dean.a.rasheed@gmail 544 [ + + ]: 285359 : if (is_andclause(rinfo))
545 : 993 : {
546 : : RelOptInfo *rel;
547 : :
548 : 1271 : rel = find_single_rel_for_clauses(root,
549 : : ((BoolExpr *) rinfo)->args);
550 : :
551 [ + + ]: 1271 : if (rel == NULL)
552 : 85829 : return NULL;
553 [ + + ]: 993 : if (lastrelid == 0)
554 : 500 : lastrelid = rel->relid;
555 [ - + ]: 493 : else if (rel->relid != lastrelid)
1223 dean.a.rasheed@gmail 556 :UBC 0 : return NULL;
557 : :
1223 dean.a.rasheed@gmail 558 :CBC 17007 : continue;
559 : : }
560 : :
2563 tgl@sss.pgh.pa.us 561 [ + + ]: 284088 : if (!IsA(rinfo, RestrictInfo))
562 : 535 : return NULL;
563 : :
564 [ + + ]: 283553 : if (bms_is_empty(rinfo->clause_relids))
565 : 16014 : continue; /* we can ignore variable-free clauses */
566 [ + + ]: 267539 : if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
567 : 83323 : return NULL; /* multiple relations in this clause */
568 [ + + ]: 184216 : if (lastrelid == 0)
569 : 90832 : lastrelid = relid; /* first clause referencing a relation */
570 [ + + ]: 93384 : else if (relid != lastrelid)
571 : 1693 : return NULL; /* relation not same as last one */
572 : : }
573 : :
2565 simon@2ndQuadrant.co 574 [ + + ]: 481573 : if (lastrelid != 0)
575 : 72341 : return find_base_rel(root, lastrelid);
576 : :
2563 tgl@sss.pgh.pa.us 577 : 409232 : return NULL; /* no clauses */
578 : : }
579 : :
580 : : /*
581 : : * treat_as_join_clause -
582 : : * Decide whether an operator clause is to be handled by the
583 : : * restriction or join estimator. Subroutine for clause_selectivity().
584 : : */
585 : : static inline bool
1179 586 : 399872 : treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
587 : : int varRelid, SpecialJoinInfo *sjinfo)
588 : : {
5720 589 [ + + ]: 399872 : if (varRelid != 0)
590 : : {
591 : : /*
592 : : * Caller is forcing restriction mode (eg, because we are examining an
593 : : * inner indexscan qual).
594 : : */
595 : 146105 : return false;
596 : : }
597 [ + + ]: 253767 : else if (sjinfo == NULL)
598 : : {
599 : : /*
600 : : * It must be a restriction clause, since it's being evaluated at a
601 : : * scan node.
602 : : */
603 : 152489 : return false;
604 : : }
605 : : else
606 : : {
607 : : /*
608 : : * Otherwise, it's a join if there's more than one base relation used.
609 : : * We can optimize this calculation if an rinfo was passed.
610 : : *
611 : : * XXX Since we know the clause is being evaluated at a join, the
612 : : * only way it could be single-relation is if it was delayed by outer
613 : : * joins. We intentionally count only baserels here, not OJs that
614 : : * might be present in rinfo->clause_relids, so that we direct such
615 : : * cases to the restriction qual estimators not join estimators.
616 : : * Eventually some notice should be taken of the possibility of
617 : : * injected nulls, but we'll likely want to do that in the restriction
618 : : * estimators rather than starting to treat such cases as join quals.
619 : : */
620 [ + + ]: 101278 : if (rinfo)
440 621 : 101050 : return (rinfo->num_base_rels > 1);
622 : : else
1179 623 : 228 : return (NumRelids(root, clause) > 1);
624 : : }
625 : : }
626 : :
627 : :
628 : : /*
629 : : * clause_selectivity -
630 : : * Compute the selectivity of a general boolean expression clause.
631 : : *
632 : : * The clause can be either a RestrictInfo or a plain expression. If it's
633 : : * a RestrictInfo, we try to cache the selectivity for possible re-use,
634 : : * so passing RestrictInfos is preferred.
635 : : *
636 : : * varRelid is either 0 or a rangetable index.
637 : : *
638 : : * When varRelid is not 0, only variables belonging to that relation are
639 : : * considered in computing selectivity; other vars are treated as constants
640 : : * of unknown values. This is appropriate for estimating the selectivity of
641 : : * a join clause that is being used as a restriction clause in a scan of a
642 : : * nestloop join's inner relation --- varRelid should then be the ID of the
643 : : * inner relation.
644 : : *
645 : : * When varRelid is 0, all variables are treated as variables. This
646 : : * is appropriate for ordinary join clauses and restriction clauses.
647 : : *
648 : : * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
649 : : * if the clause isn't a join clause.
650 : : *
651 : : * sjinfo is NULL for a non-join clause, otherwise it provides additional
652 : : * context information about the join being performed. There are some
653 : : * special cases:
654 : : * 1. For a special (not INNER) join, sjinfo is always a member of
655 : : * root->join_info_list.
656 : : * 2. For an INNER join, sjinfo is just a transient struct, and only the
657 : : * relids and jointype fields in it can be trusted.
658 : : * It is possible for jointype to be different from sjinfo->jointype.
659 : : * This indicates we are considering a variant join: either with
660 : : * the LHS and RHS switched, or with one input unique-ified.
661 : : *
662 : : * Note: when passing nonzero varRelid, it's normally appropriate to set
663 : : * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
664 : : * join clause; because we aren't treating it as a join clause.
665 : : */
666 : : Selectivity
6888 667 : 220463 : clause_selectivity(PlannerInfo *root,
668 : : Node *clause,
669 : : int varRelid,
670 : : JoinType jointype,
671 : : SpecialJoinInfo *sjinfo)
672 : : {
1228 dean.a.rasheed@gmail 673 : 220463 : return clause_selectivity_ext(root, clause, varRelid,
674 : : jointype, sjinfo, true);
675 : : }
676 : :
677 : : /*
678 : : * clause_selectivity_ext -
679 : : * Extended version of clause_selectivity(). If "use_extended_stats" is
680 : : * false, all extended statistics will be ignored, and only per-column
681 : : * statistics will be used.
682 : : */
683 : : Selectivity
684 : 1155535 : clause_selectivity_ext(PlannerInfo *root,
685 : : Node *clause,
686 : : int varRelid,
687 : : JoinType jointype,
688 : : SpecialJoinInfo *sjinfo,
689 : : bool use_extended_stats)
690 : : {
5938 tgl@sss.pgh.pa.us 691 : 1155535 : Selectivity s1 = 0.5; /* default for any unhandled clause type */
7406 692 : 1155535 : RestrictInfo *rinfo = NULL;
693 : 1155535 : bool cacheable = false;
694 : :
695 [ - + ]: 1155535 : if (clause == NULL) /* can this still happen? */
9031 tgl@sss.pgh.pa.us 696 :UBC 0 : return s1;
697 : :
7406 tgl@sss.pgh.pa.us 698 [ + + ]:CBC 1155535 : if (IsA(clause, RestrictInfo))
699 : : {
700 : 1146793 : rinfo = (RestrictInfo *) clause;
701 : :
702 : : /*
703 : : * If the clause is marked pseudoconstant, then it will be used as a
704 : : * gating qual and should not affect selectivity estimates; hence
705 : : * return 1.0. The only exception is that a constant FALSE may be
706 : : * taken as having selectivity 0.0, since it will surely mean no rows
707 : : * out of the plan. This case is simple enough that we need not
708 : : * bother caching the result.
709 : : */
6497 710 [ + + ]: 1146793 : if (rinfo->pseudoconstant)
711 : : {
6402 bruce@momjian.us 712 [ + + ]: 12225 : if (!IsA(rinfo->clause, Const))
5938 tgl@sss.pgh.pa.us 713 : 12119 : return (Selectivity) 1.0;
714 : : }
715 : :
716 : : /*
717 : : * If possible, cache the result of the selectivity calculation for
718 : : * the clause. We can cache if varRelid is zero or the clause
719 : : * contains only vars of that relid --- otherwise varRelid will affect
720 : : * the result, so mustn't cache. Outer join quals might be examined
721 : : * with either their join's actual jointype or JOIN_INNER, so we need
722 : : * two cache variables to remember both cases. Note: we assume the
723 : : * result won't change if we are switching the input relations or
724 : : * considering a unique-ified case, so we only need one cache variable
725 : : * for all non-JOIN_INNER cases.
726 : : */
7406 727 [ + + ]: 1134674 : if (varRelid == 0 ||
440 728 [ + - ]: 444872 : rinfo->num_base_rels == 0 ||
729 [ + + + + ]: 749025 : (rinfo->num_base_rels == 1 &&
730 : 304153 : bms_is_member(varRelid, rinfo->clause_relids)))
731 : : {
732 : : /* Cacheable --- do we already have the result? */
5546 733 [ + + ]: 992751 : if (jointype == JOIN_INNER)
734 : : {
735 [ + + ]: 841492 : if (rinfo->norm_selec >= 0)
736 : 605194 : return rinfo->norm_selec;
737 : : }
738 : : else
739 : : {
740 [ + + ]: 151259 : if (rinfo->outer_selec >= 0)
741 : 98057 : return rinfo->outer_selec;
742 : : }
5722 743 : 289500 : cacheable = true;
744 : : }
745 : :
746 : : /*
747 : : * Proceed with examination of contained clause. If the clause is an
748 : : * OR-clause, we want to look at the variant with sub-RestrictInfos,
749 : : * so that per-subclause selectivities can be cached.
750 : : */
6760 751 [ + + ]: 431423 : if (rinfo->orclause)
752 : 6089 : clause = (Node *) rinfo->orclause;
753 : : else
754 : 425334 : clause = (Node *) rinfo->clause;
755 : : }
756 : :
9031 757 [ + + ]: 440165 : if (IsA(clause, Var))
758 : : {
8572 759 : 19795 : Var *var = (Var *) clause;
760 : :
761 : : /*
762 : : * We probably shouldn't ever see an uplevel Var here, but if we do,
763 : : * return the default selectivity...
764 : : */
765 [ + - + + ]: 19795 : if (var->varlevelsup == 0 &&
766 [ + + ]: 338 : (varRelid == 0 || varRelid == (int) var->varno))
767 : : {
768 : : /* Use the restriction selectivity function for a bool Var */
3125 769 : 19638 : s1 = boolvarsel(root, (Node *) var, varRelid);
770 : : }
771 : : }
9031 772 [ + + ]: 420370 : else if (IsA(clause, Const))
773 : : {
774 : : /* bool constant is pretty easy... */
6402 bruce@momjian.us 775 : 1730 : Const *con = (Const *) clause;
776 : :
6497 tgl@sss.pgh.pa.us 777 [ + + ]: 3402 : s1 = con->constisnull ? 0.0 :
778 [ + + ]: 1672 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
779 : : }
7247 780 [ + + ]: 418640 : else if (IsA(clause, Param))
781 : : {
782 : : /* see if we can replace the Param */
6264 783 : 12 : Node *subst = estimate_expression_value(root, clause);
784 : :
7247 785 [ - + ]: 12 : if (IsA(subst, Const))
786 : : {
787 : : /* bool constant is pretty easy... */
6402 bruce@momjian.us 788 :UBC 0 : Const *con = (Const *) subst;
789 : :
6497 tgl@sss.pgh.pa.us 790 [ # # ]: 0 : s1 = con->constisnull ? 0.0 :
791 [ # # ]: 0 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
792 : : }
793 : : else
794 : : {
795 : : /* XXX any way to do better than default? */
796 : : }
797 : : }
1902 tgl@sss.pgh.pa.us 798 [ + + ]:CBC 418628 : else if (is_notclause(clause))
799 : : {
800 : : /* inverse of the selectivity of the underlying clause */
1228 dean.a.rasheed@gmail 801 : 3615 : s1 = 1.0 - clause_selectivity_ext(root,
802 : 3615 : (Node *) get_notclausearg((Expr *) clause),
803 : : varRelid,
804 : : jointype,
805 : : sjinfo,
806 : : use_extended_stats);
807 : : }
1902 tgl@sss.pgh.pa.us 808 [ + + ]: 415013 : else if (is_andclause(clause))
809 : : {
810 : : /* share code with clauselist_selectivity() */
1228 dean.a.rasheed@gmail 811 : 1641 : s1 = clauselist_selectivity_ext(root,
812 : : ((BoolExpr *) clause)->args,
813 : : varRelid,
814 : : jointype,
815 : : sjinfo,
816 : : use_extended_stats);
817 : : }
1902 tgl@sss.pgh.pa.us 818 [ + + ]: 413372 : else if (is_orclause(clause))
819 : : {
820 : : /*
821 : : * Almost the same thing as clauselist_selectivity, but with the
822 : : * clauses connected by OR.
823 : : */
1228 dean.a.rasheed@gmail 824 : 6109 : s1 = clauselist_selectivity_or(root,
825 : : ((BoolExpr *) clause)->args,
826 : : varRelid,
827 : : jointype,
828 : : sjinfo,
829 : : use_extended_stats);
830 : : }
5938 tgl@sss.pgh.pa.us 831 [ + + + + ]: 407263 : else if (is_opclause(clause) || IsA(clause, DistinctExpr))
9716 bruce@momjian.us 832 : 386672 : {
4298 tgl@sss.pgh.pa.us 833 : 386684 : OpExpr *opclause = (OpExpr *) clause;
834 : 386684 : Oid opno = opclause->opno;
835 : :
1179 836 [ + + ]: 386684 : if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
837 : : {
838 : : /* Estimate selectivity for a join clause. */
8207 bruce@momjian.us 839 : 96994 : s1 = join_selectivity(root, opno,
840 : : opclause->args,
841 : : opclause->inputcollid,
842 : : jointype,
843 : : sjinfo);
844 : : }
845 : : else
846 : : {
847 : : /* Estimate selectivity for a restriction clause. */
848 : 289690 : s1 = restriction_selectivity(root, opno,
849 : : opclause->args,
850 : : opclause->inputcollid,
851 : : varRelid);
852 : : }
853 : :
854 : : /*
855 : : * DistinctExpr has the same representation as OpExpr, but the
856 : : * contained operator is "=" not "<>", so we must negate the result.
857 : : * This estimation method doesn't give the right behavior for nulls,
858 : : * but it's better than doing nothing.
859 : : */
5938 tgl@sss.pgh.pa.us 860 [ + + ]: 386672 : if (IsA(clause, DistinctExpr))
861 : 393 : s1 = 1.0 - s1;
862 : : }
1891 863 [ + + ]: 20579 : else if (is_funcclause(clause))
864 : : {
865 : 4913 : FuncExpr *funcclause = (FuncExpr *) clause;
866 : :
867 : : /* Try to get an estimate from the support function, if any */
868 : 4913 : s1 = function_selectivity(root,
869 : : funcclause->funcid,
870 : : funcclause->args,
871 : : funcclause->inputcollid,
1179 872 : 4913 : treat_as_join_clause(root, clause, rinfo,
873 : : varRelid, sjinfo),
874 : : varRelid,
875 : : jointype,
876 : : sjinfo);
877 : : }
6715 878 [ + + ]: 15666 : else if (IsA(clause, ScalarArrayOpExpr))
879 : : {
880 : : /* Use node specific selectivity calculation function */
881 : 8275 : s1 = scalararraysel(root,
882 : : (ScalarArrayOpExpr *) clause,
1179 883 : 8275 : treat_as_join_clause(root, clause, rinfo,
884 : : varRelid, sjinfo),
885 : : varRelid,
886 : : jointype,
887 : : sjinfo);
888 : : }
6665 889 [ + + ]: 7391 : else if (IsA(clause, RowCompareExpr))
890 : : {
891 : : /* Use node specific selectivity calculation function */
892 : 78 : s1 = rowcomparesel(root,
893 : : (RowCompareExpr *) clause,
894 : : varRelid,
895 : : jointype,
896 : : sjinfo);
897 : : }
8329 898 [ + + ]: 7313 : else if (IsA(clause, NullTest))
899 : : {
900 : : /* Use node specific selectivity calculation function */
7848 901 : 5570 : s1 = nulltestsel(root,
902 : : ((NullTest *) clause)->nulltesttype,
7794 903 : 5570 : (Node *) ((NullTest *) clause)->arg,
904 : : varRelid,
905 : : jointype,
906 : : sjinfo);
907 : : }
8329 908 [ + + ]: 1743 : else if (IsA(clause, BooleanTest))
909 : : {
910 : : /* Use node specific selectivity calculation function */
7848 911 : 425 : s1 = booltestsel(root,
912 : : ((BooleanTest *) clause)->booltesttype,
7794 913 : 425 : (Node *) ((BooleanTest *) clause)->arg,
914 : : varRelid,
915 : : jointype,
916 : : sjinfo);
917 : : }
6152 918 [ + + ]: 1318 : else if (IsA(clause, CurrentOfExpr))
919 : : {
920 : : /* CURRENT OF selects at most one row of its table */
921 : 197 : CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
922 : 197 : RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
923 : :
924 [ + + ]: 197 : if (crel->tuples > 0)
925 : 190 : s1 = 1.0 / crel->tuples;
926 : : }
8645 927 [ - + ]: 1121 : else if (IsA(clause, RelabelType))
928 : : {
929 : : /* Not sure this case is needed, but it can't hurt */
1228 dean.a.rasheed@gmail 930 :UBC 0 : s1 = clause_selectivity_ext(root,
931 : 0 : (Node *) ((RelabelType *) clause)->arg,
932 : : varRelid,
933 : : jointype,
934 : : sjinfo,
935 : : use_extended_stats);
936 : : }
7741 tgl@sss.pgh.pa.us 937 [ - + ]:CBC 1121 : else if (IsA(clause, CoerceToDomain))
938 : : {
939 : : /* Not sure this case is needed, but it can't hurt */
1228 dean.a.rasheed@gmail 940 :UBC 0 : s1 = clause_selectivity_ext(root,
941 : 0 : (Node *) ((CoerceToDomain *) clause)->arg,
942 : : varRelid,
943 : : jointype,
944 : : sjinfo,
945 : : use_extended_stats);
946 : : }
947 : : else
948 : : {
949 : : /*
950 : : * For anything else, see if we can consider it as a boolean variable.
951 : : * This only works if it's an immutable expression in Vars of a single
952 : : * relation; but there's no point in us checking that here because
953 : : * boolvarsel() will do it internally, and return a suitable default
954 : : * selectivity if not.
955 : : */
3125 tgl@sss.pgh.pa.us 956 :CBC 1121 : s1 = boolvarsel(root, clause, varRelid);
957 : : }
958 : :
959 : : /* Cache the result if possible */
7406 960 [ + + ]: 440153 : if (cacheable)
961 : : {
5546 962 [ + + ]: 289488 : if (jointype == JOIN_INNER)
963 : 236286 : rinfo->norm_selec = s1;
964 : : else
965 : 53202 : rinfo->outer_selec = s1;
966 : : }
967 : :
968 : : #ifdef SELECTIVITY_DEBUG
969 : : elog(DEBUG4, "clause_selectivity: s1 %f", s1);
970 : : #endif /* SELECTIVITY_DEBUG */
971 : :
9031 972 : 440153 : return s1;
973 : : }
|