Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * clausesel.c
4 : * Routines to compute clause selectivities
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/path/clausesel.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "nodes/makefuncs.h"
18 : #include "nodes/nodeFuncs.h"
19 : #include "optimizer/clauses.h"
20 : #include "optimizer/cost.h"
21 : #include "optimizer/optimizer.h"
22 : #include "optimizer/pathnode.h"
23 : #include "optimizer/plancat.h"
24 : #include "statistics/statistics.h"
25 : #include "utils/fmgroids.h"
26 : #include "utils/lsyscache.h"
27 : #include "utils/selfuncs.h"
28 :
29 : /*
30 : * Data structure for accumulating info about possible range-query
31 : * clause pairs in clauselist_selectivity.
32 : */
33 : typedef struct RangeQueryClause
34 : {
35 : struct RangeQueryClause *next; /* next in linked list */
36 : Node *var; /* The common variable of the clauses */
37 : bool have_lobound; /* found a low-bound clause yet? */
38 : bool have_hibound; /* found a high-bound clause yet? */
39 : Selectivity lobound; /* Selectivity of a var > something clause */
40 : Selectivity hibound; /* Selectivity of a var < something clause */
41 : } RangeQueryClause;
42 :
43 : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
44 : bool varonleft, bool isLTsel, Selectivity s2);
45 : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
46 : List *clauses);
47 : static Selectivity clauselist_selectivity_or(PlannerInfo *root,
48 : List *clauses,
49 : int varRelid,
50 : JoinType jointype,
51 : SpecialJoinInfo *sjinfo,
52 : bool use_extended_stats);
53 :
54 : /****************************************************************************
55 : * ROUTINES TO COMPUTE SELECTIVITIES
56 : ****************************************************************************/
57 :
58 : /*
59 : * clauselist_selectivity -
60 : * Compute the selectivity of an implicitly-ANDed list of boolean
61 : * expression clauses. The list can be empty, in which case 1.0
62 : * must be returned. List elements may be either RestrictInfos
63 : * or bare expression clauses --- the former is preferred since
64 : * it allows caching of results.
65 : *
66 : * See clause_selectivity() for the meaning of the additional parameters.
67 : *
68 : * The basic approach is to apply extended statistics first, on as many
69 : * clauses as possible, in order to capture cross-column dependencies etc.
70 : * The remaining clauses are then estimated by taking the product of their
71 : * selectivities, but that's only right if they have independent
72 : * probabilities, and in reality they are often NOT independent even if they
73 : * only refer to a single column. So, we want to be smarter where we can.
74 : *
75 : * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
76 : * are recognized as possible range query components if they are restriction
77 : * opclauses whose operators have scalarltsel or a related function as their
78 : * restriction selectivity estimator. We pair up clauses of this form that
79 : * refer to the same variable. An unpairable clause of this kind is simply
80 : * multiplied into the selectivity product in the normal way. But when we
81 : * find a pair, we know that the selectivities represent the relative
82 : * positions of the low and high bounds within the column's range, so instead
83 : * of figuring the selectivity as hisel * losel, we can figure it as hisel +
84 : * losel - 1. (To visualize this, see that hisel is the fraction of the range
85 : * below the high bound, while losel is the fraction above the low bound; so
86 : * hisel can be interpreted directly as a 0..1 value but we need to convert
87 : * losel to 1-losel before interpreting it as a value. Then the available
88 : * range is 1-losel to hisel. However, this calculation double-excludes
89 : * nulls, so really we need hisel + losel + null_frac - 1.)
90 : *
91 : * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
92 : * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
93 : * yields an impossible (negative) result.
94 : *
95 : * A free side-effect is that we can recognize redundant inequalities such
96 : * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
97 : *
98 : * Of course this is all very dependent on the behavior of the inequality
99 : * selectivity functions; perhaps some day we can generalize the approach.
100 : */
101 : Selectivity
857 dean.a.rasheed 102 CBC 960274 : clauselist_selectivity(PlannerInfo *root,
103 : List *clauses,
104 : int varRelid,
105 : JoinType jointype,
106 : SpecialJoinInfo *sjinfo)
107 : {
108 960274 : return clauselist_selectivity_ext(root, clauses, varRelid,
109 : jointype, sjinfo, true);
110 : }
111 :
112 : /*
113 : * clauselist_selectivity_ext -
114 : * Extended version of clauselist_selectivity(). If "use_extended_stats"
115 : * is false, all extended statistics will be ignored, and only per-column
116 : * statistics will be used.
117 : */
118 : Selectivity
119 963049 : clauselist_selectivity_ext(PlannerInfo *root,
120 : List *clauses,
121 : int varRelid,
122 : JoinType jointype,
123 : SpecialJoinInfo *sjinfo,
124 : bool use_extended_stats)
125 : {
8397 bruce 126 963049 : Selectivity s1 = 1.0;
127 : RelOptInfo *rel;
857 dean.a.rasheed 128 963049 : Bitmapset *estimatedclauses = NULL;
8397 bruce 129 963049 : RangeQueryClause *rqlist = NULL;
130 : ListCell *l;
131 : int listidx;
132 :
133 : /*
134 : * If there's exactly one clause, just go directly to
135 : * clause_selectivity_ext(). None of what we might do below is relevant.
136 : */
857 dean.a.rasheed 137 963049 : if (list_length(clauses) == 1)
138 508393 : return clause_selectivity_ext(root, (Node *) linitial(clauses),
139 : varRelid, jointype, sjinfo,
140 : use_extended_stats);
141 :
142 : /*
143 : * Determine if these clauses reference a single relation. If so, and if
144 : * it has extended statistics, try to apply those.
145 : */
146 454656 : rel = find_single_rel_for_clauses(root, clauses);
147 454656 : if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
148 : {
149 : /*
150 : * Estimate as many clauses as possible using extended statistics.
151 : *
152 : * 'estimatedclauses' is populated with the 0-based list position
153 : * index of clauses estimated here, and that should be ignored below.
154 : */
155 894 : s1 = statext_clauselist_selectivity(root, clauses, varRelid,
156 : jointype, sjinfo, rel,
157 : &estimatedclauses, false);
158 : }
159 :
160 : /*
161 : * Apply normal selectivity estimates for remaining clauses. We'll be
162 : * careful to skip any clauses which were already estimated above.
163 : *
164 : * Anything that doesn't look like a potential rangequery clause gets
165 : * multiplied into s1 and forgotten. Anything that does gets inserted into
166 : * an rqlist entry.
167 : */
2195 simon 168 454656 : listidx = -1;
6892 neilc 169 727446 : foreach(l, clauses)
170 : {
171 272790 : Node *clause = (Node *) lfirst(l);
172 : RestrictInfo *rinfo;
173 : Selectivity s2;
174 :
2195 simon 175 272790 : listidx++;
176 :
177 : /*
178 : * Skip this clause if it's already been estimated by some other
179 : * statistics above.
180 : */
181 272790 : if (bms_is_member(listidx, estimatedclauses))
182 1737 : continue;
183 :
184 : /* Compute the selectivity of this clause in isolation */
857 dean.a.rasheed 185 271053 : s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
186 : use_extended_stats);
187 :
188 : /*
189 : * Check for being passed a RestrictInfo.
190 : *
191 : * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
192 : * 0.0; just use that rather than looking for range pairs.
193 : */
7035 tgl 194 271053 : if (IsA(clause, RestrictInfo))
195 : {
196 270463 : rinfo = (RestrictInfo *) clause;
6126 197 270463 : if (rinfo->pseudoconstant)
198 : {
199 10483 : s1 = s1 * s2;
200 10483 : continue;
201 : }
7035 202 259980 : clause = (Node *) rinfo->clause;
203 : }
204 : else
205 590 : rinfo = NULL;
206 :
207 : /*
208 : * See if it looks like a restriction clause with a pseudoconstant on
209 : * one side. (Anything more complicated than that might not behave in
210 : * the simple way we are expecting.) Most of the tests here can be
211 : * done more efficiently with rinfo than without.
212 : */
6888 neilc 213 260570 : if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
214 : {
7423 tgl 215 222425 : OpExpr *expr = (OpExpr *) clause;
7035 216 222425 : bool varonleft = true;
217 : bool ok;
218 :
219 222425 : if (rinfo)
220 : {
69 tgl 221 GNC 362272 : ok = (rinfo->num_base_rels == 1) &&
7035 tgl 222 CBC 138196 : (is_pseudo_constant_clause_relids(lsecond(expr->args),
6385 bruce 223 2233 : rinfo->right_relids) ||
7035 tgl 224 2233 : (varonleft = false,
6385 bruce 225 2233 : is_pseudo_constant_clause_relids(linitial(expr->args),
226 : rinfo->left_relids)));
227 : }
228 : else
229 : {
808 tgl 230 1164 : ok = (NumRelids(root, clause) == 1) &&
7035 231 582 : (is_pseudo_constant_clause(lsecond(expr->args)) ||
7035 tgl 232 UBC 0 : (varonleft = false,
6892 neilc 233 0 : is_pseudo_constant_clause(linitial(expr->args))));
234 : }
235 :
7035 tgl 236 CBC 222425 : if (ok)
237 : {
238 : /*
239 : * If it's not a "<"/"<="/">"/">=" operator, just merge the
240 : * selectivity in generically. But if it's the right oprrest,
241 : * add the clause to rqlist for later processing.
242 : */
243 138623 : switch (get_oprrest(expr->opno))
244 : {
245 6255 : case F_SCALARLTSEL:
246 : case F_SCALARLESEL:
247 6255 : addRangeClause(&rqlist, clause,
248 : varonleft, true, s2);
249 6255 : break;
250 15408 : case F_SCALARGTSEL:
251 : case F_SCALARGESEL:
252 15408 : addRangeClause(&rqlist, clause,
253 : varonleft, false, s2);
254 15408 : break;
255 116960 : default:
256 : /* Just merge the selectivity in generically */
257 116960 : s1 = s1 * s2;
258 116960 : break;
259 : }
260 138623 : continue; /* drop to loop bottom */
261 : }
262 : }
263 :
264 : /* Not the right form, so treat it generically. */
8491 265 121947 : s1 = s1 * s2;
266 : }
267 :
268 : /*
269 : * Now scan the rangequery pair list.
270 : */
8476 271 472054 : while (rqlist != NULL)
272 : {
273 : RangeQueryClause *rqnext;
274 :
275 17398 : if (rqlist->have_lobound && rqlist->have_hibound)
276 4106 : {
277 : /* Successfully matched a pair of range clauses */
278 : Selectivity s2;
279 :
280 : /*
281 : * Exact equality to the default value probably means the
282 : * selectivity function punted. This is not airtight but should
283 : * be good enough.
284 : */
6725 285 4106 : if (rqlist->hibound == DEFAULT_INEQ_SEL ||
286 3094 : rqlist->lobound == DEFAULT_INEQ_SEL)
287 : {
288 1012 : s2 = DEFAULT_RANGE_INEQ_SEL;
289 : }
290 : else
291 : {
292 3094 : s2 = rqlist->hibound + rqlist->lobound - 1.0;
293 :
294 : /* Adjust for double-exclusion of NULLs */
5700 295 3094 : s2 += nulltestsel(root, IS_NULL, rqlist->var,
296 : varRelid, jointype, sjinfo);
297 :
298 : /*
299 : * A zero or slightly negative s2 should be converted into a
300 : * small positive value; we probably are dealing with a very
301 : * tight range and got a bogus result due to roundoff errors.
302 : * However, if s2 is very negative, then we probably have
303 : * default selectivity estimates on one or both sides of the
304 : * range that we failed to recognize above for some reason.
305 : */
6725 306 3094 : if (s2 <= 0.0)
307 : {
308 431 : if (s2 < -0.01)
309 : {
310 : /*
311 : * No data available --- use a default estimate that
312 : * is small, but not real small.
313 : */
314 6 : s2 = DEFAULT_RANGE_INEQ_SEL;
315 : }
316 : else
317 : {
318 : /*
319 : * It's just roundoff error; use a small positive
320 : * value
321 : */
322 425 : s2 = 1.0e-10;
323 : }
324 : }
325 : }
326 : /* Merge in the selectivity of the pair of clauses */
8417 327 4106 : s1 *= s2;
328 : }
329 : else
330 : {
331 : /* Only found one of a pair, merge it in generically */
8476 332 13292 : if (rqlist->have_lobound)
333 11125 : s1 *= rqlist->lobound;
334 : else
335 2167 : s1 *= rqlist->hibound;
336 : }
337 : /* release storage and advance */
338 17398 : rqnext = rqlist->next;
339 17398 : pfree(rqlist);
340 17398 : rqlist = rqnext;
341 : }
342 :
8491 343 454656 : return s1;
344 : }
345 :
346 : /*
347 : * clauselist_selectivity_or -
348 : * Compute the selectivity of an implicitly-ORed list of boolean
349 : * expression clauses. The list can be empty, in which case 0.0
350 : * must be returned. List elements may be either RestrictInfos
351 : * or bare expression clauses --- the former is preferred since
352 : * it allows caching of results.
353 : *
354 : * See clause_selectivity() for the meaning of the additional parameters.
355 : *
356 : * The basic approach is to apply extended statistics first, on as many
357 : * clauses as possible, in order to capture cross-column dependencies etc.
358 : * The remaining clauses are then estimated as if they were independent.
359 : */
360 : static Selectivity
857 dean.a.rasheed 361 5132 : clauselist_selectivity_or(PlannerInfo *root,
362 : List *clauses,
363 : int varRelid,
364 : JoinType jointype,
365 : SpecialJoinInfo *sjinfo,
366 : bool use_extended_stats)
367 : {
368 5132 : Selectivity s1 = 0.0;
369 : RelOptInfo *rel;
370 5132 : Bitmapset *estimatedclauses = NULL;
371 : ListCell *lc;
372 : int listidx;
373 :
374 : /*
375 : * Determine if these clauses reference a single relation. If so, and if
376 : * it has extended statistics, try to apply those.
377 : */
378 5132 : rel = find_single_rel_for_clauses(root, clauses);
379 5132 : if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
380 : {
381 : /*
382 : * Estimate as many clauses as possible using extended statistics.
383 : *
384 : * 'estimatedclauses' is populated with the 0-based list position
385 : * index of clauses estimated here, and that should be ignored below.
386 : */
387 54 : s1 = statext_clauselist_selectivity(root, clauses, varRelid,
388 : jointype, sjinfo, rel,
389 : &estimatedclauses, true);
390 : }
391 :
392 : /*
393 : * Estimate the remaining clauses as if they were independent.
394 : *
395 : * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
396 : * for the probable overlap of selected tuple sets.
397 : *
398 : * XXX is this too conservative?
399 : */
400 5132 : listidx = -1;
401 17065 : foreach(lc, clauses)
402 : {
403 : Selectivity s2;
404 :
405 11933 : listidx++;
406 :
407 : /*
408 : * Skip this clause if it's already been estimated by some other
409 : * statistics above.
410 : */
411 11933 : if (bms_is_member(listidx, estimatedclauses))
412 120 : continue;
413 :
414 11813 : s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
415 : jointype, sjinfo, use_extended_stats);
416 :
417 11813 : s1 = s1 + s2 - s1 * s2;
418 : }
419 :
420 5132 : return s1;
421 : }
422 :
423 : /*
424 : * addRangeClause --- add a new range clause for clauselist_selectivity
425 : *
426 : * Here is where we try to match up pairs of range-query clauses
427 : */
428 : static void
8476 tgl 429 21663 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
430 : bool varonleft, bool isLTsel, Selectivity s2)
431 : {
432 : RangeQueryClause *rqelem;
433 : Node *var;
434 : bool is_lobound;
435 :
7994 436 21663 : if (varonleft)
437 : {
7389 438 21558 : var = get_leftop((Expr *) clause);
8397 bruce 439 21558 : is_lobound = !isLTsel; /* x < something is high bound */
440 : }
441 : else
442 : {
7389 tgl 443 105 : var = get_rightop((Expr *) clause);
8476 444 105 : is_lobound = isLTsel; /* something < x is low bound */
445 : }
446 :
447 22281 : for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
448 : {
449 : /*
450 : * We use full equal() here because the "var" might be a function of
451 : * one or more attributes of the same relation...
452 : */
8397 bruce 453 4883 : if (!equal(var, rqelem->var))
8476 tgl 454 618 : continue;
455 : /* Found the right group to put this clause in */
456 4265 : if (is_lobound)
457 : {
8397 bruce 458 156 : if (!rqelem->have_lobound)
459 : {
8476 tgl 460 66 : rqelem->have_lobound = true;
461 66 : rqelem->lobound = s2;
462 : }
463 : else
464 : {
465 :
466 : /*------
467 : * We have found two similar clauses, such as
468 : * x < y AND x <= z.
469 : * Keep only the more restrictive one.
470 : *------
471 : */
472 90 : if (rqelem->lobound > s2)
8476 tgl 473 UBC 0 : rqelem->lobound = s2;
474 : }
475 : }
476 : else
477 : {
8397 bruce 478 CBC 4109 : if (!rqelem->have_hibound)
479 : {
8476 tgl 480 4040 : rqelem->have_hibound = true;
481 4040 : rqelem->hibound = s2;
482 : }
483 : else
484 : {
485 :
486 : /*------
487 : * We have found two similar clauses, such as
488 : * x > y AND x >= z.
489 : * Keep only the more restrictive one.
490 : *------
491 : */
492 69 : if (rqelem->hibound > s2)
493 18 : rqelem->hibound = s2;
494 : }
495 : }
496 4265 : return;
497 : }
498 :
499 : /* No matching var found, so make a new clause-pair data structure */
500 17398 : rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
501 17398 : rqelem->var = var;
502 17398 : if (is_lobound)
503 : {
504 15165 : rqelem->have_lobound = true;
505 15165 : rqelem->have_hibound = false;
506 15165 : rqelem->lobound = s2;
507 : }
508 : else
509 : {
510 2233 : rqelem->have_lobound = false;
511 2233 : rqelem->have_hibound = true;
512 2233 : rqelem->hibound = s2;
513 : }
514 17398 : rqelem->next = *rqlist;
515 17398 : *rqlist = rqelem;
516 : }
517 :
518 : /*
519 : * find_single_rel_for_clauses
520 : * Examine each clause in 'clauses' and determine if all clauses
521 : * reference only a single relation. If so return that relation,
522 : * otherwise return NULL.
523 : */
524 : static RelOptInfo *
2192 525 460840 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
526 : {
527 460840 : int lastrelid = 0;
528 : ListCell *l;
529 :
2194 simon 530 617942 : foreach(l, clauses)
531 : {
532 226345 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
533 : int relid;
534 :
535 : /*
536 : * If we have a list of bare clauses rather than RestrictInfos, we
537 : * could pull out their relids the hard way with pull_varnos().
538 : * However, currently the extended-stats machinery won't do anything
539 : * with non-RestrictInfo clauses anyway, so there's no point in
540 : * spending extra cycles; just fail if that's what we have.
541 : *
542 : * An exception to that rule is if we have a bare BoolExpr AND clause.
543 : * We treat this as a special case because the restrictinfo machinery
544 : * doesn't build RestrictInfos on top of AND clauses.
545 : */
852 dean.a.rasheed 546 226345 : if (is_andclause(rinfo))
547 894 : {
548 : RelOptInfo *rel;
549 :
550 1052 : rel = find_single_rel_for_clauses(root,
551 : ((BoolExpr *) rinfo)->args);
552 :
553 1052 : if (rel == NULL)
554 69243 : return NULL;
555 894 : if (lastrelid == 0)
556 433 : lastrelid = rel->relid;
557 461 : else if (rel->relid != lastrelid)
852 dean.a.rasheed 558 UBC 0 : return NULL;
559 :
852 dean.a.rasheed 560 CBC 11482 : continue;
561 : }
562 :
2192 tgl 563 225293 : if (!IsA(rinfo, RestrictInfo))
564 511 : return NULL;
565 :
566 224782 : if (bms_is_empty(rinfo->clause_relids))
567 10588 : continue; /* we can ignore variable-free clauses */
568 214194 : if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
569 67412 : return NULL; /* multiple relations in this clause */
570 146782 : if (lastrelid == 0)
571 72125 : lastrelid = relid; /* first clause referencing a relation */
572 74657 : else if (relid != lastrelid)
573 1162 : return NULL; /* relation not same as last one */
574 : }
575 :
2194 simon 576 391597 : if (lastrelid != 0)
577 58638 : return find_base_rel(root, lastrelid);
578 :
2192 tgl 579 332959 : return NULL; /* no clauses */
580 : }
581 :
582 : /*
583 : * treat_as_join_clause -
584 : * Decide whether an operator clause is to be handled by the
585 : * restriction or join estimator. Subroutine for clause_selectivity().
586 : */
587 : static inline bool
808 tgl 588 GIC 348021 : treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
589 : int varRelid, SpecialJoinInfo *sjinfo)
590 : {
5349 591 348021 : if (varRelid != 0)
592 : {
593 : /*
594 : * Caller is forcing restriction mode (eg, because we are examining an
595 : * inner indexscan qual).
596 : */
597 127699 : return false;
5349 tgl 598 ECB : }
5349 tgl 599 CBC 220322 : else if (sjinfo == NULL)
600 : {
5349 tgl 601 ECB : /*
602 : * It must be a restriction clause, since it's being evaluated at a
603 : * scan node.
604 : */
5349 tgl 605 GIC 132976 : return false;
606 : }
607 : else
608 : {
609 : /*
610 : * Otherwise, it's a join if there's more than one base relation used.
611 : * We can optimize this calculation if an rinfo was passed.
612 : *
613 : * XXX Since we know the clause is being evaluated at a join, the
614 : * only way it could be single-relation is if it was delayed by outer
615 : * joins. We intentionally count only baserels here, not OJs that
616 : * might be present in rinfo->clause_relids, so that we direct such
617 : * cases to the restriction qual estimators not join estimators.
618 : * Eventually some notice should be taken of the possibility of
619 : * injected nulls, but we'll likely want to do that in the restriction
620 : * estimators rather than starting to treat such cases as join quals.
621 : */
622 87346 : if (rinfo)
69 tgl 623 GNC 87118 : return (rinfo->num_base_rels > 1);
624 : else
808 tgl 625 GIC 228 : return (NumRelids(root, clause) > 1);
626 : }
627 : }
628 :
629 :
630 : /*
631 : * clause_selectivity -
632 : * Compute the selectivity of a general boolean expression clause.
633 : *
634 : * The clause can be either a RestrictInfo or a plain expression. If it's
635 : * a RestrictInfo, we try to cache the selectivity for possible re-use,
636 : * so passing RestrictInfos is preferred.
637 : *
638 : * varRelid is either 0 or a rangetable index.
639 : *
640 : * When varRelid is not 0, only variables belonging to that relation are
641 : * considered in computing selectivity; other vars are treated as constants
642 : * of unknown values. This is appropriate for estimating the selectivity of
643 : * a join clause that is being used as a restriction clause in a scan of a
644 : * nestloop join's inner relation --- varRelid should then be the ID of the
645 : * inner relation.
646 : *
647 : * When varRelid is 0, all variables are treated as variables. This
8477 tgl 648 ECB : * is appropriate for ordinary join clauses and restriction clauses.
649 : *
650 : * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
651 : * if the clause isn't a join clause.
652 : *
653 : * sjinfo is NULL for a non-join clause, otherwise it provides additional
3260 bruce 654 : * context information about the join being performed. There are some
655 : * special cases:
656 : * 1. For a special (not INNER) join, sjinfo is always a member of
657 : * root->join_info_list.
658 : * 2. For an INNER join, sjinfo is just a transient struct, and only the
659 : * relids and jointype fields in it can be trusted.
660 : * It is possible for jointype to be different from sjinfo->jointype.
661 : * This indicates we are considering a variant join: either with
662 : * the LHS and RHS switched, or with one input unique-ified.
663 : *
664 : * Note: when passing nonzero varRelid, it's normally appropriate to set
5351 tgl 665 : * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
666 : * join clause; because we aren't treating it as a join clause.
667 : */
668 : Selectivity
6517 tgl 669 GIC 186281 : clause_selectivity(PlannerInfo *root,
670 : Node *clause,
671 : int varRelid,
5351 tgl 672 ECB : JoinType jointype,
2194 simon 673 : SpecialJoinInfo *sjinfo)
857 dean.a.rasheed 674 : {
857 dean.a.rasheed 675 GIC 186281 : return clause_selectivity_ext(root, clause, varRelid,
857 dean.a.rasheed 676 ECB : jointype, sjinfo, true);
857 dean.a.rasheed 677 EUB : }
678 :
857 dean.a.rasheed 679 ECB : /*
680 : * clause_selectivity_ext -
681 : * Extended version of clause_selectivity(). If "use_extended_stats" is
682 : * false, all extended statistics will be ignored, and only per-column
683 : * statistics will be used.
684 : */
685 : Selectivity
857 dean.a.rasheed 686 GIC 980847 : clause_selectivity_ext(PlannerInfo *root,
687 : Node *clause,
688 : int varRelid,
689 : JoinType jointype,
690 : SpecialJoinInfo *sjinfo,
857 dean.a.rasheed 691 ECB : bool use_extended_stats)
692 : {
5567 tgl 693 CBC 980847 : Selectivity s1 = 0.5; /* default for any unhandled clause type */
7035 694 980847 : RestrictInfo *rinfo = NULL;
7035 tgl 695 GIC 980847 : bool cacheable = false;
696 :
697 980847 : if (clause == NULL) /* can this still happen? */
8660 tgl 698 UIC 0 : return s1;
699 :
7035 tgl 700 GIC 980847 : if (IsA(clause, RestrictInfo))
701 : {
702 973443 : rinfo = (RestrictInfo *) clause;
703 :
704 : /*
705 : * If the clause is marked pseudoconstant, then it will be used as a
706 : * gating qual and should not affect selectivity estimates; hence
707 : * return 1.0. The only exception is that a constant FALSE may be
6031 bruce 708 ECB : * taken as having selectivity 0.0, since it will surely mean no rows
709 : * out of the plan. This case is simple enough that we need not
710 : * bother caching the result.
6126 tgl 711 : */
6126 tgl 712 GIC 973443 : if (rinfo->pseudoconstant)
713 : {
6031 bruce 714 CBC 10635 : if (!IsA(rinfo->clause, Const))
5567 tgl 715 GIC 10529 : return (Selectivity) 1.0;
6126 tgl 716 ECB : }
717 :
7035 718 : /*
719 : * If possible, cache the result of the selectivity calculation for
720 : * the clause. We can cache if varRelid is zero or the clause
721 : * contains only vars of that relid --- otherwise varRelid will affect
722 : * the result, so mustn't cache. Outer join quals might be examined
723 : * with either their join's actual jointype or JOIN_INNER, so we need
724 : * two cache variables to remember both cases. Note: we assume the
725 : * result won't change if we are switching the input relations or
5175 726 : * considering a unique-ified case, so we only need one cache variable
727 : * for all non-JOIN_INNER cases.
728 : */
7035 tgl 729 CBC 962914 : if (varRelid == 0 ||
69 tgl 730 GNC 374621 : rinfo->num_base_rels == 0 ||
731 626825 : (rinfo->num_base_rels == 1 &&
732 252204 : bms_is_member(varRelid, rinfo->clause_relids)))
733 : {
5351 tgl 734 ECB : /* Cacheable --- do we already have the result? */
5175 tgl 735 GIC 839629 : if (jointype == JOIN_INNER)
5175 tgl 736 ECB : {
5175 tgl 737 GIC 713070 : if (rinfo->norm_selec >= 0)
738 507456 : return rinfo->norm_selec;
739 : }
740 : else
741 : {
5175 tgl 742 CBC 126559 : if (rinfo->outer_selec >= 0)
743 81084 : return rinfo->outer_selec;
744 : }
5351 tgl 745 GIC 251089 : cacheable = true;
7035 tgl 746 ECB : }
747 :
748 : /*
6389 749 : * Proceed with examination of contained clause. If the clause is an
750 : * OR-clause, we want to look at the variant with sub-RestrictInfos,
751 : * so that per-subclause selectivities can be cached.
752 : */
6389 tgl 753 GIC 374374 : if (rinfo->orclause)
6389 tgl 754 CBC 5112 : clause = (Node *) rinfo->orclause;
6389 tgl 755 ECB : else
6389 tgl 756 GIC 369262 : clause = (Node *) rinfo->clause;
7035 tgl 757 ECB : }
758 :
8660 tgl 759 GIC 381778 : if (IsA(clause, Var))
9345 bruce 760 ECB : {
8201 tgl 761 GIC 15206 : Var *var = (Var *) clause;
8397 bruce 762 ECB :
763 : /*
764 : * We probably shouldn't ever see an uplevel Var here, but if we do,
6385 bruce 765 EUB : * return the default selectivity...
766 : */
8201 tgl 767 GBC 15206 : if (var->varlevelsup == 0 &&
768 266 : (varRelid == 0 || varRelid == (int) var->varno))
769 : {
770 : /* Use the restriction selectivity function for a bool Var */
2754 tgl 771 GIC 15085 : s1 = boolvarsel(root, (Node *) var, varRelid);
772 : }
773 : }
8660 774 366572 : else if (IsA(clause, Const))
8660 tgl 775 ECB : {
776 : /* bool constant is pretty easy... */
6031 bruce 777 GIC 859 : Const *con = (Const *) clause;
6126 tgl 778 ECB :
6126 tgl 779 CBC 1663 : s1 = con->constisnull ? 0.0 :
6126 tgl 780 GIC 804 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
781 : }
6876 782 365713 : else if (IsA(clause, Param))
783 : {
784 : /* see if we can replace the Param */
5893 tgl 785 CBC 12 : Node *subst = estimate_expression_value(root, clause);
786 :
6876 tgl 787 GIC 12 : if (IsA(subst, Const))
6876 tgl 788 ECB : {
789 : /* bool constant is pretty easy... */
6031 bruce 790 UIC 0 : Const *con = (Const *) subst;
791 :
6126 tgl 792 0 : s1 = con->constisnull ? 0.0 :
793 0 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
794 : }
6876 tgl 795 ECB : else
796 : {
797 : /* XXX any way to do better than default? */
798 : }
799 : }
1531 tgl 800 GIC 365701 : else if (is_notclause(clause))
8660 tgl 801 ECB : {
802 : /* inverse of the selectivity of the underlying clause */
857 dean.a.rasheed 803 GIC 3187 : s1 = 1.0 - clause_selectivity_ext(root,
804 3187 : (Node *) get_notclausearg((Expr *) clause),
805 : varRelid,
806 : jointype,
807 : sjinfo,
857 dean.a.rasheed 808 ECB : use_extended_stats);
8660 tgl 809 : }
1531 tgl 810 CBC 362514 : else if (is_andclause(clause))
8660 tgl 811 ECB : {
812 : /* share code with clauselist_selectivity() */
857 dean.a.rasheed 813 CBC 1302 : s1 = clauselist_selectivity_ext(root,
814 : ((BoolExpr *) clause)->args,
815 : varRelid,
857 dean.a.rasheed 816 ECB : jointype,
817 : sjinfo,
818 : use_extended_stats);
819 : }
1531 tgl 820 GIC 361212 : else if (is_orclause(clause))
821 : {
822 : /*
823 : * Almost the same thing as clauselist_selectivity, but with the
824 : * clauses connected by OR.
8660 tgl 825 ECB : */
857 dean.a.rasheed 826 GIC 5132 : s1 = clauselist_selectivity_or(root,
827 : ((BoolExpr *) clause)->args,
828 : varRelid,
829 : jointype,
830 : sjinfo,
831 : use_extended_stats);
832 : }
5567 tgl 833 356080 : else if (is_opclause(clause) || IsA(clause, DistinctExpr))
9345 bruce 834 335455 : {
3927 tgl 835 335467 : OpExpr *opclause = (OpExpr *) clause;
836 335467 : Oid opno = opclause->opno;
9345 bruce 837 ECB :
808 tgl 838 CBC 335467 : if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
839 : {
8477 tgl 840 ECB : /* Estimate selectivity for a join clause. */
7836 bruce 841 GIC 83942 : s1 = join_selectivity(root, opno,
3927 tgl 842 ECB : opclause->args,
843 : opclause->inputcollid,
844 : jointype,
5349 845 : sjinfo);
846 : }
847 : else
848 : {
8477 849 : /* Estimate selectivity for a restriction clause. */
7836 bruce 850 GIC 251525 : s1 = restriction_selectivity(root, opno,
851 : opclause->args,
852 : opclause->inputcollid,
853 : varRelid);
854 : }
5567 tgl 855 ECB :
856 : /*
857 : * DistinctExpr has the same representation as OpExpr, but the
858 : * contained operator is "=" not "<>", so we must negate the result.
859 : * This estimation method doesn't give the right behavior for nulls,
860 : * but it's better than doing nothing.
861 : */
5567 tgl 862 GIC 335455 : if (IsA(clause, DistinctExpr))
863 321 : s1 = 1.0 - s1;
864 : }
1520 865 20613 : else if (is_funcclause(clause))
1520 tgl 866 ECB : {
1520 tgl 867 GIC 4672 : FuncExpr *funcclause = (FuncExpr *) clause;
868 :
1520 tgl 869 ECB : /* Try to get an estimate from the support function, if any */
1520 tgl 870 GIC 4672 : s1 = function_selectivity(root,
871 : funcclause->funcid,
872 : funcclause->args,
873 : funcclause->inputcollid,
808 874 4672 : treat_as_join_clause(root, clause, rinfo,
1520 tgl 875 ECB : varRelid, sjinfo),
876 : varRelid,
877 : jointype,
878 : sjinfo);
879 : }
6344 tgl 880 CBC 15941 : else if (IsA(clause, ScalarArrayOpExpr))
881 : {
882 : /* Use node specific selectivity calculation function */
6344 tgl 883 GIC 7882 : s1 = scalararraysel(root,
884 : (ScalarArrayOpExpr *) clause,
808 tgl 885 CBC 7882 : treat_as_join_clause(root, clause, rinfo,
886 : varRelid, sjinfo),
887 : varRelid,
5351 tgl 888 ECB : jointype,
889 : sjinfo);
6344 890 : }
6294 tgl 891 GIC 8059 : else if (IsA(clause, RowCompareExpr))
892 : {
893 : /* Use node specific selectivity calculation function */
894 78 : s1 = rowcomparesel(root,
6294 tgl 895 ECB : (RowCompareExpr *) clause,
896 : varRelid,
897 : jointype,
5351 898 : sjinfo);
6294 899 : }
7958 tgl 900 GIC 7981 : else if (IsA(clause, NullTest))
7958 tgl 901 ECB : {
902 : /* Use node specific selectivity calculation function */
7477 tgl 903 GIC 6732 : s1 = nulltestsel(root,
7477 tgl 904 ECB : ((NullTest *) clause)->nulltesttype,
7423 tgl 905 GIC 6732 : (Node *) ((NullTest *) clause)->arg,
906 : varRelid,
5351 tgl 907 EUB : jointype,
908 : sjinfo);
909 : }
7958 tgl 910 GIC 1249 : else if (IsA(clause, BooleanTest))
911 : {
912 : /* Use node specific selectivity calculation function */
7477 913 96 : s1 = booltestsel(root,
7477 tgl 914 ECB : ((BooleanTest *) clause)->booltesttype,
7423 tgl 915 GIC 96 : (Node *) ((BooleanTest *) clause)->arg,
916 : varRelid,
5351 tgl 917 EUB : jointype,
918 : sjinfo);
919 : }
5781 tgl 920 GIC 1153 : else if (IsA(clause, CurrentOfExpr))
921 : {
922 : /* CURRENT OF selects at most one row of its table */
923 197 : CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
924 197 : RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
925 :
926 197 : if (crel->tuples > 0)
927 190 : s1 = 1.0 / crel->tuples;
928 : }
8274 929 956 : else if (IsA(clause, RelabelType))
930 : {
931 : /* Not sure this case is needed, but it can't hurt */
857 dean.a.rasheed 932 UIC 0 : s1 = clause_selectivity_ext(root,
857 dean.a.rasheed 933 LBC 0 : (Node *) ((RelabelType *) clause)->arg,
934 : varRelid,
935 : jointype,
936 : sjinfo,
857 dean.a.rasheed 937 ECB : use_extended_stats);
938 : }
7370 tgl 939 CBC 956 : else if (IsA(clause, CoerceToDomain))
7370 tgl 940 ECB : {
941 : /* Not sure this case is needed, but it can't hurt */
857 dean.a.rasheed 942 LBC 0 : s1 = clause_selectivity_ext(root,
857 dean.a.rasheed 943 UIC 0 : (Node *) ((CoerceToDomain *) clause)->arg,
944 : varRelid,
945 : jointype,
946 : sjinfo,
947 : use_extended_stats);
948 : }
2754 tgl 949 ECB : else
950 : {
951 : /*
952 : * For anything else, see if we can consider it as a boolean variable.
953 : * This only works if it's an immutable expression in Vars of a single
954 : * relation; but there's no point in us checking that here because
955 : * boolvarsel() will do it internally, and return a suitable default
956 : * selectivity if not.
957 : */
2754 tgl 958 GIC 956 : s1 = boolvarsel(root, clause, varRelid);
959 : }
960 :
961 : /* Cache the result if possible */
7035 962 381766 : if (cacheable)
963 : {
5175 964 251077 : if (jointype == JOIN_INNER)
965 205602 : rinfo->norm_selec = s1;
966 : else
967 45475 : rinfo->outer_selec = s1;
968 : }
969 :
970 : #ifdef SELECTIVITY_DEBUG
971 : elog(DEBUG4, "clause_selectivity: s1 %f", s1);
972 : #endif /* SELECTIVITY_DEBUG */
973 :
8660 974 381766 : return s1;
975 : }
|