Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * analyzejoins.c
4 : * Routines for simplifying joins after initial query analysis
5 : *
6 : * While we do a great deal of join simplification in prep/prepjointree.c,
7 : * certain optimizations cannot be performed at that stage for lack of
8 : * detailed information about the query. The routines here are invoked
9 : * after initsplan.c has done its work, and can do additional join removal
10 : * and simplification steps based on the information extracted. The penalty
11 : * is that we have to work harder to clean up after ourselves when we modify
12 : * the query, since the derived data structures have to be updated too.
13 : *
14 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : *
18 : * IDENTIFICATION
19 : * src/backend/optimizer/plan/analyzejoins.c
20 : *
21 : *-------------------------------------------------------------------------
22 : */
23 : #include "postgres.h"
24 :
25 : #include "nodes/nodeFuncs.h"
26 : #include "optimizer/clauses.h"
27 : #include "optimizer/joininfo.h"
28 : #include "optimizer/optimizer.h"
29 : #include "optimizer/pathnode.h"
30 : #include "optimizer/paths.h"
31 : #include "optimizer/planmain.h"
32 : #include "optimizer/restrictinfo.h"
33 : #include "optimizer/tlist.h"
34 : #include "utils/lsyscache.h"
35 :
36 : /* local functions */
37 : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
38 : static void remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
39 : Relids joinrelids);
40 : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
41 : int relid, int ojrelid);
42 : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
43 : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
44 : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
45 : List *clause_list);
46 : static Oid distinct_col_search(int colno, List *colnos, List *opids);
47 : static bool is_innerrel_unique_for(PlannerInfo *root,
48 : Relids joinrelids,
49 : Relids outerrelids,
50 : RelOptInfo *innerrel,
51 : JoinType jointype,
52 : List *restrictlist);
53 :
54 :
55 : /*
56 : * remove_useless_joins
57 : * Check for relations that don't actually need to be joined at all,
58 : * and remove them from the query.
59 : *
60 : * We are passed the current joinlist and return the updated list. Other
61 : * data structures that have to be updated are accessible via "root".
62 : */
63 : List *
4760 tgl 64 GIC 128144 : remove_useless_joins(PlannerInfo *root, List *joinlist)
65 : {
66 : ListCell *lc;
4760 tgl 67 ECB :
68 : /*
69 : * We are only interested in relations that are left-joined to, so we can
70 : * scan the join_info_list to find them easily.
71 : */
4760 tgl 72 GIC 132116 : restart:
73 148432 : foreach(lc, root->join_info_list)
74 : {
4760 tgl 75 CBC 20288 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
76 : Relids joinrelids;
4660 bruce 77 ECB : int innerrelid;
78 : int nremoved;
4760 tgl 79 :
80 : /* Skip if not removable */
4760 tgl 81 GIC 20288 : if (!join_is_removable(root, sjinfo))
82 16316 : continue;
83 :
84 : /*
4760 tgl 85 ECB : * Currently, join_is_removable can only succeed when the sjinfo's
86 : * righthand is a single baserel. Remove that rel from the query and
87 : * joinlist.
88 : */
4760 tgl 89 GIC 3972 : innerrelid = bms_singleton_member(sjinfo->min_righthand);
90 :
91 : /* Compute the relid set for the join we are considering */
69 tgl 92 GNC 3972 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
93 3972 : if (sjinfo->ojrelid != 0)
94 3972 : joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
95 :
96 3972 : remove_rel_from_query(root, innerrelid, sjinfo->ojrelid, joinrelids);
97 :
98 : /* We verify that exactly one reference gets removed from joinlist */
4760 tgl 99 CBC 3972 : nremoved = 0;
100 3972 : joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
101 3972 : if (nremoved != 1)
4760 tgl 102 UIC 0 : elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
4760 tgl 103 ECB :
104 : /*
105 : * We can delete this SpecialJoinInfo from the list too, since it's no
1364 106 : * longer of interest. (Since we'll restart the foreach loop
107 : * immediately, we don't bother with foreach_delete_current.)
4760 108 : */
1364 tgl 109 GBC 3972 : root->join_info_list = list_delete_cell(root->join_info_list, lc);
110 :
111 : /*
112 : * Restart the scan. This is necessary to ensure we find all
113 : * removable joins independently of ordering of the join_info_list
114 : * (note that removal of attr_needed bits may make a join appear
115 : * removable that did not before).
4760 tgl 116 ECB : */
4760 tgl 117 GIC 3972 : goto restart;
118 : }
119 :
120 128144 : return joinlist;
121 : }
122 :
123 : /*
4760 tgl 124 ECB : * clause_sides_match_join
125 : * Determine whether a join clause is of the right form to use in this join.
126 : *
127 : * We already know that the clause is a binary opclause referencing only the
128 : * rels in the current join. The point here is to check whether it has the
129 : * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
130 : * rather than mixing outer and inner vars on either side. If it matches,
131 : * we set the transient flag outer_is_left to identify which side is which.
132 : */
133 : static inline bool
4760 tgl 134 GIC 75027 : clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
135 : Relids innerrelids)
136 : {
137 114211 : if (bms_is_subset(rinfo->left_relids, outerrelids) &&
138 39184 : bms_is_subset(rinfo->right_relids, innerrelids))
139 : {
140 : /* lefthand side is outer */
4760 tgl 141 CBC 39184 : rinfo->outer_is_left = true;
4760 tgl 142 GIC 39184 : return true;
143 : }
4760 tgl 144 CBC 71686 : else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
145 35843 : bms_is_subset(rinfo->right_relids, outerrelids))
146 : {
147 : /* righthand side is outer */
148 35843 : rinfo->outer_is_left = false;
149 35843 : return true;
150 : }
4760 tgl 151 LBC 0 : return false; /* no good for these input relations */
4760 tgl 152 ECB : }
153 :
154 : /*
155 : * join_is_removable
156 : * Check whether we need not perform this special join at all, because
157 : * it will just duplicate its left input.
4760 tgl 158 EUB : *
159 : * This is true for a left join for which the join condition cannot match
160 : * more than one inner-side row. (There are other possibly interesting
161 : * cases, but we don't have the infrastructure to prove them.) We also
162 : * have to check that the inner side doesn't generate any variables needed
163 : * above the join.
164 : */
165 : static bool
4760 tgl 166 GIC 20288 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
167 : {
168 : int innerrelid;
169 : RelOptInfo *innerrel;
170 : Relids inputrelids;
171 : Relids joinrelids;
172 20288 : List *clause_list = NIL;
173 : ListCell *l;
4760 tgl 174 ECB : int attroff;
175 :
176 : /*
177 : * Must be a left join to a single baserel, else we aren't going to be
178 : * able to do anything with it.
179 : */
69 tgl 180 GNC 20288 : if (sjinfo->jointype != JOIN_LEFT)
3054 tgl 181 GIC 3244 : return false;
182 :
183 17044 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
4760 184 792 : return false;
185 :
186 : /*
48 tgl 187 ECB : * Never try to eliminate a left join to the query result rel. Although
188 : * the case is syntactically impossible in standard SQL, MERGE will build
189 : * a join tree that looks exactly like that.
190 : */
48 tgl 191 CBC 16252 : if (innerrelid == root->parse->resultRelation)
48 tgl 192 GIC 277 : return false;
193 :
4760 194 15975 : innerrel = find_base_rel(root, innerrelid);
195 :
196 : /*
197 : * Before we go to the effort of checking whether any innerrel variables
3190 tgl 198 ECB : * are needed above the join, make a quick check to eliminate cases in
199 : * which we will surely be unable to prove uniqueness of the innerrel.
200 : */
2558 tgl 201 CBC 15975 : if (!rel_supports_distinctness(root, innerrel))
2558 tgl 202 GIC 1057 : return false;
203 :
204 : /* Compute the relid set for the join we are considering */
64 tgl 205 GNC 14918 : inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
206 14918 : Assert(sjinfo->ojrelid != 0);
207 14918 : joinrelids = bms_copy(inputrelids);
208 14918 : joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
209 :
210 : /*
4760 tgl 211 ECB : * We can't remove the join if any inner-rel attributes are used above the
212 : * join. Here, "above" the join includes pushed-down conditions, so we
213 : * should reject if attr_needed includes the OJ's own relid; therefore,
214 : * compare to inputrelids not joinrelids.
215 : *
216 : * As a micro-optimization, it seems better to start with max_attr and
217 : * count down rather than starting with min_attr and counting up, on the
218 : * theory that the system attributes are somewhat less likely to be wanted
219 : * and should be tested last.
220 : */
4760 tgl 221 GIC 14918 : for (attroff = innerrel->max_attr - innerrel->min_attr;
222 203119 : attroff >= 0;
223 188201 : attroff--)
224 : {
64 tgl 225 GNC 199114 : if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
4760 tgl 226 GIC 10913 : return false;
227 : }
228 :
4760 tgl 229 ECB : /*
4579 230 : * Similarly check that the inner rel isn't needed by any PlaceHolderVars
231 : * that will be used above the join. The PHV case is a little bit more
232 : * complicated, because PHVs may have been assigned a ph_eval_at location
233 : * that includes the innerrel, yet their contained expression might not
234 : * actually reference the innerrel (it could be just a constant, for
235 : * instance). If such a PHV is due to be evaluated above the join then it
236 : * needn't prevent join removal.
237 : */
4760 tgl 238 GIC 4026 : foreach(l, root->placeholder_list)
239 : {
240 30 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
241 :
3522 242 30 : if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
243 9 : return false; /* it references innerrel laterally */
4579 tgl 244 CBC 30 : if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
4579 tgl 245 GIC 12 : continue; /* it definitely doesn't reference innerrel */
62 tgl 246 GNC 18 : if (bms_is_subset(phinfo->ph_needed, inputrelids))
247 3 : continue; /* PHV is not used above the join */
248 15 : if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
249 9 : return false; /* it has to be evaluated below the join */
250 :
251 : /*
252 : * We need to be sure there will still be a place to evaluate the PHV
253 : * if we remove the join, ie that ph_eval_at wouldn't become empty.
254 : */
255 6 : if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
3522 tgl 256 UIC 0 : return false; /* there isn't any other place to eval PHV */
257 : /* Check contained expression last, since this is a bit expensive */
808 tgl 258 CBC 6 : if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
4579 259 6 : innerrel->relids))
62 tgl 260 UNC 0 : return false; /* contained expression references innerrel */
4760 tgl 261 ECB : }
262 :
263 : /*
264 : * Search for mergejoinable clauses that constrain the inner rel against
265 : * either the outer rel or a pseudoconstant. If an operator is
266 : * mergejoinable then it behaves like equality for some btree opclass, so
267 : * it's what we want. The mergejoinability test also eliminates clauses
268 : * containing volatile functions, which we couldn't depend on.
269 : */
4760 tgl 270 GIC 8113 : foreach(l, innerrel->joininfo)
4760 tgl 271 ECB : {
4760 tgl 272 GBC 4117 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
273 :
274 : /*
275 : * If the current join commutes with some other outer join(s) via
276 : * outer join identity 3, there will be multiple clones of its join
277 : * clauses in the joininfo list. We want to consider only the
278 : * has_clone form of such clauses. Processing more than one form
279 : * would be wasteful, and also some of the others would confuse the
280 : * RINFO_IS_PUSHED_DOWN test below.
281 : */
69 tgl 282 GNC 4117 : if (restrictinfo->is_clone)
283 43 : continue; /* ignore it */
284 :
4760 tgl 285 ECB : /*
4590 286 : * If it's not a join clause for this outer join, we can't use it.
4590 tgl 287 EUB : * Note that if the clause is pushed-down, then it is logically from
288 : * above the outer join, even if it references no other rels (it might
289 : * be from WHERE, for example).
290 : */
1815 tgl 291 GIC 4074 : if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
64 tgl 292 GNC 39 : continue; /* ignore; not useful here */
293 :
294 : /* Ignore if it's not a mergejoinable clause */
4760 tgl 295 GIC 4035 : if (!restrictinfo->can_join ||
296 4029 : restrictinfo->mergeopfamilies == NIL)
297 6 : continue; /* not mergejoinable */
298 :
4760 tgl 299 ECB : /*
2558 300 : * Check if clause has the form "outer op inner" or "inner op outer",
301 : * and if so mark which side is inner.
302 : */
4760 tgl 303 GIC 4029 : if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
304 : innerrel->relids))
4760 tgl 305 UIC 0 : continue; /* no good for these input relations */
306 :
307 : /* OK, add to list */
4760 tgl 308 CBC 4029 : clause_list = lappend(clause_list, restrictinfo);
4760 tgl 309 ECB : }
310 :
311 : /*
2558 312 : * Now that we have the relevant equality join clauses, try to prove the
313 : * innerrel distinct.
4183 314 : */
2558 tgl 315 GIC 3996 : if (rel_is_distinct_for(root, innerrel, clause_list))
316 3972 : return true;
317 :
318 : /*
319 : * Some day it would be nice to check for other methods of establishing
4760 tgl 320 ECB : * distinctness.
321 : */
4760 tgl 322 GBC 24 : return false;
323 : }
324 :
4760 tgl 325 ECB :
326 : /*
327 : * Remove the target relid from the planner's data structures, having
328 : * determined that there is no need to include it in the query.
329 : *
330 : * We are not terribly thorough here. We only bother to update parts of
331 : * the planner's data structures that will actually be consulted later.
332 : */
333 : static void
69 tgl 334 GNC 3972 : remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
335 : Relids joinrelids)
336 : {
4760 tgl 337 CBC 3972 : RelOptInfo *rel = find_base_rel(root, relid);
338 : List *joininfos;
339 : Index rti;
340 : ListCell *l;
341 :
342 : /*
4760 tgl 343 ECB : * Remove references to the rel from other baserels' attr_needed arrays.
344 : */
4760 tgl 345 GIC 25450 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
4760 tgl 346 ECB : {
4760 tgl 347 GIC 21478 : RelOptInfo *otherrel = root->simple_rel_array[rti];
348 : int attroff;
349 :
350 : /* there may be empty slots corresponding to non-baserel RTEs */
351 21478 : if (otherrel == NULL)
352 10418 : continue;
353 :
4660 bruce 354 CBC 11060 : Assert(otherrel->relid == rti); /* sanity check on array */
355 :
4760 tgl 356 ECB : /* no point in processing target rel itself */
4760 tgl 357 GIC 11060 : if (otherrel == rel)
358 3972 : continue;
359 :
4760 tgl 360 CBC 7088 : for (attroff = otherrel->max_attr - otherrel->min_attr;
361 166373 : attroff >= 0;
4760 tgl 362 GIC 159285 : attroff--)
4760 tgl 363 ECB : {
4760 tgl 364 GIC 318570 : otherrel->attr_needed[attroff] =
365 159285 : bms_del_member(otherrel->attr_needed[attroff], relid);
69 tgl 366 GNC 159285 : otherrel->attr_needed[attroff] =
367 159285 : bms_del_member(otherrel->attr_needed[attroff], ojrelid);
4760 tgl 368 ECB : }
369 : }
370 :
371 : /*
372 : * Update all_baserels and related relid sets.
373 : */
69 tgl 374 GNC 3972 : root->all_baserels = bms_del_member(root->all_baserels, relid);
375 3972 : root->outer_join_rels = bms_del_member(root->outer_join_rels, ojrelid);
376 3972 : root->all_query_rels = bms_del_member(root->all_query_rels, relid);
377 3972 : root->all_query_rels = bms_del_member(root->all_query_rels, ojrelid);
378 :
4704 tgl 379 ECB : /*
380 : * Likewise remove references from SpecialJoinInfo data structures.
381 : *
382 : * This is relevant in case the outer join we're deleting is nested inside
4382 bruce 383 : * other outer joins: the upper joins' relid sets have to be adjusted. The
384 : * RHS of the target outer join will be made empty here, but that's OK
4660 385 : * since caller will delete that SpecialJoinInfo entirely.
4704 tgl 386 : */
4704 tgl 387 GIC 9493 : foreach(l, root->join_info_list)
388 : {
389 5521 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
390 :
391 5521 : sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
392 5521 : sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
4704 tgl 393 CBC 5521 : sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
394 5521 : sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
69 tgl 395 GNC 5521 : sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, ojrelid);
396 5521 : sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, ojrelid);
397 5521 : sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, ojrelid);
398 5521 : sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, ojrelid);
399 : /* relid cannot appear in these fields, but ojrelid can: */
400 5521 : sjinfo->commute_above_l = bms_del_member(sjinfo->commute_above_l, ojrelid);
401 5521 : sjinfo->commute_above_r = bms_del_member(sjinfo->commute_above_r, ojrelid);
402 5521 : sjinfo->commute_below = bms_del_member(sjinfo->commute_below, ojrelid);
4704 tgl 403 ECB : }
404 :
405 : /*
406 : * Likewise remove references from PlaceHolderVar data structures,
407 : * removing any no-longer-needed placeholders entirely.
408 : *
409 : * Removal is a bit trickier than it might seem: we can remove PHVs that
410 : * are used at the target rel and/or in the join qual, but not those that
411 : * are used at join partner rels or above the join. It's not that easy to
412 : * distinguish PHVs used at partner rels from those used in the join qual,
413 : * since they will both have ph_needed sets that are subsets of
2802 414 : * joinrelids. However, a PHV used at a partner rel could not have the
415 : * target rel in ph_eval_at, so we check that while deciding whether to
416 : * remove or just update the PHV. There is no corresponding test in
417 : * join_is_removable because it doesn't need to distinguish those cases.
4760 418 : */
1364 tgl 419 CBC 3993 : foreach(l, root->placeholder_list)
4760 tgl 420 ECB : {
4760 tgl 421 CBC 21 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
4760 tgl 422 ECB :
2802 tgl 423 CBC 21 : Assert(!bms_is_member(relid, phinfo->ph_lateral));
424 33 : if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
425 12 : bms_is_member(relid, phinfo->ph_eval_at))
426 : {
1364 tgl 427 GIC 3 : root->placeholder_list = foreach_delete_current(root->placeholder_list,
1364 tgl 428 ECB : l);
235 tgl 429 GNC 3 : root->placeholder_array[phinfo->phid] = NULL;
430 : }
2803 tgl 431 ECB : else
432 : {
60 tgl 433 GNC 18 : PlaceHolderVar *phv = phinfo->ph_var;
434 :
2803 tgl 435 GIC 18 : phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
69 tgl 436 GNC 18 : phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, ojrelid);
60 437 18 : Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */
2803 tgl 438 GIC 18 : phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
69 tgl 439 GNC 18 : phinfo->ph_needed = bms_del_member(phinfo->ph_needed, ojrelid);
440 : /* ph_needed might or might not become empty */
60 441 18 : phv->phrels = bms_del_member(phv->phrels, relid);
442 18 : phv->phrels = bms_del_member(phv->phrels, ojrelid);
443 18 : Assert(!bms_is_empty(phv->phrels));
444 18 : Assert(phv->phnullingrels == NULL); /* no need to adjust */
445 : }
446 : }
447 :
448 : /*
449 : * Remove any joinquals referencing the rel from the joininfo lists.
450 : *
451 : * In some cases, a joinqual has to be put back after deleting its
452 : * reference to the target rel. This can occur for pseudoconstant and
453 : * outerjoin-delayed quals, which can get marked as requiring the rel in
454 : * order to force them to be evaluated at or above the join. We can't
455 : * just discard them, though. Only quals that logically belonged to the
456 : * outer join being discarded should be removed from the query.
457 : *
4590 tgl 458 ECB : * We must make a copy of the rel's old joininfo list before starting the
459 : * loop, because otherwise remove_join_clause_from_rels would destroy the
460 : * list while we're scanning it.
461 : */
4590 tgl 462 CBC 3972 : joininfos = list_copy(rel->joininfo);
463 8068 : foreach(l, joininfos)
4590 tgl 464 ECB : {
4590 tgl 465 GIC 4096 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
4590 tgl 466 ECB :
4590 tgl 467 GIC 4096 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
4590 tgl 468 ECB :
469 : /*
470 : * If the qual lists ojrelid in its required_relids, it must have come
471 : * from above the outer join we're removing (so we need to keep it);
472 : * if it does not, then it didn't and we can discard it.
473 : */
69 tgl 474 GNC 4096 : if (bms_is_member(ojrelid, rinfo->required_relids))
475 : {
4590 tgl 476 ECB : /*
477 : * There might be references to relid or ojrelid in the
478 : * RestrictInfo, as a consequence of PHVs having ph_eval_at sets
479 : * that include those. We already checked above that any such PHV
480 : * is safe, so we can just drop those references.
481 : */
58 tgl 482 GNC 27 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
483 : /* Now throw it back into the joininfo lists */
4590 tgl 484 CBC 27 : distribute_restrictinfo_to_rels(root, rinfo);
4590 tgl 485 ECB : }
486 : }
487 :
488 : /*
489 : * There may be references to the rel in root->fkey_list, but if so,
490 : * match_foreign_keys_to_quals() will get rid of them.
491 : */
492 :
493 : /*
494 : * Finally, remove the rel from the baserel array to prevent it from being
495 : * referenced again. (We can't do this earlier because
496 : * remove_join_clause_from_rels will touch it.)
497 : */
55 tgl 498 GNC 3972 : root->simple_rel_array[relid] = NULL;
499 :
500 : /* And nuke the RelOptInfo, just in case there's another access path */
501 3972 : pfree(rel);
4760 502 3972 : }
503 :
504 : /*
505 : * Remove any references to relid or ojrelid from the RestrictInfo.
506 : *
507 : * We only bother to clean out bits in clause_relids and required_relids,
508 : * not nullingrel bits in contained Vars and PHVs. (This might have to be
509 : * improved sometime.) However, if the RestrictInfo contains an OR clause
510 : * we have to also clean up the sub-clauses.
511 : */
512 : static void
58 513 33 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
514 : {
515 : /*
516 : * The clause_relids probably aren't shared with anything else, but let's
517 : * copy them just to be sure.
518 : */
519 33 : rinfo->clause_relids = bms_copy(rinfo->clause_relids);
520 33 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
521 33 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
522 : /* Likewise for required_relids */
523 33 : rinfo->required_relids = bms_copy(rinfo->required_relids);
524 33 : rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
525 33 : rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
526 :
527 : /* If it's an OR, recurse to clean up sub-clauses */
528 33 : if (restriction_is_or_clause(rinfo))
529 : {
530 : ListCell *lc;
531 :
532 3 : Assert(is_orclause(rinfo->orclause));
533 9 : foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
534 : {
535 6 : Node *orarg = (Node *) lfirst(lc);
536 :
537 : /* OR arguments should be ANDs or sub-RestrictInfos */
538 6 : if (is_andclause(orarg))
539 : {
58 tgl 540 UNC 0 : List *andargs = ((BoolExpr *) orarg)->args;
541 : ListCell *lc2;
542 :
543 0 : foreach(lc2, andargs)
544 : {
545 0 : RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
546 :
547 0 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
548 : }
549 : }
550 : else
551 : {
58 tgl 552 GNC 6 : RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
553 :
554 6 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
555 : }
556 : }
557 : }
58 tgl 558 GIC 33 : }
559 :
560 : /*
561 : * Remove any occurrences of the target relid from a joinlist structure.
562 : *
563 : * It's easiest to build a whole new list structure, so we handle it that
564 : * way. Efficiency is not a big deal here.
565 : *
566 : * *nremoved is incremented by the number of occurrences removed (there
567 : * should be exactly one, but the caller checks that).
568 : */
569 : static List *
4760 tgl 570 CBC 4074 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
4760 tgl 571 ECB : {
4760 tgl 572 GIC 4074 : List *result = NIL;
4760 tgl 573 ECB : ListCell *jl;
574 :
4760 tgl 575 CBC 15236 : foreach(jl, joinlist)
576 : {
4760 tgl 577 GIC 11162 : Node *jlnode = (Node *) lfirst(jl);
578 :
579 11162 : if (IsA(jlnode, RangeTblRef))
580 : {
581 11060 : int varno = ((RangeTblRef *) jlnode)->rtindex;
4760 tgl 582 ECB :
4760 tgl 583 GIC 11060 : if (varno == relid)
584 3972 : (*nremoved)++;
585 : else
586 7088 : result = lappend(result, jlnode);
587 : }
588 102 : else if (IsA(jlnode, List))
589 : {
4760 tgl 590 ECB : /* Recurse to handle subproblem */
591 : List *sublist;
592 :
4760 tgl 593 GIC 102 : sublist = remove_rel_from_joinlist((List *) jlnode,
594 : relid, nremoved);
595 : /* Avoid including empty sub-lists in the result */
596 102 : if (sublist)
597 102 : result = lappend(result, sublist);
598 : }
599 : else
600 : {
4760 tgl 601 UIC 0 : elog(ERROR, "unrecognized joinlist node type: %d",
602 : (int) nodeTag(jlnode));
603 : }
604 : }
605 :
4760 tgl 606 CBC 4074 : return result;
607 : }
608 :
3190 tgl 609 ECB :
2169 610 : /*
611 : * reduce_unique_semijoins
612 : * Check for semijoins that can be simplified to plain inner joins
613 : * because the inner relation is provably unique for the join clauses.
614 : *
615 : * Ideally this would happen during reduce_outer_joins, but we don't have
616 : * enough information at that point.
617 : *
618 : * To perform the strength reduction when applicable, we need only delete
619 : * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
620 : * bother fixing the join type attributed to it in the query jointree,
621 : * since that won't be consulted again.)
622 : */
623 : void
2169 tgl 624 GIC 128144 : reduce_unique_semijoins(PlannerInfo *root)
625 : {
626 : ListCell *lc;
2169 tgl 627 ECB :
628 : /*
1364 629 : * Scan the join_info_list to find semijoins.
630 : */
1364 tgl 631 CBC 144358 : foreach(lc, root->join_info_list)
2169 tgl 632 ECB : {
2169 tgl 633 CBC 16214 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
634 : int innerrelid;
635 : RelOptInfo *innerrel;
2169 tgl 636 ECB : Relids joinrelids;
637 : List *restrictlist;
638 :
639 : /*
640 : * Must be a semijoin to a single baserel, else we aren't going to be
641 : * able to do anything with it.
642 : */
69 tgl 643 GNC 16214 : if (sjinfo->jointype != JOIN_SEMI)
2169 tgl 644 GIC 16165 : continue;
2169 tgl 645 EUB :
2169 tgl 646 GIC 651 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
647 51 : continue;
2169 tgl 648 EUB :
2169 tgl 649 GIC 600 : innerrel = find_base_rel(root, innerrelid);
2169 tgl 650 EUB :
651 : /*
652 : * Before we trouble to run generate_join_implied_equalities, make a
653 : * quick check to eliminate cases in which we will surely be unable to
654 : * prove uniqueness of the innerrel.
655 : */
2169 tgl 656 GIC 600 : if (!rel_supports_distinctness(root, innerrel))
2169 tgl 657 CBC 313 : continue;
658 :
2169 tgl 659 ECB : /* Compute the relid set for the join we are considering */
2169 tgl 660 GIC 287 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
69 tgl 661 GNC 287 : Assert(sjinfo->ojrelid == 0); /* SEMI joins don't have RT indexes */
662 :
663 : /*
2169 tgl 664 ECB : * Since we're only considering a single-rel RHS, any join clauses it
665 : * has must be clauses linking it to the semijoin's min_lefthand. We
666 : * can also consider EC-derived join clauses.
667 : */
668 : restrictlist =
2169 tgl 669 GIC 287 : list_concat(generate_join_implied_equalities(root,
670 : joinrelids,
671 : sjinfo->min_lefthand,
672 : innerrel,
673 : 0),
674 287 : innerrel->joininfo);
675 :
676 : /* Test whether the innerrel is unique for those clauses. */
1815 tgl 677 CBC 287 : if (!innerrel_is_unique(root,
678 : joinrelids, sjinfo->min_lefthand, innerrel,
2169 tgl 679 ECB : JOIN_SEMI, restrictlist, true))
2169 tgl 680 GIC 238 : continue;
681 :
2169 tgl 682 ECB : /* OK, remove the SpecialJoinInfo from the list. */
1364 tgl 683 GIC 49 : root->join_info_list = foreach_delete_current(root->join_info_list, lc);
2169 tgl 684 ECB : }
2169 tgl 685 GIC 128144 : }
2169 tgl 686 ECB :
687 :
2558 688 : /*
689 : * rel_supports_distinctness
690 : * Could the relation possibly be proven distinct on some set of columns?
691 : *
692 : * This is effectively a pre-checking function for rel_is_distinct_for().
2062 peter_e 693 : * It must return true if rel_is_distinct_for() could possibly return true
694 : * with this rel, but it should not expend a lot of cycles. The idea is
2558 tgl 695 : * that callers can avoid doing possibly-expensive processing to compute
696 : * rel_is_distinct_for()'s argument lists if the call could not possibly
697 : * succeed.
698 : */
699 : static bool
2558 tgl 700 CBC 204739 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
701 : {
702 : /* We only know about baserels ... */
703 204739 : if (rel->reloptkind != RELOPT_BASEREL)
704 67672 : return false;
2558 tgl 705 GIC 137067 : if (rel->rtekind == RTE_RELATION)
706 : {
707 : /*
2558 tgl 708 EUB : * For a plain relation, we only know how to prove uniqueness by
709 : * reference to unique indexes. Make sure there's at least one
710 : * suitable unique index. It must be immediately enforced, and if
711 : * it's a partial index, it must match the query. (Keep these
712 : * conditions in sync with relation_has_unique_index_for!)
2558 tgl 713 ECB : */
714 : ListCell *lc;
715 :
2558 tgl 716 GIC 181506 : foreach(lc, rel->indexlist)
717 : {
718 164995 : IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
719 :
720 164995 : if (ind->unique && ind->immediate &&
721 110672 : (ind->indpred == NIL || ind->predOK))
722 110672 : return true;
723 : }
724 : }
725 9884 : else if (rel->rtekind == RTE_SUBQUERY)
726 : {
727 1578 : Query *subquery = root->simple_rte_array[rel->relid]->subquery;
728 :
729 : /* Check if the subquery has any qualities that support distinctness */
730 1578 : if (query_supports_distinctness(subquery))
2558 tgl 731 CBC 976 : return true;
732 : }
733 : /* We have no proof rules for any other rtekinds. */
2558 tgl 734 GIC 25419 : return false;
735 : }
736 :
737 : /*
2558 tgl 738 ECB : * rel_is_distinct_for
739 : * Does the relation return only distinct rows according to clause_list?
740 : *
741 : * clause_list is a list of join restriction clauses involving this rel and
742 : * some other one. Return true if no two rows emitted by this rel could
743 : * possibly join to the same row of the other rel.
744 : *
745 : * The caller must have already determined that each condition is a
746 : * mergejoinable equality with an expression in this relation on one side, and
747 : * an expression not involving this relation on the other. The transient
748 : * outer_is_left flag is used to identify which side references this relation:
749 : * left side if outer_is_left is false, right side if it is true.
750 : *
751 : * Note that the passed-in clause_list may be destructively modified! This
752 : * is OK for current uses, because the clause_list is built by the caller for
753 : * the sole purpose of passing to this function.
754 : */
755 : static bool
2558 tgl 756 CBC 70265 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
757 : {
758 : /*
759 : * We could skip a couple of tests here if we assume all callers checked
760 : * rel_supports_distinctness first, but it doesn't seem worth taking any
761 : * risk for.
762 : */
763 70265 : if (rel->reloptkind != RELOPT_BASEREL)
2558 tgl 764 LBC 0 : return false;
2558 tgl 765 GIC 70265 : if (rel->rtekind == RTE_RELATION)
766 : {
2558 tgl 767 ECB : /*
768 : * Examine the indexes to see if we have a matching unique index.
769 : * relation_has_unique_index_for automatically adds any usable
770 : * restriction clauses for the rel, so we needn't do that here.
771 : */
2558 tgl 772 GIC 69619 : if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
773 43766 : return true;
774 : }
775 646 : else if (rel->rtekind == RTE_SUBQUERY)
2558 tgl 776 ECB : {
2558 tgl 777 GIC 646 : Index relid = rel->relid;
778 646 : Query *subquery = root->simple_rte_array[relid]->subquery;
779 646 : List *colnos = NIL;
780 646 : List *opids = NIL;
2558 tgl 781 ECB : ListCell *l;
782 :
783 : /*
784 : * Build the argument lists for query_is_distinct_for: a list of
785 : * output column numbers that the query needs to be distinct over, and
786 : * a list of equality operators that the output columns need to be
787 : * distinct according to.
788 : *
789 : * (XXX we are not considering restriction clauses attached to the
790 : * subquery; is that worth doing?)
791 : */
2558 tgl 792 CBC 1277 : foreach(l, clause_list)
793 : {
2190 tgl 794 GIC 631 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
795 : Oid op;
796 : Var *var;
797 :
798 : /*
799 : * Get the equality operator we need uniqueness according to.
800 : * (This might be a cross-type operator and thus not exactly the
801 : * same operator the subquery would consider; that's all right
802 : * since query_is_distinct_for can resolve such cases.) The
803 : * caller's mergejoinability test should have selected only
804 : * OpExprs.
805 : */
2238 peter_e 806 631 : op = castNode(OpExpr, rinfo->clause)->opno;
2558 tgl 807 ECB :
808 : /* caller identified the inner side for us */
2558 tgl 809 GIC 631 : if (rinfo->outer_is_left)
2558 tgl 810 CBC 474 : var = (Var *) get_rightop(rinfo->clause);
2558 tgl 811 ECB : else
2558 tgl 812 CBC 157 : var = (Var *) get_leftop(rinfo->clause);
813 :
814 : /*
815 : * We may ignore any RelabelType node above the operand. (There
816 : * won't be more than one, since eval_const_expressions() has been
817 : * applied already.)
818 : */
2030 tgl 819 GIC 631 : if (var && IsA(var, RelabelType))
820 257 : var = (Var *) ((RelabelType *) var)->arg;
821 :
822 : /*
2558 tgl 823 ECB : * If inner side isn't a Var referencing a subquery output column,
824 : * this clause doesn't help us.
825 : */
2558 tgl 826 GIC 631 : if (!var || !IsA(var, Var) ||
2558 tgl 827 CBC 625 : var->varno != relid || var->varlevelsup != 0)
828 6 : continue;
2558 tgl 829 ECB :
2558 tgl 830 GIC 625 : colnos = lappend_int(colnos, var->varattno);
831 625 : opids = lappend_oid(opids, op);
2558 tgl 832 ECB : }
833 :
2558 tgl 834 CBC 646 : if (query_is_distinct_for(subquery, colnos, opids))
2558 tgl 835 GIC 91 : return true;
836 : }
2558 tgl 837 CBC 26408 : return false;
2558 tgl 838 ECB : }
839 :
840 :
3190 841 : /*
842 : * query_supports_distinctness - could the query possibly be proven distinct
843 : * on some set of output columns?
844 : *
845 : * This is effectively a pre-checking function for query_is_distinct_for().
846 : * It must return true if query_is_distinct_for() could possibly return true
847 : * with this query, but it should not expend a lot of cycles. The idea is
848 : * that callers can avoid doing possibly-expensive processing to compute
849 : * query_is_distinct_for()'s argument lists if the call could not possibly
850 : * succeed.
851 : */
852 : bool
3190 tgl 853 GIC 1834 : query_supports_distinctness(Query *query)
854 : {
855 : /* SRFs break distinctness except with DISTINCT, see below */
1961 856 1834 : if (query->hasTargetSRFs && query->distinctClause == NIL)
2399 857 416 : return false;
858 :
859 : /* check for features we can prove distinctness with */
3190 860 1418 : if (query->distinctClause != NIL ||
861 1346 : query->groupClause != NIL ||
2885 andres 862 1261 : query->groupingSets != NIL ||
3190 tgl 863 CBC 1261 : query->hasAggs ||
3190 tgl 864 GIC 1182 : query->havingQual ||
865 1182 : query->setOperations)
866 1214 : return true;
867 :
868 204 : return false;
869 : }
3190 tgl 870 ECB :
3190 tgl 871 EUB : /*
3190 tgl 872 ECB : * query_is_distinct_for - does query never return duplicates of the
873 : * specified columns?
874 : *
875 : * query is a not-yet-planned subquery (in current usage, it's always from
876 : * a subquery RTE, which the planner avoids scribbling on).
877 : *
878 : * colnos is an integer list of output column numbers (resno's). We are
879 : * interested in whether rows consisting of just these columns are certain
880 : * to be distinct. "Distinctness" is defined according to whether the
881 : * corresponding upper-level equality operators listed in opids would think
882 : * the values are distinct. (Note: the opids entries could be cross-type
883 : * operators, and thus not exactly the equality operators that the subquery
884 : * would use itself. We use equality_ops_are_compatible() to check
885 : * compatibility. That looks at btree or hash opfamily membership, and so
886 : * should give trustworthy answers for all operators that we might need
887 : * to deal with here.)
888 : */
889 : bool
3190 tgl 890 GIC 677 : query_is_distinct_for(Query *query, List *colnos, List *opids)
891 : {
892 : ListCell *l;
893 : Oid opid;
894 :
895 677 : Assert(list_length(colnos) == list_length(opids));
896 :
897 : /*
898 : * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
3190 tgl 899 ECB : * columns in the DISTINCT clause appear in colnos and operator semantics
900 : * match. This is true even if there are SRFs in the DISTINCT columns or
1961 901 : * elsewhere in the tlist.
902 : */
3190 tgl 903 GIC 677 : if (query->distinctClause)
904 : {
905 75 : foreach(l, query->distinctClause)
906 : {
907 60 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
908 60 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
909 : query->targetList);
910 :
911 60 : opid = distinct_col_search(tle->resno, colnos, opids);
912 60 : if (!OidIsValid(opid) ||
3190 tgl 913 CBC 24 : !equality_ops_are_compatible(opid, sgc->eqop))
914 : break; /* exit early if no match */
915 : }
916 51 : if (l == NULL) /* had matches for all? */
917 15 : return true;
918 : }
3190 tgl 919 ECB :
920 : /*
921 : * Otherwise, a set-returning function in the query's targetlist can
922 : * result in returning duplicate rows, despite any grouping that might
923 : * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY
924 : * columns, it would be safe because they'd be expanded before grouping.
925 : * But it doesn't currently seem worth the effort to check for that.)
1961 926 : */
1961 tgl 927 CBC 662 : if (query->hasTargetSRFs)
1961 tgl 928 UIC 0 : return false;
929 :
930 : /*
931 : * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
932 : * the grouped columns appear in colnos and operator semantics match.
3190 tgl 933 ECB : */
2885 andres 934 CBC 662 : if (query->groupClause && !query->groupingSets)
3190 tgl 935 ECB : {
3190 tgl 936 GIC 105 : foreach(l, query->groupClause)
3190 tgl 937 ECB : {
3190 tgl 938 CBC 76 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
3190 tgl 939 GIC 76 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
940 : query->targetList);
3190 tgl 941 ECB :
3190 tgl 942 CBC 76 : opid = distinct_col_search(tle->resno, colnos, opids);
3190 tgl 943 GIC 76 : if (!OidIsValid(opid) ||
3190 tgl 944 CBC 50 : !equality_ops_are_compatible(opid, sgc->eqop))
945 : break; /* exit early if no match */
946 : }
3190 tgl 947 GIC 55 : if (l == NULL) /* had matches for all? */
948 29 : return true;
949 : }
2885 andres 950 607 : else if (query->groupingSets)
951 : {
952 : /*
953 : * If we have grouping sets with expressions, we probably don't have
954 : * uniqueness and analysis would be hard. Punt.
955 : */
2885 andres 956 UIC 0 : if (query->groupClause)
957 0 : return false;
958 :
959 : /*
2878 bruce 960 ECB : * If we have no groupClause (therefore no grouping expressions), we
961 : * might have one or many empty grouping sets. If there's just one,
962 : * then we're returning only one row and are certainly unique. But
963 : * otherwise, we know we're certainly not unique.
2885 andres 964 : */
2885 andres 965 UIC 0 : if (list_length(query->groupingSets) == 1 &&
2878 bruce 966 0 : ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
2885 andres 967 LBC 0 : return true;
2885 andres 968 ECB : else
2885 andres 969 LBC 0 : return false;
2885 andres 970 ECB : }
3190 tgl 971 : else
972 : {
973 : /*
974 : * If we have no GROUP BY, but do have aggregates or HAVING, then the
975 : * result is at most one row so it's surely unique, for any operators.
976 : */
3190 tgl 977 GIC 607 : if (query->hasAggs || query->havingQual)
978 41 : return true;
979 : }
980 :
981 : /*
982 : * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
983 : * except with ALL.
984 : */
985 592 : if (query->setOperations)
986 : {
2238 peter_e 987 530 : SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
988 :
3190 tgl 989 530 : Assert(topop->op != SETOP_NONE);
990 :
991 530 : if (!topop->all)
992 : {
993 : ListCell *lg;
994 :
995 : /* We're good if all the nonjunk output columns are in colnos */
996 36 : lg = list_head(topop->groupClauses);
3190 tgl 997 CBC 45 : foreach(l, query->targetList)
998 : {
3190 tgl 999 GIC 39 : TargetEntry *tle = (TargetEntry *) lfirst(l);
1000 : SortGroupClause *sgc;
1001 :
3190 tgl 1002 CBC 39 : if (tle->resjunk)
3190 tgl 1003 UIC 0 : continue; /* ignore resjunk columns */
1004 :
1005 : /* non-resjunk columns should have grouping clauses */
3190 tgl 1006 GIC 39 : Assert(lg != NULL);
1007 39 : sgc = (SortGroupClause *) lfirst(lg);
1364 1008 39 : lg = lnext(topop->groupClauses, lg);
1009 :
3190 tgl 1010 CBC 39 : opid = distinct_col_search(tle->resno, colnos, opids);
3190 tgl 1011 GIC 39 : if (!OidIsValid(opid) ||
3190 tgl 1012 CBC 9 : !equality_ops_are_compatible(opid, sgc->eqop))
1013 : break; /* exit early if no match */
3190 tgl 1014 ECB : }
3190 tgl 1015 CBC 36 : if (l == NULL) /* had matches for all? */
3190 tgl 1016 GIC 6 : return true;
1017 : }
3190 tgl 1018 ECB : }
1019 :
1020 : /*
1021 : * XXX Are there any other cases in which we can easily see the result
1022 : * must be distinct?
1023 : *
1024 : * If you do add more smarts to this function, be sure to update
1025 : * query_supports_distinctness() to match.
1026 : */
1027 :
3190 tgl 1028 GIC 586 : return false;
1029 : }
1030 :
1031 : /*
1032 : * distinct_col_search - subroutine for query_is_distinct_for
1033 : *
3190 tgl 1034 ECB : * If colno is in colnos, return the corresponding element of opids,
3190 tgl 1035 EUB : * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
1036 : * but if it does, we arbitrarily select the first match.)
1037 : */
1038 : static Oid
3190 tgl 1039 GIC 175 : distinct_col_search(int colno, List *colnos, List *opids)
1040 : {
3190 tgl 1041 ECB : ListCell *lc1,
1042 : *lc2;
1043 :
3190 tgl 1044 GIC 281 : forboth(lc1, colnos, lc2, opids)
3190 tgl 1045 ECB : {
3190 tgl 1046 CBC 189 : if (colno == lfirst_int(lc1))
3190 tgl 1047 GIC 83 : return lfirst_oid(lc2);
1048 : }
3190 tgl 1049 CBC 92 : return InvalidOid;
3190 tgl 1050 ECB : }
2193 1051 :
1052 :
1053 : /*
1054 : * innerrel_is_unique
1055 : * Check if the innerrel provably contains at most one tuple matching any
1056 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1057 : *
1058 : * We need an actual RelOptInfo for the innerrel, but it's sufficient to
1059 : * identify the outerrel by its Relids. This asymmetry supports use of this
1060 : * function before joinrels have been built. (The caller is expected to
1061 : * also supply the joinrelids, just to save recalculating that.)
1062 : *
2193 tgl 1063 EUB : * The proof must be made based only on clauses that will be "joinquals"
1064 : * rather than "otherquals" at execution. For an inner join there's no
1065 : * difference; but if the join is outer, we must ignore pushed-down quals,
1066 : * as those will become "otherquals". Note that this means the answer might
1067 : * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
1068 : * answer without regard to that, callers must take care not to call this
1069 : * with jointypes that would be classified differently by IS_OUTER_JOIN().
1070 : *
1071 : * The actual proof is undertaken by is_innerrel_unique_for(); this function
1072 : * is a frontend that is mainly concerned with caching the answers.
2169 1073 : * In particular, the force_cache argument allows overriding the internal
1074 : * heuristic about whether to cache negative answers; it should be "true"
1075 : * if making an inquiry that is not part of the normal bottom-up join search
1076 : * sequence.
1077 : */
1078 : bool
2193 tgl 1079 GIC 226958 : innerrel_is_unique(PlannerInfo *root,
1080 : Relids joinrelids,
1081 : Relids outerrelids,
1082 : RelOptInfo *innerrel,
1083 : JoinType jointype,
2169 tgl 1084 ECB : List *restrictlist,
1085 : bool force_cache)
1086 : {
1087 : MemoryContext old_context;
1088 : ListCell *lc;
1089 :
1090 : /* Certainly can't prove uniqueness when there are no joinclauses */
2193 tgl 1091 GIC 226958 : if (restrictlist == NIL)
2193 tgl 1092 CBC 38794 : return false;
1093 :
2193 tgl 1094 ECB : /*
1095 : * Make a quick check to eliminate cases in which we will surely be unable
1096 : * to prove uniqueness of the innerrel.
1097 : */
2193 tgl 1098 CBC 188164 : if (!rel_supports_distinctness(root, innerrel))
2193 tgl 1099 GIC 91721 : return false;
1100 :
1101 : /*
1102 : * Query the cache to see if we've managed to prove that innerrel is
2193 tgl 1103 ECB : * unique for any subset of this outerrel. We don't need an exact match,
1104 : * as extra outerrels can't make the innerrel any less unique (or more
1105 : * formally, the restrictlist for a join to a superset outerrel must be a
1106 : * superset of the conditions we successfully used before).
1107 : */
2193 tgl 1108 GIC 105534 : foreach(lc, innerrel->unique_for_rels)
2193 tgl 1109 ECB : {
2193 tgl 1110 GBC 39265 : Relids unique_for_rels = (Relids) lfirst(lc);
1111 :
2169 tgl 1112 GIC 39265 : if (bms_is_subset(unique_for_rels, outerrelids))
2193 tgl 1113 CBC 30174 : return true; /* Success! */
2193 tgl 1114 ECB : }
1115 :
1116 : /*
1117 : * Conversely, we may have already determined that this outerrel, or some
1118 : * superset thereof, cannot prove this innerrel to be unique.
1119 : */
2193 tgl 1120 GIC 66269 : foreach(lc, innerrel->non_unique_for_rels)
1121 : {
2193 tgl 1122 LBC 0 : Relids unique_for_rels = (Relids) lfirst(lc);
2193 tgl 1123 ECB :
2169 tgl 1124 UIC 0 : if (bms_is_subset(outerrelids, unique_for_rels))
2193 1125 0 : return false;
1126 : }
1127 :
1128 : /* No cached information, so try to make the proof. */
1815 tgl 1129 GIC 66269 : if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
1130 : jointype, restrictlist))
1131 : {
1132 : /*
1133 : * Cache the positive result for future probes, being sure to keep it
1134 : * in the planner_cxt even if we are working in GEQO.
2193 tgl 1135 ECB : *
1136 : * Note: one might consider trying to isolate the minimal subset of
1137 : * the outerrels that proved the innerrel unique. But it's not worth
1138 : * the trouble, because the planner builds up joinrels incrementally
1139 : * and so we'll see the minimally sufficient outerrels before any
1140 : * supersets of them anyway.
1141 : */
2193 tgl 1142 GIC 39885 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1143 39885 : innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
2169 1144 39885 : bms_copy(outerrelids));
2193 1145 39885 : MemoryContextSwitchTo(old_context);
2193 tgl 1146 ECB :
2193 tgl 1147 GIC 39885 : return true; /* Success! */
1148 : }
1149 : else
1150 : {
2193 tgl 1151 ECB : /*
1152 : * None of the join conditions for outerrel proved innerrel unique, so
1153 : * we can safely reject this outerrel or any subset of it in future
1154 : * checks.
1155 : *
1156 : * However, in normal planning mode, caching this knowledge is totally
1157 : * pointless; it won't be queried again, because we build up joinrels
1158 : * from smaller to larger. It is useful in GEQO mode, where the
1159 : * knowledge can be carried across successive planning attempts; and
1160 : * it's likely to be useful when using join-search plugins, too. Hence
1161 : * cache when join_search_private is non-NULL. (Yeah, that's a hack,
1162 : * but it seems reasonable.)
1163 : *
1164 : * Also, allow callers to override that heuristic and force caching;
1165 : * that's useful for reduce_unique_semijoins, which calls here before
1166 : * the normal join search starts.
1167 : */
2169 tgl 1168 GIC 26384 : if (force_cache || root->join_search_private)
1169 : {
2193 1170 238 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1171 238 : innerrel->non_unique_for_rels =
1172 238 : lappend(innerrel->non_unique_for_rels,
2169 1173 238 : bms_copy(outerrelids));
2193 1174 238 : MemoryContextSwitchTo(old_context);
1175 : }
1176 :
1177 26384 : return false;
1178 : }
1179 : }
1180 :
1181 : /*
1182 : * is_innerrel_unique_for
1183 : * Check if the innerrel provably contains at most one tuple matching any
1184 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1185 : */
2193 tgl 1186 ECB : static bool
2193 tgl 1187 GIC 66269 : is_innerrel_unique_for(PlannerInfo *root,
1188 : Relids joinrelids,
1189 : Relids outerrelids,
1190 : RelOptInfo *innerrel,
1191 : JoinType jointype,
1192 : List *restrictlist)
1193 : {
1194 66269 : List *clause_list = NIL;
1195 : ListCell *lc;
1196 :
1197 : /*
2193 tgl 1198 ECB : * Search for mergejoinable clauses that constrain the inner rel against
1199 : * the outer rel. If an operator is mergejoinable then it behaves like
1200 : * equality for some btree opclass, so it's what we want. The
1201 : * mergejoinability test also eliminates clauses containing volatile
1202 : * functions, which we couldn't depend on.
1203 : */
2193 tgl 1204 GIC 144195 : foreach(lc, restrictlist)
2193 tgl 1205 ECB : {
2193 tgl 1206 CBC 77926 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1207 :
1208 : /*
1209 : * As noted above, if it's a pushed-down clause and we're at an outer
1210 : * join, we can't use it.
1211 : */
1815 tgl 1212 GIC 77926 : if (IS_OUTER_JOIN(jointype) &&
1213 37470 : RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
2193 1214 3641 : continue;
2193 tgl 1215 ECB :
1216 : /* Ignore if it's not a mergejoinable clause */
2193 tgl 1217 CBC 74285 : if (!restrictinfo->can_join ||
2193 tgl 1218 GIC 71318 : restrictinfo->mergeopfamilies == NIL)
2193 tgl 1219 CBC 3287 : continue; /* not mergejoinable */
2193 tgl 1220 ECB :
1221 : /*
1222 : * Check if clause has the form "outer op inner" or "inner op outer",
1223 : * and if so mark which side is inner.
1224 : */
2169 tgl 1225 GIC 70998 : if (!clause_sides_match_join(restrictinfo, outerrelids,
1226 : innerrel->relids))
2193 tgl 1227 LBC 0 : continue; /* no good for these input relations */
1228 :
2193 tgl 1229 EUB : /* OK, add to list */
2193 tgl 1230 GIC 70998 : clause_list = lappend(clause_list, restrictinfo);
2193 tgl 1231 EUB : }
1232 :
1233 : /* Let rel_is_distinct_for() do the hard work */
2193 tgl 1234 GIC 66269 : return rel_is_distinct_for(root, innerrel, clause_list);
1235 : }
|