Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * analyzejoins.c
4 : : * Routines for simplifying joins after initial query analysis
5 : : *
6 : : * While we do a great deal of join simplification in prep/prepjointree.c,
7 : : * certain optimizations cannot be performed at that stage for lack of
8 : : * detailed information about the query. The routines here are invoked
9 : : * after initsplan.c has done its work, and can do additional join removal
10 : : * and simplification steps based on the information extracted. The penalty
11 : : * is that we have to work harder to clean up after ourselves when we modify
12 : : * the query, since the derived data structures have to be updated too.
13 : : *
14 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
15 : : * Portions Copyright (c) 1994, Regents of the University of California
16 : : *
17 : : *
18 : : * IDENTIFICATION
19 : : * src/backend/optimizer/plan/analyzejoins.c
20 : : *
21 : : *-------------------------------------------------------------------------
22 : : */
23 : : #include "postgres.h"
24 : :
25 : : #include "catalog/pg_class.h"
26 : : #include "nodes/nodeFuncs.h"
27 : : #include "optimizer/joininfo.h"
28 : : #include "optimizer/optimizer.h"
29 : : #include "optimizer/pathnode.h"
30 : : #include "optimizer/paths.h"
31 : : #include "optimizer/planmain.h"
32 : : #include "optimizer/restrictinfo.h"
33 : : #include "utils/lsyscache.h"
34 : :
35 : : /*
36 : : * The struct containing self-join candidate. Used to find duplicate reloids.
37 : : */
38 : : typedef struct
39 : : {
40 : : int relid;
41 : : Oid reloid;
42 : : } SelfJoinCandidate;
43 : :
44 : : bool enable_self_join_removal;
45 : :
46 : : /* local functions */
47 : : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
48 : :
49 : : static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
50 : : SpecialJoinInfo *sjinfo);
51 : : static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
52 : : int relid, int ojrelid);
53 : : static void remove_rel_from_eclass(EquivalenceClass *ec,
54 : : int relid, int ojrelid);
55 : : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
56 : : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
57 : : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
58 : : List *clause_list, List **extra_clauses);
59 : : static Oid distinct_col_search(int colno, List *colnos, List *opids);
60 : : static bool is_innerrel_unique_for(PlannerInfo *root,
61 : : Relids joinrelids,
62 : : Relids outerrelids,
63 : : RelOptInfo *innerrel,
64 : : JoinType jointype,
65 : : List *restrictlist,
66 : : List **extra_clauses);
67 : : static void replace_varno(Node *node, int from, int to);
68 : : static Bitmapset *replace_relid(Relids relids, int oldId, int newId);
69 : : static int self_join_candidates_cmp(const void *a, const void *b);
70 : :
71 : :
72 : : /*
73 : : * remove_useless_joins
74 : : * Check for relations that don't actually need to be joined at all,
75 : : * and remove them from the query.
76 : : *
77 : : * We are passed the current joinlist and return the updated list. Other
78 : : * data structures that have to be updated are accessible via "root".
79 : : */
80 : : List *
5131 tgl@sss.pgh.pa.us 81 :CBC 139537 : remove_useless_joins(PlannerInfo *root, List *joinlist)
82 : : {
83 : : ListCell *lc;
84 : :
85 : : /*
86 : : * We are only interested in relations that are left-joined to, so we can
87 : : * scan the join_info_list to find them easily.
88 : : */
89 : 144198 : restart:
90 [ + + + + : 163428 : foreach(lc, root->join_info_list)
+ + ]
91 : : {
92 : 23891 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
93 : : int innerrelid;
94 : : int nremoved;
95 : :
96 : : /* Skip if not removable */
97 [ + + ]: 23891 : if (!join_is_removable(root, sjinfo))
98 : 19230 : continue;
99 : :
100 : : /*
101 : : * Currently, join_is_removable can only succeed when the sjinfo's
102 : : * righthand is a single baserel. Remove that rel from the query and
103 : : * joinlist.
104 : : */
105 : 4661 : innerrelid = bms_singleton_member(sjinfo->min_righthand);
106 : :
172 akorotkov@postgresql 107 :GNC 4661 : remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
108 : :
109 : : /* We verify that exactly one reference gets removed from joinlist */
5131 tgl@sss.pgh.pa.us 110 :CBC 4661 : nremoved = 0;
111 : 4661 : joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
112 [ - + ]: 4661 : if (nremoved != 1)
5131 tgl@sss.pgh.pa.us 113 [ # # ]:UBC 0 : elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
114 : :
115 : : /*
116 : : * We can delete this SpecialJoinInfo from the list too, since it's no
117 : : * longer of interest. (Since we'll restart the foreach loop
118 : : * immediately, we don't bother with foreach_delete_current.)
119 : : */
1735 tgl@sss.pgh.pa.us 120 :CBC 4661 : root->join_info_list = list_delete_cell(root->join_info_list, lc);
121 : :
122 : : /*
123 : : * Restart the scan. This is necessary to ensure we find all
124 : : * removable joins independently of ordering of the join_info_list
125 : : * (note that removal of attr_needed bits may make a join appear
126 : : * removable that did not before).
127 : : */
5131 128 : 4661 : goto restart;
129 : : }
130 : :
131 : 139537 : return joinlist;
132 : : }
133 : :
134 : : /*
135 : : * clause_sides_match_join
136 : : * Determine whether a join clause is of the right form to use in this join.
137 : : *
138 : : * We already know that the clause is a binary opclause referencing only the
139 : : * rels in the current join. The point here is to check whether it has the
140 : : * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
141 : : * rather than mixing outer and inner vars on either side. If it matches,
142 : : * we set the transient flag outer_is_left to identify which side is which.
143 : : */
144 : : static inline bool
145 : 89537 : clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
146 : : Relids innerrelids)
147 : : {
148 [ + + + + ]: 136130 : if (bms_is_subset(rinfo->left_relids, outerrelids) &&
149 : 46593 : bms_is_subset(rinfo->right_relids, innerrelids))
150 : : {
151 : : /* lefthand side is outer */
152 : 46590 : rinfo->outer_is_left = true;
153 : 46590 : return true;
154 : : }
155 [ + + + - ]: 85884 : else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
156 : 42937 : bms_is_subset(rinfo->right_relids, outerrelids))
157 : : {
158 : : /* righthand side is outer */
159 : 42937 : rinfo->outer_is_left = false;
160 : 42937 : return true;
161 : : }
162 : 10 : return false; /* no good for these input relations */
163 : : }
164 : :
165 : : /*
166 : : * join_is_removable
167 : : * Check whether we need not perform this special join at all, because
168 : : * it will just duplicate its left input.
169 : : *
170 : : * This is true for a left join for which the join condition cannot match
171 : : * more than one inner-side row. (There are other possibly interesting
172 : : * cases, but we don't have the infrastructure to prove them.) We also
173 : : * have to check that the inner side doesn't generate any variables needed
174 : : * above the join.
175 : : */
176 : : static bool
177 : 23891 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
178 : : {
179 : : int innerrelid;
180 : : RelOptInfo *innerrel;
181 : : Relids inputrelids;
182 : : Relids joinrelids;
183 : 23891 : List *clause_list = NIL;
184 : : ListCell *l;
185 : : int attroff;
186 : :
187 : : /*
188 : : * Must be a left join to a single baserel, else we aren't going to be
189 : : * able to do anything with it.
190 : : */
440 191 [ + + ]: 23891 : if (sjinfo->jointype != JOIN_LEFT)
3425 192 : 3195 : return false;
193 : :
194 [ + + ]: 20696 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
5131 195 : 577 : return false;
196 : :
197 : : /*
198 : : * Never try to eliminate a left join to the query result rel. Although
199 : : * the case is syntactically impossible in standard SQL, MERGE will build
200 : : * a join tree that looks exactly like that.
201 : : */
419 202 [ + + ]: 20119 : if (innerrelid == root->parse->resultRelation)
203 : 347 : return false;
204 : :
5131 205 : 19772 : innerrel = find_base_rel(root, innerrelid);
206 : :
207 : : /*
208 : : * Before we go to the effort of checking whether any innerrel variables
209 : : * are needed above the join, make a quick check to eliminate cases in
210 : : * which we will surely be unable to prove uniqueness of the innerrel.
211 : : */
2929 212 [ + + ]: 19772 : if (!rel_supports_distinctness(root, innerrel))
213 : 1373 : return false;
214 : :
215 : : /* Compute the relid set for the join we are considering */
331 216 : 18399 : inputrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
435 217 [ - + ]: 18399 : Assert(sjinfo->ojrelid != 0);
218 : 18399 : joinrelids = bms_copy(inputrelids);
219 : 18399 : joinrelids = bms_add_member(joinrelids, sjinfo->ojrelid);
220 : :
221 : : /*
222 : : * We can't remove the join if any inner-rel attributes are used above the
223 : : * join. Here, "above" the join includes pushed-down conditions, so we
224 : : * should reject if attr_needed includes the OJ's own relid; therefore,
225 : : * compare to inputrelids not joinrelids.
226 : : *
227 : : * As a micro-optimization, it seems better to start with max_attr and
228 : : * count down rather than starting with min_attr and counting up, on the
229 : : * theory that the system attributes are somewhat less likely to be wanted
230 : : * and should be tested last.
231 : : */
5131 232 : 18399 : for (attroff = innerrel->max_attr - innerrel->min_attr;
233 [ + + ]: 230301 : attroff >= 0;
234 : 211902 : attroff--)
235 : : {
435 236 [ + + ]: 225577 : if (!bms_is_subset(innerrel->attr_needed[attroff], inputrelids))
5131 237 : 13675 : return false;
238 : : }
239 : :
240 : : /*
241 : : * Similarly check that the inner rel isn't needed by any PlaceHolderVars
242 : : * that will be used above the join. The PHV case is a little bit more
243 : : * complicated, because PHVs may have been assigned a ph_eval_at location
244 : : * that includes the innerrel, yet their contained expression might not
245 : : * actually reference the innerrel (it could be just a constant, for
246 : : * instance). If such a PHV is due to be evaluated above the join then it
247 : : * needn't prevent join removal.
248 : : */
249 [ + + + + : 4799 : foreach(l, root->placeholder_list)
+ + ]
250 : : {
251 : 90 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
252 : :
3893 253 [ - + ]: 90 : if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
3893 tgl@sss.pgh.pa.us 254 :UBC 0 : return false; /* it references innerrel laterally */
4950 tgl@sss.pgh.pa.us 255 [ + + ]:CBC 90 : if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
256 : 27 : continue; /* it definitely doesn't reference innerrel */
433 257 [ + + ]: 63 : if (bms_is_subset(phinfo->ph_needed, inputrelids))
258 : 3 : continue; /* PHV is not used above the join */
259 [ + + ]: 60 : if (!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
260 : 15 : return false; /* it has to be evaluated below the join */
261 : :
262 : : /*
263 : : * We need to be sure there will still be a place to evaluate the PHV
264 : : * if we remove the join, ie that ph_eval_at wouldn't become empty.
265 : : */
266 [ - + ]: 45 : if (!bms_overlap(sjinfo->min_lefthand, phinfo->ph_eval_at))
3893 tgl@sss.pgh.pa.us 267 :UBC 0 : return false; /* there isn't any other place to eval PHV */
268 : : /* Check contained expression last, since this is a bit expensive */
1179 tgl@sss.pgh.pa.us 269 [ - + ]:CBC 45 : if (bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
4950 270 : 45 : innerrel->relids))
433 tgl@sss.pgh.pa.us 271 :UBC 0 : return false; /* contained expression references innerrel */
272 : : }
273 : :
274 : : /*
275 : : * Search for mergejoinable clauses that constrain the inner rel against
276 : : * either the outer rel or a pseudoconstant. If an operator is
277 : : * mergejoinable then it behaves like equality for some btree opclass, so
278 : : * it's what we want. The mergejoinability test also eliminates clauses
279 : : * containing volatile functions, which we couldn't depend on.
280 : : */
5131 tgl@sss.pgh.pa.us 281 [ + + + + :CBC 9575 : foreach(l, innerrel->joininfo)
+ + ]
282 : : {
283 : 4866 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
284 : :
285 : : /*
286 : : * If the current join commutes with some other outer join(s) via
287 : : * outer join identity 3, there will be multiple clones of its join
288 : : * clauses in the joininfo list. We want to consider only the
289 : : * has_clone form of such clauses. Processing more than one form
290 : : * would be wasteful, and also some of the others would confuse the
291 : : * RINFO_IS_PUSHED_DOWN test below.
292 : : */
440 293 [ + + ]: 4866 : if (restrictinfo->is_clone)
294 : 61 : continue; /* ignore it */
295 : :
296 : : /*
297 : : * If it's not a join clause for this outer join, we can't use it.
298 : : * Note that if the clause is pushed-down, then it is logically from
299 : : * above the outer join, even if it references no other rels (it might
300 : : * be from WHERE, for example).
301 : : */
2186 302 [ + + + + ]: 4805 : if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
435 303 : 51 : continue; /* ignore; not useful here */
304 : :
305 : : /* Ignore if it's not a mergejoinable clause */
5131 306 [ + + ]: 4754 : if (!restrictinfo->can_join ||
307 [ - + ]: 4727 : restrictinfo->mergeopfamilies == NIL)
308 : 27 : continue; /* not mergejoinable */
309 : :
310 : : /*
311 : : * Check if clause has the form "outer op inner" or "inner op outer",
312 : : * and if so mark which side is inner.
313 : : */
314 [ + + ]: 4727 : if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
315 : : innerrel->relids))
316 : 3 : continue; /* no good for these input relations */
317 : :
318 : : /* OK, add to list */
319 : 4724 : clause_list = lappend(clause_list, restrictinfo);
320 : : }
321 : :
322 : : /*
323 : : * Now that we have the relevant equality join clauses, try to prove the
324 : : * innerrel distinct.
325 : : */
172 akorotkov@postgresql 326 [ + + ]:GNC 4709 : if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
2929 tgl@sss.pgh.pa.us 327 :CBC 4661 : return true;
328 : :
329 : : /*
330 : : * Some day it would be nice to check for other methods of establishing
331 : : * distinctness.
332 : : */
5131 333 : 48 : return false;
334 : : }
335 : :
336 : :
337 : : /*
338 : : * Remove the target rel->relid and references to the target join from the
339 : : * planner's data structures, having determined that there is no need
340 : : * to include them in the query. Optionally replace them with subst if subst
341 : : * is non-negative.
342 : : *
343 : : * This function updates only parts needed for both left-join removal and
344 : : * self-join removal.
345 : : */
346 : : static void
172 akorotkov@postgresql 347 :GNC 4974 : remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
348 : : int subst, SpecialJoinInfo *sjinfo,
349 : : Relids joinrelids)
350 : : {
351 : 4974 : int relid = rel->relid;
352 [ + + ]: 4974 : int ojrelid = (sjinfo != NULL) ? sjinfo->ojrelid : -1;
353 : : Index rti;
354 : : ListCell *l;
355 : :
356 : : /*
357 : : * Remove references to the rel from other baserels' attr_needed arrays
358 : : * and lateral_vars lists.
359 : : */
5131 tgl@sss.pgh.pa.us 360 [ + + ]:CBC 31110 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
361 : : {
362 : 26136 : RelOptInfo *otherrel = root->simple_rel_array[rti];
363 : : int attroff;
364 : :
365 : : /* there may be empty slots corresponding to non-baserel RTEs */
366 [ + + ]: 26136 : if (otherrel == NULL)
367 : 12565 : continue;
368 : :
5031 bruce@momjian.us 369 [ - + ]: 13571 : Assert(otherrel->relid == rti); /* sanity check on array */
370 : :
371 : : /* no point in processing target rel itself */
5131 tgl@sss.pgh.pa.us 372 [ + + ]: 13571 : if (otherrel == rel)
373 : 4974 : continue;
374 : :
375 : 8597 : for (attroff = otherrel->max_attr - otherrel->min_attr;
376 [ + + ]: 196938 : attroff >= 0;
377 : 188341 : attroff--)
378 : : {
379 : 376682 : otherrel->attr_needed[attroff] =
172 akorotkov@postgresql 380 :GNC 188341 : replace_relid(otherrel->attr_needed[attroff], relid, subst);
440 tgl@sss.pgh.pa.us 381 :CBC 188341 : otherrel->attr_needed[attroff] =
172 akorotkov@postgresql 382 :GNC 188341 : replace_relid(otherrel->attr_needed[attroff], ojrelid, subst);
383 : : }
384 : :
385 : : /* Update lateral_vars list. */
50 386 : 8597 : replace_varno((Node *) otherrel->lateral_vars, relid, subst);
387 : : }
388 : :
389 : : /*
390 : : * Update all_baserels and related relid sets.
391 : : */
172 392 : 4974 : root->all_baserels = replace_relid(root->all_baserels, relid, subst);
393 : 4974 : root->outer_join_rels = replace_relid(root->outer_join_rels, ojrelid, subst);
394 : 4974 : root->all_query_rels = replace_relid(root->all_query_rels, relid, subst);
395 : 4974 : root->all_query_rels = replace_relid(root->all_query_rels, ojrelid, subst);
396 : :
397 : : /*
398 : : * Likewise remove references from SpecialJoinInfo data structures.
399 : : *
400 : : * This is relevant in case the outer join we're deleting is nested inside
401 : : * other outer joins: the upper joins' relid sets have to be adjusted. The
402 : : * RHS of the target outer join will be made empty here, but that's OK
403 : : * since caller will delete that SpecialJoinInfo entirely.
404 : : */
5075 tgl@sss.pgh.pa.us 405 [ + + + + :CBC 11388 : foreach(l, root->join_info_list)
+ + ]
406 : : {
324 407 : 6414 : SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l);
408 : :
172 akorotkov@postgresql 409 :GNC 6414 : sjinf->min_lefthand = replace_relid(sjinf->min_lefthand, relid, subst);
410 : 6414 : sjinf->min_righthand = replace_relid(sjinf->min_righthand, relid, subst);
411 : 6414 : sjinf->syn_lefthand = replace_relid(sjinf->syn_lefthand, relid, subst);
412 : 6414 : sjinf->syn_righthand = replace_relid(sjinf->syn_righthand, relid, subst);
413 : 6414 : sjinf->min_lefthand = replace_relid(sjinf->min_lefthand, ojrelid, subst);
414 : 6414 : sjinf->min_righthand = replace_relid(sjinf->min_righthand, ojrelid, subst);
415 : 6414 : sjinf->syn_lefthand = replace_relid(sjinf->syn_lefthand, ojrelid, subst);
416 : 6414 : sjinf->syn_righthand = replace_relid(sjinf->syn_righthand, ojrelid, subst);
417 : : /* relid cannot appear in these fields, but ojrelid can: */
418 : 6414 : sjinf->commute_above_l = replace_relid(sjinf->commute_above_l, ojrelid, subst);
419 : 6414 : sjinf->commute_above_r = replace_relid(sjinf->commute_above_r, ojrelid, subst);
420 : 6414 : sjinf->commute_below_l = replace_relid(sjinf->commute_below_l, ojrelid, subst);
421 : 6414 : sjinf->commute_below_r = replace_relid(sjinf->commute_below_r, ojrelid, subst);
422 : :
423 : 6414 : replace_varno((Node *) sjinf->semi_rhs_exprs, relid, subst);
424 : : }
425 : :
426 : : /*
427 : : * Likewise remove references from PlaceHolderVar data structures,
428 : : * removing any no-longer-needed placeholders entirely.
429 : : *
430 : : * Removal is a bit trickier than it might seem: we can remove PHVs that
431 : : * are used at the target rel and/or in the join qual, but not those that
432 : : * are used at join partner rels or above the join. It's not that easy to
433 : : * distinguish PHVs used at partner rels from those used in the join qual,
434 : : * since they will both have ph_needed sets that are subsets of
435 : : * joinrelids. However, a PHV used at a partner rel could not have the
436 : : * target rel in ph_eval_at, so we check that while deciding whether to
437 : : * remove or just update the PHV. There is no corresponding test in
438 : : * join_is_removable because it doesn't need to distinguish those cases.
439 : : */
1735 tgl@sss.pgh.pa.us 440 [ + + + + :CBC 5055 : foreach(l, root->placeholder_list)
+ + ]
441 : : {
5131 442 : 81 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
443 : :
111 akorotkov@postgresql 444 [ + + - + ]:GNC 81 : Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
3173 tgl@sss.pgh.pa.us 445 [ + + + + ]:CBC 99 : if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
311 446 [ + + ]: 27 : bms_is_member(relid, phinfo->ph_eval_at) &&
90 akorotkov@postgresql 447 [ + + ]:GNC 6 : (sjinfo == NULL || !bms_is_member(ojrelid, phinfo->ph_eval_at)))
448 : : {
1735 tgl@sss.pgh.pa.us 449 :CBC 6 : root->placeholder_list = foreach_delete_current(root->placeholder_list,
450 : : l);
606 451 : 6 : root->placeholder_array[phinfo->phid] = NULL;
452 : : }
453 : : else
454 : : {
431 455 : 75 : PlaceHolderVar *phv = phinfo->ph_var;
456 : :
172 akorotkov@postgresql 457 :GNC 75 : phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, relid, subst);
458 : 75 : phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, ojrelid, subst);
431 tgl@sss.pgh.pa.us 459 [ - + ]:CBC 75 : Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */
172 akorotkov@postgresql 460 :GNC 75 : phinfo->ph_needed = replace_relid(phinfo->ph_needed, relid, subst);
461 : 75 : phinfo->ph_needed = replace_relid(phinfo->ph_needed, ojrelid, subst);
462 : : /* ph_needed might or might not become empty */
111 463 : 75 : phinfo->ph_lateral = replace_relid(phinfo->ph_lateral, relid, subst);
464 : : /* ph_lateral might or might not be empty */
172 465 : 75 : phv->phrels = replace_relid(phv->phrels, relid, subst);
466 : 75 : phv->phrels = replace_relid(phv->phrels, ojrelid, subst);
431 tgl@sss.pgh.pa.us 467 [ - + ]:CBC 75 : Assert(!bms_is_empty(phv->phrels));
157 akorotkov@postgresql 468 :GNC 75 : replace_varno((Node *) phv->phexpr, relid, subst);
431 tgl@sss.pgh.pa.us 469 [ - + ]:CBC 75 : Assert(phv->phnullingrels == NULL); /* no need to adjust */
470 : : }
471 : : }
172 akorotkov@postgresql 472 :GNC 4974 : }
473 : :
474 : : /*
475 : : * Remove the target relid and references to the target join from the
476 : : * planner's data structures, having determined that there is no need
477 : : * to include them in the query.
478 : : *
479 : : * We are not terribly thorough here. We only bother to update parts of
480 : : * the planner's data structures that will actually be consulted later.
481 : : */
482 : : static void
483 : 4661 : remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
484 : : SpecialJoinInfo *sjinfo)
485 : : {
486 : : List *joininfos;
487 : 4661 : int ojrelid = sjinfo->ojrelid;
488 : 4661 : RelOptInfo *rel = find_base_rel(root, relid);
489 : : Relids join_plus_commute;
490 : : Relids joinrelids;
491 : : ListCell *l;
492 : :
493 : : /* Compute the relid set for the join we are considering */
494 : 4661 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
495 [ - + ]: 4661 : Assert(ojrelid != 0);
496 : 4661 : joinrelids = bms_add_member(joinrelids, ojrelid);
497 : :
498 : 4661 : remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
499 : :
500 : : /*
501 : : * Remove any joinquals referencing the rel from the joininfo lists.
502 : : *
503 : : * In some cases, a joinqual has to be put back after deleting its
504 : : * reference to the target rel. This can occur for pseudoconstant and
505 : : * outerjoin-delayed quals, which can get marked as requiring the rel in
506 : : * order to force them to be evaluated at or above the join. We can't
507 : : * just discard them, though. Only quals that logically belonged to the
508 : : * outer join being discarded should be removed from the query.
509 : : *
510 : : * We might encounter a qual that is a clone of a deletable qual with some
511 : : * outer-join relids added (see deconstruct_distribute_oj_quals). To
512 : : * ensure we get rid of such clones as well, add the relids of all OJs
513 : : * commutable with this one to the set we test against for
514 : : * pushed-down-ness.
515 : : */
324 tgl@sss.pgh.pa.us 516 :CBC 4661 : join_plus_commute = bms_union(joinrelids,
517 : 4661 : sjinfo->commute_above_r);
518 : 4661 : join_plus_commute = bms_add_members(join_plus_commute,
519 : 4661 : sjinfo->commute_below_l);
520 : :
521 : : /*
522 : : * We must make a copy of the rel's old joininfo list before starting the
523 : : * loop, because otherwise remove_join_clause_from_rels would destroy the
524 : : * list while we're scanning it.
525 : : */
4961 526 : 4661 : joininfos = list_copy(rel->joininfo);
527 [ + + + + : 9488 : foreach(l, joininfos)
+ + ]
528 : : {
529 : 4827 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
530 : :
531 : 4827 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
532 : :
324 533 [ + + + + ]: 4827 : if (RINFO_IS_PUSHED_DOWN(rinfo, join_plus_commute))
534 : : {
535 : : /*
536 : : * There might be references to relid or ojrelid in the
537 : : * RestrictInfo's relid sets, as a consequence of PHVs having had
538 : : * ph_eval_at sets that include those. We already checked above
539 : : * that any such PHV is safe (and updated its ph_eval_at), so we
540 : : * can just drop those references.
541 : : */
429 542 : 51 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
543 : :
544 : : /*
545 : : * Cross-check that the clause itself does not reference the
546 : : * target rel or join.
547 : : */
548 : : #ifdef USE_ASSERT_CHECKING
549 : : {
324 550 : 51 : Relids clause_varnos = pull_varnos(root,
551 : 51 : (Node *) rinfo->clause);
552 : :
553 [ - + ]: 51 : Assert(!bms_is_member(relid, clause_varnos));
554 [ - + ]: 51 : Assert(!bms_is_member(ojrelid, clause_varnos));
555 : : }
556 : : #endif
557 : : /* Now throw it back into the joininfo lists */
4961 558 : 51 : distribute_restrictinfo_to_rels(root, rinfo);
559 : : }
560 : : }
561 : :
562 : : /*
563 : : * Likewise remove references from EquivalenceClasses.
564 : : */
304 565 [ + - + + : 25414 : foreach(l, root->eq_classes)
+ + ]
566 : : {
567 : 20753 : EquivalenceClass *ec = (EquivalenceClass *) lfirst(l);
568 : :
569 [ + + - + ]: 34907 : if (bms_is_member(relid, ec->ec_relids) ||
570 : 14154 : bms_is_member(ojrelid, ec->ec_relids))
571 : 6599 : remove_rel_from_eclass(ec, relid, ojrelid);
572 : : }
573 : :
574 : : /*
575 : : * There may be references to the rel in root->fkey_list, but if so,
576 : : * match_foreign_keys_to_quals() will get rid of them.
577 : : */
578 : :
579 : : /*
580 : : * Finally, remove the rel from the baserel array to prevent it from being
581 : : * referenced again. (We can't do this earlier because
582 : : * remove_join_clause_from_rels will touch it.)
583 : : */
426 584 : 4661 : root->simple_rel_array[relid] = NULL;
585 : :
586 : : /* And nuke the RelOptInfo, just in case there's another access path */
587 : 4661 : pfree(rel);
5131 588 : 4661 : }
589 : :
590 : : /*
591 : : * Remove any references to relid or ojrelid from the RestrictInfo.
592 : : *
593 : : * We only bother to clean out bits in clause_relids and required_relids,
594 : : * not nullingrel bits in contained Vars and PHVs. (This might have to be
595 : : * improved sometime.) However, if the RestrictInfo contains an OR clause
596 : : * we have to also clean up the sub-clauses.
597 : : */
598 : : static void
429 599 : 1944 : remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
600 : : {
601 : : /*
602 : : * The clause_relids probably aren't shared with anything else, but let's
603 : : * copy them just to be sure.
604 : : */
605 : 1944 : rinfo->clause_relids = bms_copy(rinfo->clause_relids);
606 : 1944 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
607 : 1944 : rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
608 : : /* Likewise for required_relids */
609 : 1944 : rinfo->required_relids = bms_copy(rinfo->required_relids);
610 : 1944 : rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
611 : 1944 : rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
612 : :
613 : : /* If it's an OR, recurse to clean up sub-clauses */
614 [ + + ]: 1944 : if (restriction_is_or_clause(rinfo))
615 : : {
616 : : ListCell *lc;
617 : :
618 [ - + ]: 3 : Assert(is_orclause(rinfo->orclause));
619 [ + - + + : 9 : foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
+ + ]
620 : : {
621 : 6 : Node *orarg = (Node *) lfirst(lc);
622 : :
623 : : /* OR arguments should be ANDs or sub-RestrictInfos */
624 [ - + ]: 6 : if (is_andclause(orarg))
625 : : {
429 tgl@sss.pgh.pa.us 626 :UBC 0 : List *andargs = ((BoolExpr *) orarg)->args;
627 : : ListCell *lc2;
628 : :
629 [ # # # # : 0 : foreach(lc2, andargs)
# # ]
630 : : {
631 : 0 : RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
632 : :
633 : 0 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
634 : : }
635 : : }
636 : : else
637 : : {
429 tgl@sss.pgh.pa.us 638 :CBC 6 : RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
639 : :
640 : 6 : remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
641 : : }
642 : : }
643 : : }
644 : 1944 : }
645 : :
646 : : /*
647 : : * Remove any references to relid or ojrelid from the EquivalenceClass.
648 : : *
649 : : * Like remove_rel_from_restrictinfo, we don't worry about cleaning out
650 : : * any nullingrel bits in contained Vars and PHVs. (This might have to be
651 : : * improved sometime.) We do need to fix the EC and EM relid sets to ensure
652 : : * that implied join equalities will be generated at the appropriate join
653 : : * level(s).
654 : : */
655 : : static void
304 656 : 6599 : remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid)
657 : : {
658 : : ListCell *lc;
659 : :
660 : : /* Fix up the EC's overall relids */
661 : 6599 : ec->ec_relids = bms_del_member(ec->ec_relids, relid);
662 : 6599 : ec->ec_relids = bms_del_member(ec->ec_relids, ojrelid);
663 : :
664 : : /*
665 : : * Fix up the member expressions. Any non-const member that ends with
666 : : * empty em_relids must be a Var or PHV of the removed relation. We don't
667 : : * need it anymore, so we can drop it.
668 : : */
669 [ + + + + : 15085 : foreach(lc, ec->ec_members)
+ + ]
670 : : {
671 : 8486 : EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc);
672 : :
673 [ + + - + ]: 10373 : if (bms_is_member(relid, cur_em->em_relids) ||
674 : 1887 : bms_is_member(ojrelid, cur_em->em_relids))
675 : : {
676 [ - + ]: 6599 : Assert(!cur_em->em_is_const);
677 : 6599 : cur_em->em_relids = bms_del_member(cur_em->em_relids, relid);
678 : 6599 : cur_em->em_relids = bms_del_member(cur_em->em_relids, ojrelid);
679 [ + + ]: 6599 : if (bms_is_empty(cur_em->em_relids))
680 : 6593 : ec->ec_members = foreach_delete_current(ec->ec_members, lc);
681 : : }
682 : : }
683 : :
684 : : /* Fix up the source clauses, in case we can re-use them later */
685 [ + + + + : 8486 : foreach(lc, ec->ec_sources)
+ + ]
686 : : {
687 : 1887 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
688 : :
689 : 1887 : remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
690 : : }
691 : :
692 : : /*
693 : : * Rather than expend code on fixing up any already-derived clauses, just
694 : : * drop them. (At this point, any such clauses would be base restriction
695 : : * clauses, which we'd not need anymore anyway.)
696 : : */
697 : 6599 : ec->ec_derives = NIL;
698 : 6599 : }
699 : :
700 : : /*
701 : : * Remove any occurrences of the target relid from a joinlist structure.
702 : : *
703 : : * It's easiest to build a whole new list structure, so we handle it that
704 : : * way. Efficiency is not a big deal here.
705 : : *
706 : : * *nremoved is incremented by the number of occurrences removed (there
707 : : * should be exactly one, but the caller checks that).
708 : : */
709 : : static List *
5131 710 : 5106 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
711 : : {
712 : 5106 : List *result = NIL;
713 : : ListCell *jl;
714 : :
715 [ + - + + : 18809 : foreach(jl, joinlist)
+ + ]
716 : : {
717 : 13703 : Node *jlnode = (Node *) lfirst(jl);
718 : :
719 [ + + ]: 13703 : if (IsA(jlnode, RangeTblRef))
720 : : {
721 : 13571 : int varno = ((RangeTblRef *) jlnode)->rtindex;
722 : :
723 [ + + ]: 13571 : if (varno == relid)
724 : 4974 : (*nremoved)++;
725 : : else
726 : 8597 : result = lappend(result, jlnode);
727 : : }
728 [ + - ]: 132 : else if (IsA(jlnode, List))
729 : : {
730 : : /* Recurse to handle subproblem */
731 : : List *sublist;
732 : :
733 : 132 : sublist = remove_rel_from_joinlist((List *) jlnode,
734 : : relid, nremoved);
735 : : /* Avoid including empty sub-lists in the result */
736 [ + - ]: 132 : if (sublist)
737 : 132 : result = lappend(result, sublist);
738 : : }
739 : : else
740 : : {
5131 tgl@sss.pgh.pa.us 741 [ # # ]:UBC 0 : elog(ERROR, "unrecognized joinlist node type: %d",
742 : : (int) nodeTag(jlnode));
743 : : }
744 : : }
745 : :
5131 tgl@sss.pgh.pa.us 746 :CBC 5106 : return result;
747 : : }
748 : :
749 : :
750 : : /*
751 : : * reduce_unique_semijoins
752 : : * Check for semijoins that can be simplified to plain inner joins
753 : : * because the inner relation is provably unique for the join clauses.
754 : : *
755 : : * Ideally this would happen during reduce_outer_joins, but we don't have
756 : : * enough information at that point.
757 : : *
758 : : * To perform the strength reduction when applicable, we need only delete
759 : : * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
760 : : * bother fixing the join type attributed to it in the query jointree,
761 : : * since that won't be consulted again.)
762 : : */
763 : : void
2540 764 : 139537 : reduce_unique_semijoins(PlannerInfo *root)
765 : : {
766 : : ListCell *lc;
767 : :
768 : : /*
769 : : * Scan the join_info_list to find semijoins.
770 : : */
1735 771 [ + + + + : 158661 : foreach(lc, root->join_info_list)
+ + ]
772 : : {
2540 773 : 19124 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
774 : : int innerrelid;
775 : : RelOptInfo *innerrel;
776 : : Relids joinrelids;
777 : : List *restrictlist;
778 : :
779 : : /*
780 : : * Must be a semijoin to a single baserel, else we aren't going to be
781 : : * able to do anything with it.
782 : : */
440 783 [ + + ]: 19124 : if (sjinfo->jointype != JOIN_SEMI)
2540 784 : 18997 : continue;
785 : :
786 [ + + ]: 976 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
787 : 82 : continue;
788 : :
789 : 894 : innerrel = find_base_rel(root, innerrelid);
790 : :
791 : : /*
792 : : * Before we trouble to run generate_join_implied_equalities, make a
793 : : * quick check to eliminate cases in which we will surely be unable to
794 : : * prove uniqueness of the innerrel.
795 : : */
796 [ + + ]: 894 : if (!rel_supports_distinctness(root, innerrel))
797 : 389 : continue;
798 : :
799 : : /* Compute the relid set for the join we are considering */
800 : 505 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
440 801 [ - + ]: 505 : Assert(sjinfo->ojrelid == 0); /* SEMI joins don't have RT indexes */
802 : :
803 : : /*
804 : : * Since we're only considering a single-rel RHS, any join clauses it
805 : : * has must be clauses linking it to the semijoin's min_lefthand. We
806 : : * can also consider EC-derived join clauses.
807 : : */
808 : : restrictlist =
2540 809 : 505 : list_concat(generate_join_implied_equalities(root,
810 : : joinrelids,
811 : : sjinfo->min_lefthand,
812 : : innerrel,
813 : : NULL),
814 : 505 : innerrel->joininfo);
815 : :
816 : : /* Test whether the innerrel is unique for those clauses. */
2186 817 [ + + ]: 505 : if (!innerrel_is_unique(root,
818 : : joinrelids, sjinfo->min_lefthand, innerrel,
819 : : JOIN_SEMI, restrictlist, true))
2540 820 : 378 : continue;
821 : :
822 : : /* OK, remove the SpecialJoinInfo from the list. */
1735 823 : 127 : root->join_info_list = foreach_delete_current(root->join_info_list, lc);
824 : : }
2540 825 : 139537 : }
826 : :
827 : :
828 : : /*
829 : : * rel_supports_distinctness
830 : : * Could the relation possibly be proven distinct on some set of columns?
831 : : *
832 : : * This is effectively a pre-checking function for rel_is_distinct_for().
833 : : * It must return true if rel_is_distinct_for() could possibly return true
834 : : * with this rel, but it should not expend a lot of cycles. The idea is
835 : : * that callers can avoid doing possibly-expensive processing to compute
836 : : * rel_is_distinct_for()'s argument lists if the call could not possibly
837 : : * succeed.
838 : : */
839 : : static bool
2929 840 : 248515 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
841 : : {
842 : : /* We only know about baserels ... */
843 [ + + ]: 248515 : if (rel->reloptkind != RELOPT_BASEREL)
844 : 81540 : return false;
845 [ + + ]: 166975 : if (rel->rtekind == RTE_RELATION)
846 : : {
847 : : /*
848 : : * For a plain relation, we only know how to prove uniqueness by
849 : : * reference to unique indexes. Make sure there's at least one
850 : : * suitable unique index. It must be immediately enforced, and not a
851 : : * partial index. (Keep these conditions in sync with
852 : : * relation_has_unique_index_for!)
853 : : */
854 : : ListCell *lc;
855 : :
856 [ + + + + : 221415 : foreach(lc, rel->indexlist)
+ + ]
857 : : {
858 : 200919 : IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
859 : :
300 drowley@postgresql.o 860 [ + + + - : 200919 : if (ind->unique && ind->immediate && ind->indpred == NIL)
+ + ]
2929 tgl@sss.pgh.pa.us 861 : 134445 : return true;
862 : : }
863 : : }
864 [ + + ]: 12034 : else if (rel->rtekind == RTE_SUBQUERY)
865 : : {
866 : 1957 : Query *subquery = root->simple_rte_array[rel->relid]->subquery;
867 : :
868 : : /* Check if the subquery has any qualities that support distinctness */
869 [ + + ]: 1957 : if (query_supports_distinctness(subquery))
870 : 1263 : return true;
871 : : }
872 : : /* We have no proof rules for any other rtekinds. */
873 : 31267 : return false;
874 : : }
875 : :
876 : : /*
877 : : * rel_is_distinct_for
878 : : * Does the relation return only distinct rows according to clause_list?
879 : : *
880 : : * clause_list is a list of join restriction clauses involving this rel and
881 : : * some other one. Return true if no two rows emitted by this rel could
882 : : * possibly join to the same row of the other rel.
883 : : *
884 : : * The caller must have already determined that each condition is a
885 : : * mergejoinable equality with an expression in this relation on one side, and
886 : : * an expression not involving this relation on the other. The transient
887 : : * outer_is_left flag is used to identify which side references this relation:
888 : : * left side if outer_is_left is false, right side if it is true.
889 : : *
890 : : * Note that the passed-in clause_list may be destructively modified! This
891 : : * is OK for current uses, because the clause_list is built by the caller for
892 : : * the sole purpose of passing to this function.
893 : : *
894 : : * outer_exprs contains the right sides of baserestrictinfo clauses looking
895 : : * like x = const if distinctness is derived from such clauses, not joininfo
896 : : * clause. Pass NULL to the outer_exprs, if its value is not needed.
897 : : */
898 : : static bool
172 akorotkov@postgresql 899 :GNC 84251 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
900 : : List **extra_clauses)
901 : : {
902 : : /*
903 : : * We could skip a couple of tests here if we assume all callers checked
904 : : * rel_supports_distinctness first, but it doesn't seem worth taking any
905 : : * risk for.
906 : : */
2929 tgl@sss.pgh.pa.us 907 [ - + ]:CBC 84251 : if (rel->reloptkind != RELOPT_BASEREL)
2929 tgl@sss.pgh.pa.us 908 :UBC 0 : return false;
2929 tgl@sss.pgh.pa.us 909 [ + + ]:CBC 84251 : if (rel->rtekind == RTE_RELATION)
910 : : {
911 : : /*
912 : : * Examine the indexes to see if we have a matching unique index.
913 : : * relation_has_unique_index_ext automatically adds any usable
914 : : * restriction clauses for the rel, so we needn't do that here.
915 : : */
172 akorotkov@postgresql 916 [ + + ]:GNC 83400 : if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL,
917 : : extra_clauses))
2929 tgl@sss.pgh.pa.us 918 :CBC 50943 : return true;
919 : : }
920 [ + - ]: 851 : else if (rel->rtekind == RTE_SUBQUERY)
921 : : {
922 : 851 : Index relid = rel->relid;
923 : 851 : Query *subquery = root->simple_rte_array[relid]->subquery;
924 : 851 : List *colnos = NIL;
925 : 851 : List *opids = NIL;
926 : : ListCell *l;
927 : :
928 : : /*
929 : : * Build the argument lists for query_is_distinct_for: a list of
930 : : * output column numbers that the query needs to be distinct over, and
931 : : * a list of equality operators that the output columns need to be
932 : : * distinct according to.
933 : : *
934 : : * (XXX we are not considering restriction clauses attached to the
935 : : * subquery; is that worth doing?)
936 : : */
937 [ + + + + : 1690 : foreach(l, clause_list)
+ + ]
938 : : {
2561 939 : 839 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
940 : : Oid op;
941 : : Var *var;
942 : :
943 : : /*
944 : : * Get the equality operator we need uniqueness according to.
945 : : * (This might be a cross-type operator and thus not exactly the
946 : : * same operator the subquery would consider; that's all right
947 : : * since query_is_distinct_for can resolve such cases.) The
948 : : * caller's mergejoinability test should have selected only
949 : : * OpExprs.
950 : : */
2609 peter_e@gmx.net 951 : 839 : op = castNode(OpExpr, rinfo->clause)->opno;
952 : :
953 : : /* caller identified the inner side for us */
2929 tgl@sss.pgh.pa.us 954 [ + + ]: 839 : if (rinfo->outer_is_left)
955 : 682 : var = (Var *) get_rightop(rinfo->clause);
956 : : else
957 : 157 : var = (Var *) get_leftop(rinfo->clause);
958 : :
959 : : /*
960 : : * We may ignore any RelabelType node above the operand. (There
961 : : * won't be more than one, since eval_const_expressions() has been
962 : : * applied already.)
963 : : */
2401 964 [ + - + + ]: 839 : if (var && IsA(var, RelabelType))
965 : 300 : var = (Var *) ((RelabelType *) var)->arg;
966 : :
967 : : /*
968 : : * If inner side isn't a Var referencing a subquery output column,
969 : : * this clause doesn't help us.
970 : : */
2929 971 [ + - + + ]: 839 : if (!var || !IsA(var, Var) ||
972 [ + - - + ]: 833 : var->varno != relid || var->varlevelsup != 0)
973 : 6 : continue;
974 : :
975 : 833 : colnos = lappend_int(colnos, var->varattno);
976 : 833 : opids = lappend_oid(opids, op);
977 : : }
978 : :
979 [ + + ]: 851 : if (query_is_distinct_for(subquery, colnos, opids))
980 : 94 : return true;
981 : : }
982 : 33214 : return false;
983 : : }
984 : :
985 : :
986 : : /*
987 : : * query_supports_distinctness - could the query possibly be proven distinct
988 : : * on some set of output columns?
989 : : *
990 : : * This is effectively a pre-checking function for query_is_distinct_for().
991 : : * It must return true if query_is_distinct_for() could possibly return true
992 : : * with this query, but it should not expend a lot of cycles. The idea is
993 : : * that callers can avoid doing possibly-expensive processing to compute
994 : : * query_is_distinct_for()'s argument lists if the call could not possibly
995 : : * succeed.
996 : : */
997 : : bool
3561 998 : 2280 : query_supports_distinctness(Query *query)
999 : : {
1000 : : /* SRFs break distinctness except with DISTINCT, see below */
2332 1001 [ + + + - ]: 2280 : if (query->hasTargetSRFs && query->distinctClause == NIL)
2770 1002 : 454 : return false;
1003 : :
1004 : : /* check for features we can prove distinctness with */
3561 1005 [ + + ]: 1826 : if (query->distinctClause != NIL ||
1006 [ + + ]: 1754 : query->groupClause != NIL ||
3256 andres@anarazel.de 1007 [ + - ]: 1651 : query->groupingSets != NIL ||
3561 tgl@sss.pgh.pa.us 1008 [ + + ]: 1651 : query->hasAggs ||
1009 [ + - ]: 1563 : query->havingQual ||
1010 [ + + ]: 1563 : query->setOperations)
1011 : 1568 : return true;
1012 : :
1013 : 258 : return false;
1014 : : }
1015 : :
1016 : : /*
1017 : : * query_is_distinct_for - does query never return duplicates of the
1018 : : * specified columns?
1019 : : *
1020 : : * query is a not-yet-planned subquery (in current usage, it's always from
1021 : : * a subquery RTE, which the planner avoids scribbling on).
1022 : : *
1023 : : * colnos is an integer list of output column numbers (resno's). We are
1024 : : * interested in whether rows consisting of just these columns are certain
1025 : : * to be distinct. "Distinctness" is defined according to whether the
1026 : : * corresponding upper-level equality operators listed in opids would think
1027 : : * the values are distinct. (Note: the opids entries could be cross-type
1028 : : * operators, and thus not exactly the equality operators that the subquery
1029 : : * would use itself. We use equality_ops_are_compatible() to check
1030 : : * compatibility. That looks at btree or hash opfamily membership, and so
1031 : : * should give trustworthy answers for all operators that we might need
1032 : : * to deal with here.)
1033 : : */
1034 : : bool
1035 : 912 : query_is_distinct_for(Query *query, List *colnos, List *opids)
1036 : : {
1037 : : ListCell *l;
1038 : : Oid opid;
1039 : :
1040 [ - + ]: 912 : Assert(list_length(colnos) == list_length(opids));
1041 : :
1042 : : /*
1043 : : * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
1044 : : * columns in the DISTINCT clause appear in colnos and operator semantics
1045 : : * match. This is true even if there are SRFs in the DISTINCT columns or
1046 : : * elsewhere in the tlist.
1047 : : */
1048 [ + + ]: 912 : if (query->distinctClause)
1049 : : {
1050 [ + - + + : 75 : foreach(l, query->distinctClause)
+ + ]
1051 : : {
1052 : 60 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1053 : 60 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1054 : : query->targetList);
1055 : :
1056 : 60 : opid = distinct_col_search(tle->resno, colnos, opids);
1057 [ + + ]: 60 : if (!OidIsValid(opid) ||
1058 [ + - ]: 24 : !equality_ops_are_compatible(opid, sgc->eqop))
1059 : : break; /* exit early if no match */
1060 : : }
1061 [ + + ]: 51 : if (l == NULL) /* had matches for all? */
1062 : 15 : return true;
1063 : : }
1064 : :
1065 : : /*
1066 : : * Otherwise, a set-returning function in the query's targetlist can
1067 : : * result in returning duplicate rows, despite any grouping that might
1068 : : * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY
1069 : : * columns, it would be safe because they'd be expanded before grouping.
1070 : : * But it doesn't currently seem worth the effort to check for that.)
1071 : : */
2332 1072 [ - + ]: 897 : if (query->hasTargetSRFs)
2332 tgl@sss.pgh.pa.us 1073 :UBC 0 : return false;
1074 : :
1075 : : /*
1076 : : * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
1077 : : * the grouped columns appear in colnos and operator semantics match.
1078 : : */
3256 andres@anarazel.de 1079 [ + + + - ]:CBC 897 : if (query->groupClause && !query->groupingSets)
1080 : : {
3561 tgl@sss.pgh.pa.us 1081 [ + - + + : 129 : foreach(l, query->groupClause)
+ + ]
1082 : : {
1083 : 100 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
1084 : 100 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
1085 : : query->targetList);
1086 : :
1087 : 100 : opid = distinct_col_search(tle->resno, colnos, opids);
1088 [ + + ]: 100 : if (!OidIsValid(opid) ||
1089 [ + - ]: 62 : !equality_ops_are_compatible(opid, sgc->eqop))
1090 : : break; /* exit early if no match */
1091 : : }
1092 [ + + ]: 67 : if (l == NULL) /* had matches for all? */
1093 : 29 : return true;
1094 : : }
3256 andres@anarazel.de 1095 [ - + ]: 830 : else if (query->groupingSets)
1096 : : {
1097 : : /*
1098 : : * If we have grouping sets with expressions, we probably don't have
1099 : : * uniqueness and analysis would be hard. Punt.
1100 : : */
3256 andres@anarazel.de 1101 [ # # ]:UBC 0 : if (query->groupClause)
1102 : 0 : return false;
1103 : :
1104 : : /*
1105 : : * If we have no groupClause (therefore no grouping expressions), we
1106 : : * might have one or many empty grouping sets. If there's just one,
1107 : : * then we're returning only one row and are certainly unique. But
1108 : : * otherwise, we know we're certainly not unique.
1109 : : */
1110 [ # # ]: 0 : if (list_length(query->groupingSets) == 1 &&
3249 bruce@momjian.us 1111 [ # # ]: 0 : ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
3256 andres@anarazel.de 1112 : 0 : return true;
1113 : : else
1114 : 0 : return false;
1115 : : }
1116 : : else
1117 : : {
1118 : : /*
1119 : : * If we have no GROUP BY, but do have aggregates or HAVING, then the
1120 : : * result is at most one row so it's surely unique, for any operators.
1121 : : */
3561 tgl@sss.pgh.pa.us 1122 [ + + - + ]:CBC 830 : if (query->hasAggs || query->havingQual)
1123 : 44 : return true;
1124 : : }
1125 : :
1126 : : /*
1127 : : * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1128 : : * except with ALL.
1129 : : */
1130 [ + + ]: 824 : if (query->setOperations)
1131 : : {
2609 peter_e@gmx.net 1132 : 750 : SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
1133 : :
3561 tgl@sss.pgh.pa.us 1134 [ - + ]: 750 : Assert(topop->op != SETOP_NONE);
1135 : :
1136 [ + + ]: 750 : if (!topop->all)
1137 : : {
1138 : : ListCell *lg;
1139 : :
1140 : : /* We're good if all the nonjunk output columns are in colnos */
1141 : 36 : lg = list_head(topop->groupClauses);
1142 [ + - + + : 45 : foreach(l, query->targetList)
+ + ]
1143 : : {
1144 : 39 : TargetEntry *tle = (TargetEntry *) lfirst(l);
1145 : : SortGroupClause *sgc;
1146 : :
1147 [ - + ]: 39 : if (tle->resjunk)
3561 tgl@sss.pgh.pa.us 1148 :UBC 0 : continue; /* ignore resjunk columns */
1149 : :
1150 : : /* non-resjunk columns should have grouping clauses */
3561 tgl@sss.pgh.pa.us 1151 [ - + ]:CBC 39 : Assert(lg != NULL);
1152 : 39 : sgc = (SortGroupClause *) lfirst(lg);
1735 1153 : 39 : lg = lnext(topop->groupClauses, lg);
1154 : :
3561 1155 : 39 : opid = distinct_col_search(tle->resno, colnos, opids);
1156 [ + + ]: 39 : if (!OidIsValid(opid) ||
1157 [ + - ]: 9 : !equality_ops_are_compatible(opid, sgc->eqop))
1158 : : break; /* exit early if no match */
1159 : : }
1160 [ + + ]: 36 : if (l == NULL) /* had matches for all? */
1161 : 6 : return true;
1162 : : }
1163 : : }
1164 : :
1165 : : /*
1166 : : * XXX Are there any other cases in which we can easily see the result
1167 : : * must be distinct?
1168 : : *
1169 : : * If you do add more smarts to this function, be sure to update
1170 : : * query_supports_distinctness() to match.
1171 : : */
1172 : :
1173 : 818 : return false;
1174 : : }
1175 : :
1176 : : /*
1177 : : * distinct_col_search - subroutine for query_is_distinct_for
1178 : : *
1179 : : * If colno is in colnos, return the corresponding element of opids,
1180 : : * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
1181 : : * but if it does, we arbitrarily select the first match.)
1182 : : */
1183 : : static Oid
1184 : 199 : distinct_col_search(int colno, List *colnos, List *opids)
1185 : : {
1186 : : ListCell *lc1,
1187 : : *lc2;
1188 : :
1189 [ + - + + : 317 : forboth(lc1, colnos, lc2, opids)
+ - + + +
+ + - +
+ ]
1190 : : {
1191 [ + + ]: 213 : if (colno == lfirst_int(lc1))
1192 : 95 : return lfirst_oid(lc2);
1193 : : }
1194 : 104 : return InvalidOid;
1195 : : }
1196 : :
1197 : :
1198 : : /*
1199 : : * innerrel_is_unique
1200 : : * Check if the innerrel provably contains at most one tuple matching any
1201 : : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1202 : : *
1203 : : * We need an actual RelOptInfo for the innerrel, but it's sufficient to
1204 : : * identify the outerrel by its Relids. This asymmetry supports use of this
1205 : : * function before joinrels have been built. (The caller is expected to
1206 : : * also supply the joinrelids, just to save recalculating that.)
1207 : : *
1208 : : * The proof must be made based only on clauses that will be "joinquals"
1209 : : * rather than "otherquals" at execution. For an inner join there's no
1210 : : * difference; but if the join is outer, we must ignore pushed-down quals,
1211 : : * as those will become "otherquals". Note that this means the answer might
1212 : : * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
1213 : : * answer without regard to that, callers must take care not to call this
1214 : : * with jointypes that would be classified differently by IS_OUTER_JOIN().
1215 : : *
1216 : : * The actual proof is undertaken by is_innerrel_unique_for(); this function
1217 : : * is a frontend that is mainly concerned with caching the answers.
1218 : : * In particular, the force_cache argument allows overriding the internal
1219 : : * heuristic about whether to cache negative answers; it should be "true"
1220 : : * if making an inquiry that is not part of the normal bottom-up join search
1221 : : * sequence.
1222 : : */
1223 : : bool
2564 1224 : 273738 : innerrel_is_unique(PlannerInfo *root,
1225 : : Relids joinrelids,
1226 : : Relids outerrelids,
1227 : : RelOptInfo *innerrel,
1228 : : JoinType jointype,
1229 : : List *restrictlist,
1230 : : bool force_cache)
1231 : : {
172 akorotkov@postgresql 1232 :GNC 273738 : return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
1233 : : jointype, restrictlist, force_cache, NULL);
1234 : : }
1235 : :
1236 : : /*
1237 : : * innerrel_is_unique_ext
1238 : : * Do the same as innerrel_is_unique(), but also set to '*extra_clauses'
1239 : : * additional clauses from a baserestrictinfo list that were used to prove
1240 : : * uniqueness. A non NULL 'extra_clauses' indicates that we're checking
1241 : : * for self-join and correspondingly dealing with filtered clauses.
1242 : : */
1243 : : bool
1244 : 276549 : innerrel_is_unique_ext(PlannerInfo *root,
1245 : : Relids joinrelids,
1246 : : Relids outerrelids,
1247 : : RelOptInfo *innerrel,
1248 : : JoinType jointype,
1249 : : List *restrictlist,
1250 : : bool force_cache,
1251 : : List **extra_clauses)
1252 : : {
1253 : : MemoryContext old_context;
1254 : : ListCell *lc;
1255 : : UniqueRelInfo *uniqueRelInfo;
1256 : 276549 : List *outer_exprs = NIL;
96 1257 : 276549 : bool self_join = (extra_clauses != NULL);
1258 : :
1259 : : /* Certainly can't prove uniqueness when there are no joinclauses */
2564 tgl@sss.pgh.pa.us 1260 [ + + ]:CBC 276549 : if (restrictlist == NIL)
1261 : 48700 : return false;
1262 : :
1263 : : /*
1264 : : * Make a quick check to eliminate cases in which we will surely be unable
1265 : : * to prove uniqueness of the innerrel.
1266 : : */
1267 [ + + ]: 227849 : if (!rel_supports_distinctness(root, innerrel))
1268 : 111045 : return false;
1269 : :
1270 : : /*
1271 : : * Query the cache to see if we've managed to prove that innerrel is
1272 : : * unique for any subset of this outerrel. For non self-join search, we
1273 : : * don't need an exact match, as extra outerrels can't make the innerrel
1274 : : * any less unique (or more formally, the restrictlist for a join to a
1275 : : * superset outerrel must be a superset of the conditions we successfully
1276 : : * used before). For self-join search, we require an exact match of
1277 : : * outerrels, because we need extra clauses to be valid for our case.
1278 : : * Also, for self-join checking we've filtered the clauses list. Thus,
1279 : : * for a self-join search, we can match only the result cached for another
1280 : : * self-join check.
1281 : : */
1282 [ + + + + : 128130 : foreach(lc, innerrel->unique_for_rels)
+ + ]
1283 : : {
172 akorotkov@postgresql 1284 :GNC 48429 : uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
1285 : :
96 1286 [ + + + + : 48429 : if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) ||
+ + ]
1287 [ + + ]: 37 : (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) &&
1288 [ + + ]: 28 : uniqueRelInfo->self_join))
1289 : : {
172 1290 [ + + ]: 37103 : if (extra_clauses)
1291 : 6 : *extra_clauses = uniqueRelInfo->extra_clauses;
2564 tgl@sss.pgh.pa.us 1292 :CBC 37103 : return true; /* Success! */
1293 : : }
1294 : : }
1295 : :
1296 : : /*
1297 : : * Conversely, we may have already determined that this outerrel, or some
1298 : : * superset thereof, cannot prove this innerrel to be unique.
1299 : : */
1300 [ + + + + : 80187 : foreach(lc, innerrel->non_unique_for_rels)
+ + ]
1301 : : {
2564 tgl@sss.pgh.pa.us 1302 :GBC 645 : Relids unique_for_rels = (Relids) lfirst(lc);
1303 : :
2540 1304 [ + + ]: 645 : if (bms_is_subset(outerrelids, unique_for_rels))
2564 1305 : 159 : return false;
1306 : : }
1307 : :
1308 : : /* No cached information, so try to make the proof. */
2186 tgl@sss.pgh.pa.us 1309 [ + + + + ]:CBC 79542 : if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
1310 : : jointype, restrictlist,
1311 : : self_join ? &outer_exprs : NULL))
1312 : : {
1313 : : /*
1314 : : * Cache the positive result for future probes, being sure to keep it
1315 : : * in the planner_cxt even if we are working in GEQO.
1316 : : *
1317 : : * Note: one might consider trying to isolate the minimal subset of
1318 : : * the outerrels that proved the innerrel unique. But it's not worth
1319 : : * the trouble, because the planner builds up joinrels incrementally
1320 : : * and so we'll see the minimally sufficient outerrels before any
1321 : : * supersets of them anyway.
1322 : : */
2564 1323 : 46376 : old_context = MemoryContextSwitchTo(root->planner_cxt);
160 akorotkov@postgresql 1324 :GNC 46376 : uniqueRelInfo = makeNode(UniqueRelInfo);
172 1325 : 46376 : uniqueRelInfo->outerrelids = bms_copy(outerrelids);
96 1326 : 46376 : uniqueRelInfo->self_join = self_join;
1327 : 46376 : uniqueRelInfo->extra_clauses = outer_exprs;
2564 tgl@sss.pgh.pa.us 1328 :CBC 46376 : innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1329 : : uniqueRelInfo);
1330 : 46376 : MemoryContextSwitchTo(old_context);
1331 : :
172 akorotkov@postgresql 1332 [ + + ]:GNC 46376 : if (extra_clauses)
1333 : 347 : *extra_clauses = outer_exprs;
2564 tgl@sss.pgh.pa.us 1334 :CBC 46376 : return true; /* Success! */
1335 : : }
1336 : : else
1337 : : {
1338 : : /*
1339 : : * None of the join conditions for outerrel proved innerrel unique, so
1340 : : * we can safely reject this outerrel or any subset of it in future
1341 : : * checks.
1342 : : *
1343 : : * However, in normal planning mode, caching this knowledge is totally
1344 : : * pointless; it won't be queried again, because we build up joinrels
1345 : : * from smaller to larger. It is useful in GEQO mode, where the
1346 : : * knowledge can be carried across successive planning attempts; and
1347 : : * it's likely to be useful when using join-search plugins, too. Hence
1348 : : * cache when join_search_private is non-NULL. (Yeah, that's a hack,
1349 : : * but it seems reasonable.)
1350 : : *
1351 : : * Also, allow callers to override that heuristic and force caching;
1352 : : * that's useful for reduce_unique_semijoins, which calls here before
1353 : : * the normal join search starts.
1354 : : */
2540 1355 [ + + - + ]: 33166 : if (force_cache || root->join_search_private)
1356 : : {
2564 1357 : 806 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1358 : 806 : innerrel->non_unique_for_rels =
1359 : 806 : lappend(innerrel->non_unique_for_rels,
2540 1360 : 806 : bms_copy(outerrelids));
2564 1361 : 806 : MemoryContextSwitchTo(old_context);
1362 : : }
1363 : :
1364 : 33166 : return false;
1365 : : }
1366 : : }
1367 : :
1368 : : /*
1369 : : * is_innerrel_unique_for
1370 : : * Check if the innerrel provably contains at most one tuple matching any
1371 : : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1372 : : */
1373 : : static bool
1374 : 79542 : is_innerrel_unique_for(PlannerInfo *root,
1375 : : Relids joinrelids,
1376 : : Relids outerrelids,
1377 : : RelOptInfo *innerrel,
1378 : : JoinType jointype,
1379 : : List *restrictlist,
1380 : : List **extra_clauses)
1381 : : {
1382 : 79542 : List *clause_list = NIL;
1383 : : ListCell *lc;
1384 : :
1385 : : /*
1386 : : * Search for mergejoinable clauses that constrain the inner rel against
1387 : : * the outer rel. If an operator is mergejoinable then it behaves like
1388 : : * equality for some btree opclass, so it's what we want. The
1389 : : * mergejoinability test also eliminates clauses containing volatile
1390 : : * functions, which we couldn't depend on.
1391 : : */
1392 [ + - + + : 174012 : foreach(lc, restrictlist)
+ + ]
1393 : : {
1394 : 94470 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1395 : :
1396 : : /*
1397 : : * As noted above, if it's a pushed-down clause and we're at an outer
1398 : : * join, we can't use it.
1399 : : */
2186 1400 [ + + ]: 94470 : if (IS_OUTER_JOIN(jointype) &&
1401 [ + + - + ]: 47290 : RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
2564 1402 : 1720 : continue;
1403 : :
1404 : : /* Ignore if it's not a mergejoinable clause */
1405 [ + + ]: 92750 : if (!restrictinfo->can_join ||
1406 [ + + ]: 85833 : restrictinfo->mergeopfamilies == NIL)
1407 : 7940 : continue; /* not mergejoinable */
1408 : :
1409 : : /*
1410 : : * Check if the clause has the form "outer op inner" or "inner op
1411 : : * outer", and if so mark which side is inner.
1412 : : */
2540 1413 [ + + ]: 84810 : if (!clause_sides_match_join(restrictinfo, outerrelids,
1414 : : innerrel->relids))
2564 tgl@sss.pgh.pa.us 1415 :GBC 7 : continue; /* no good for these input relations */
1416 : :
1417 : : /* OK, add to the list */
2564 tgl@sss.pgh.pa.us 1418 :CBC 84803 : clause_list = lappend(clause_list, restrictinfo);
1419 : : }
1420 : :
1421 : : /* Let rel_is_distinct_for() do the hard work */
172 akorotkov@postgresql 1422 :GNC 79542 : return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
1423 : : }
1424 : :
1425 : : /*
1426 : : * replace_varno - find in the given tree any Vars, PlaceHolderVar, and Relids
1427 : : * that reference the removing relid, and change them to the reference to
1428 : : * the replacement relid.
1429 : : *
1430 : : * NOTE: although this has the form of a walker, we cheat and modify the
1431 : : * nodes in-place.
1432 : : */
1433 : :
1434 : : typedef struct
1435 : : {
1436 : : int from;
1437 : : int to;
1438 : : int sublevels_up;
1439 : : } ReplaceVarnoContext;
1440 : :
1441 : : static bool
1442 : 30542 : replace_varno_walker(Node *node, ReplaceVarnoContext *ctx)
1443 : : {
1444 [ + + ]: 30542 : if (node == NULL)
1445 : 9080 : return false;
1446 [ + + ]: 21462 : if (IsA(node, Var))
1447 : : {
1448 : 9980 : Var *var = (Var *) node;
1449 : :
50 1450 [ + + ]: 9980 : if (var->varno == ctx->from &&
1451 [ + + ]: 4142 : var->varlevelsup == ctx->sublevels_up)
1452 : : {
172 1453 : 4136 : var->varno = ctx->to;
1454 : 4136 : var->varnosyn = ctx->to;
1455 : : }
1456 : 9980 : return false;
1457 : : }
111 1458 [ + + ]: 11482 : else if (IsA(node, PlaceHolderVar))
1459 : : {
1460 : 33 : PlaceHolderVar *phv = (PlaceHolderVar *) node;
1461 : :
50 1462 [ + - ]: 33 : if (phv->phlevelsup == ctx->sublevels_up)
1463 : : {
1464 : 33 : phv->phrels =
1465 : 33 : replace_relid(phv->phrels, ctx->from, ctx->to);
1466 : 33 : phv->phnullingrels =
1467 : 33 : replace_relid(phv->phnullingrels, ctx->from, ctx->to);
1468 : : }
1469 : :
1470 : : /* fall through to recurse into the placeholder's expression */
1471 : : }
1472 [ + + ]: 11449 : else if (IsA(node, Query))
1473 : : {
1474 : : /* Recurse into subselects */
1475 : : bool result;
1476 : :
1477 : 15 : ctx->sublevels_up++;
1478 : 15 : result = query_tree_walker((Query *) node,
1479 : : replace_varno_walker,
1480 : : (void *) ctx,
1481 : : QTW_EXAMINE_SORTGROUP);
1482 : 15 : ctx->sublevels_up--;
1483 : 15 : return result;
1484 : : }
111 1485 [ + + ]: 11434 : else if (IsA(node, RestrictInfo))
1486 : : {
172 1487 : 926 : RestrictInfo *rinfo = (RestrictInfo *) node;
1488 : 926 : int relid = -1;
1489 : 926 : bool is_req_equal =
1490 : 926 : (rinfo->required_relids == rinfo->clause_relids);
1491 : :
1492 [ + + ]: 926 : if (bms_is_member(ctx->from, rinfo->clause_relids))
1493 : : {
1494 : 899 : replace_varno((Node *) rinfo->clause, ctx->from, ctx->to);
1495 : 899 : replace_varno((Node *) rinfo->orclause, ctx->from, ctx->to);
50 1496 : 899 : rinfo->clause_relids =
1497 : 899 : replace_relid(rinfo->clause_relids, ctx->from, ctx->to);
1498 : 899 : rinfo->left_relids =
1499 : 899 : replace_relid(rinfo->left_relids, ctx->from, ctx->to);
1500 : 899 : rinfo->right_relids =
1501 : 899 : replace_relid(rinfo->right_relids, ctx->from, ctx->to);
1502 : : }
1503 : :
172 1504 [ + + ]: 926 : if (is_req_equal)
1505 : 6 : rinfo->required_relids = rinfo->clause_relids;
1506 : : else
50 1507 : 920 : rinfo->required_relids =
1508 : 920 : replace_relid(rinfo->required_relids, ctx->from, ctx->to);
1509 : :
1510 : 926 : rinfo->outer_relids =
1511 : 926 : replace_relid(rinfo->outer_relids, ctx->from, ctx->to);
1512 : 926 : rinfo->incompatible_relids =
1513 : 926 : replace_relid(rinfo->incompatible_relids, ctx->from, ctx->to);
1514 : :
172 1515 [ + + + + ]: 1759 : if (rinfo->mergeopfamilies &&
1516 : 833 : bms_get_singleton_member(rinfo->clause_relids, &relid) &&
1517 [ + - + - ]: 775 : relid == ctx->to && IsA(rinfo->clause, OpExpr))
1518 : : {
1519 : : Expr *leftOp;
1520 : : Expr *rightOp;
1521 : :
1522 : 775 : leftOp = (Expr *) get_leftop(rinfo->clause);
1523 : 775 : rightOp = (Expr *) get_rightop(rinfo->clause);
1524 : :
1525 [ + - + + ]: 775 : if (leftOp != NULL && equal(leftOp, rightOp))
1526 : : {
1527 : 621 : NullTest *ntest = makeNode(NullTest);
1528 : :
1529 : 621 : ntest->arg = leftOp;
1530 : 621 : ntest->nulltesttype = IS_NOT_NULL;
1531 : 621 : ntest->argisrow = false;
1532 : 621 : ntest->location = -1;
1533 : 621 : rinfo->clause = (Expr *) ntest;
1534 : 621 : rinfo->mergeopfamilies = NIL;
1535 : : }
1536 [ - + ]: 775 : Assert(rinfo->orclause == NULL);
1537 : : }
1538 : :
1539 : 926 : return false;
1540 : : }
1541 : :
50 1542 : 10541 : return expression_tree_walker(node, replace_varno_walker,
1543 : : (void *) ctx);
1544 : : }
1545 : :
1546 : : static void
1547 : 21255 : replace_varno(Node *node, int from, int to)
1548 : : {
1549 : : ReplaceVarnoContext ctx;
1550 : :
1551 [ + + ]: 21255 : if (to <= 0)
1552 : 14568 : return;
1553 : :
1554 : 6687 : ctx.from = from;
1555 : 6687 : ctx.to = to;
1556 : 6687 : ctx.sublevels_up = 0;
1557 : :
1558 : : /*
1559 : : * Must be prepared to start with a Query or a bare expression tree.
1560 : : */
1561 : 6687 : query_or_expression_tree_walker(node,
1562 : : replace_varno_walker,
1563 : : (void *) &ctx,
1564 : : QTW_EXAMINE_SORTGROUP);
1565 : : }
1566 : :
1567 : : /*
1568 : : * Substitute newId by oldId in relids.
1569 : : *
1570 : : * We must make a copy of the original Bitmapset before making any
1571 : : * modifications, because the same pointer to it might be shared among
1572 : : * different places.
1573 : : */
1574 : : static Bitmapset *
172 1575 : 485428 : replace_relid(Relids relids, int oldId, int newId)
1576 : : {
1577 [ + + ]: 485428 : if (oldId < 0)
1578 : 6033 : return relids;
1579 : :
1580 : : /* Delete relid without substitution. */
1581 [ + + ]: 479395 : if (newId < 0)
109 1582 : 462185 : return bms_del_member(bms_copy(relids), oldId);
1583 : :
1584 : : /* Substitute newId for oldId. */
172 1585 [ + + ]: 17210 : if (bms_is_member(oldId, relids))
109 1586 : 5464 : return bms_add_member(bms_del_member(bms_copy(relids), oldId), newId);
1587 : :
172 1588 : 11746 : return relids;
1589 : : }
1590 : :
1591 : : /*
1592 : : * Update EC members to point to the remaining relation instead of the removed
1593 : : * one, removing duplicates.
1594 : : *
1595 : : * Restriction clauses for base relations are already distributed to
1596 : : * the respective baserestrictinfo lists (see
1597 : : * generate_implied_equalities_for_column). The above code has already processed
1598 : : * this list, and updated these clauses to reference the remaining
1599 : : * relation, so we can skip them here based on their relids.
1600 : : *
1601 : : * Likewise, we have already processed the join clauses that join the
1602 : : * removed relation to the remaining one.
1603 : : *
1604 : : * Finally, there are join clauses that join the removed relation to
1605 : : * some third relation. We can't just delete the source clauses and
1606 : : * regenerate them from the EC because the corresponding equality
1607 : : * operators might be missing (see the handling of ec_broken).
1608 : : * Therefore, we will update the references in the source clauses.
1609 : : *
1610 : : * Derived clauses can be generated again, so it is simpler to just
1611 : : * delete them.
1612 : : */
1613 : : static void
1614 : 466 : update_eclasses(EquivalenceClass *ec, int from, int to)
1615 : : {
1616 : 466 : List *new_members = NIL;
1617 : 466 : List *new_sources = NIL;
1618 : : ListCell *lc;
1619 : : ListCell *lc1;
1620 : :
1621 [ + - + + : 1447 : foreach(lc, ec->ec_members)
+ + ]
1622 : : {
1623 : 981 : EquivalenceMember *em = lfirst_node(EquivalenceMember, lc);
1624 : 981 : bool is_redundant = false;
1625 : :
1626 [ + + ]: 981 : if (!bms_is_member(from, em->em_relids))
1627 : : {
1628 : 506 : new_members = lappend(new_members, em);
1629 : 506 : continue;
1630 : : }
1631 : :
1632 : 475 : em->em_relids = replace_relid(em->em_relids, from, to);
1633 : 475 : em->em_jdomain->jd_relids = replace_relid(em->em_jdomain->jd_relids, from, to);
1634 : :
1635 : : /* We only process inner joins */
1636 : 475 : replace_varno((Node *) em->em_expr, from, to);
1637 : :
1638 [ + + + + : 487 : foreach(lc1, new_members)
+ + ]
1639 : : {
1640 : 162 : EquivalenceMember *other = lfirst_node(EquivalenceMember, lc1);
1641 : :
1642 [ + + ]: 162 : if (!equal(em->em_relids, other->em_relids))
1643 : 12 : continue;
1644 : :
1645 [ + - ]: 150 : if (equal(em->em_expr, other->em_expr))
1646 : : {
1647 : 150 : is_redundant = true;
1648 : 150 : break;
1649 : : }
1650 : : }
1651 : :
1652 [ + + ]: 475 : if (!is_redundant)
1653 : 325 : new_members = lappend(new_members, em);
1654 : : }
1655 : :
1656 : 466 : list_free(ec->ec_members);
1657 : 466 : ec->ec_members = new_members;
1658 : :
1659 : 466 : list_free(ec->ec_derives);
1660 : 466 : ec->ec_derives = NULL;
1661 : :
1662 : : /* Update EC source expressions */
1663 [ + + + + : 981 : foreach(lc, ec->ec_sources)
+ + ]
1664 : : {
1665 : 515 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1666 : 515 : bool is_redundant = false;
1667 : :
1668 [ + + ]: 515 : if (!bms_is_member(from, rinfo->required_relids))
1669 : : {
1670 : 72 : new_sources = lappend(new_sources, rinfo);
1671 : 72 : continue;
1672 : : }
1673 : :
1674 : 443 : replace_varno((Node *) rinfo, from, to);
1675 : :
1676 : : /*
1677 : : * After switching the clause to the remaining relation, check it for
1678 : : * redundancy with existing ones. We don't have to check for
1679 : : * redundancy with derived clauses, because we've just deleted them.
1680 : : */
1681 [ + + + + : 476 : foreach(lc1, new_sources)
+ + ]
1682 : : {
1683 : 52 : RestrictInfo *other = lfirst_node(RestrictInfo, lc1);
1684 : :
1685 [ + + ]: 52 : if (!equal(rinfo->clause_relids, other->clause_relids))
1686 : 12 : continue;
1687 : :
1688 [ + + ]: 40 : if (equal(rinfo->clause, other->clause))
1689 : : {
1690 : 19 : is_redundant = true;
1691 : 19 : break;
1692 : : }
1693 : : }
1694 : :
1695 [ + + ]: 443 : if (!is_redundant)
1696 : 424 : new_sources = lappend(new_sources, rinfo);
1697 : : }
1698 : :
1699 : 466 : list_free(ec->ec_sources);
1700 : 466 : ec->ec_sources = new_sources;
1701 : 466 : ec->ec_relids = replace_relid(ec->ec_relids, from, to);
1702 : 466 : }
1703 : :
1704 : : /*
1705 : : * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field,
1706 : : * which makes almost every RestrictInfo unique. This type of comparison is
1707 : : * useful when removing duplicates while moving RestrictInfo's from removed
1708 : : * relation to remaining relation during self-join elimination.
1709 : : *
1710 : : * XXX: In the future, we might remove the 'rinfo_serial' field completely and
1711 : : * get rid of this function.
1712 : : */
1713 : : static bool
99 1714 : 278 : restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b)
1715 : : {
1716 : 278 : int saved_rinfo_serial = a->rinfo_serial;
1717 : : bool result;
1718 : :
1719 : 278 : a->rinfo_serial = b->rinfo_serial;
1720 : 278 : result = equal(a, b);
1721 : 278 : a->rinfo_serial = saved_rinfo_serial;
1722 : :
1723 : 278 : return result;
1724 : : }
1725 : :
1726 : : /*
1727 : : * Remove a relation after we have proven that it participates only in an
1728 : : * unneeded unique self join.
1729 : : *
1730 : : * Replace any links in planner info structures.
1731 : : *
1732 : : * Transfer join and restriction clauses from the removed relation to the
1733 : : * remaining one. We change the Vars of the clause to point to the
1734 : : * remaining relation instead of the removed one. The clauses that require
1735 : : * a subset of joinrelids become restriction clauses of the remaining
1736 : : * relation, and others remain join clauses. We append them to
1737 : : * baserestrictinfo and joininfo respectively, trying not to introduce
1738 : : * duplicates.
1739 : : *
1740 : : * We also have to process the 'joinclauses' list here, because it
1741 : : * contains EC-derived join clauses which must become filter clauses. It
1742 : : * is not enough to just correct the ECs because the EC-derived
1743 : : * restrictions are generated before join removal (see
1744 : : * generate_base_implied_equalities).
1745 : : */
1746 : : static void
172 1747 : 313 : remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
1748 : : RelOptInfo *toKeep, RelOptInfo *toRemove,
1749 : : List *restrictlist)
1750 : : {
1751 : : List *joininfos;
1752 : : ListCell *lc;
1753 : : int i;
1754 : 313 : List *jinfo_candidates = NIL;
1755 : 313 : List *binfo_candidates = NIL;
1756 : :
1757 [ - + ]: 313 : Assert(toKeep->relid != -1);
1758 : :
1759 : : /*
1760 : : * Replace the index of the removing table with the keeping one. The
1761 : : * technique of removing/distributing restrictinfo is used here to attach
1762 : : * just appeared (for keeping relation) join clauses and avoid adding
1763 : : * duplicates of those that already exist in the joininfo list.
1764 : : */
1765 : 313 : joininfos = list_copy(toRemove->joininfo);
1766 [ + + + + : 358 : foreach(lc, joininfos)
+ + ]
1767 : : {
1768 : 45 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1769 : :
1770 : 45 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
1771 : 45 : replace_varno((Node *) rinfo, toRemove->relid, toKeep->relid);
1772 : :
1773 [ + + ]: 45 : if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
1774 : 30 : jinfo_candidates = lappend(jinfo_candidates, rinfo);
1775 : : else
1776 : 15 : binfo_candidates = lappend(binfo_candidates, rinfo);
1777 : : }
1778 : :
1779 : : /*
1780 : : * Concatenate restrictlist to the list of base restrictions of the
1781 : : * removing table just to simplify the replacement procedure: all of them
1782 : : * weren't connected to any keeping relations and need to be added to some
1783 : : * rels.
1784 : : */
1785 : 313 : toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
1786 : : restrictlist);
1787 [ + - + + : 745 : foreach(lc, toRemove->baserestrictinfo)
+ + ]
1788 : : {
1789 : 432 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1790 : :
1791 : 432 : replace_varno((Node *) rinfo, toRemove->relid, toKeep->relid);
1792 : :
1793 [ - + ]: 432 : if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
172 akorotkov@postgresql 1794 :UNC 0 : jinfo_candidates = lappend(jinfo_candidates, rinfo);
1795 : : else
172 akorotkov@postgresql 1796 :GNC 432 : binfo_candidates = lappend(binfo_candidates, rinfo);
1797 : : }
1798 : :
1799 : : /*
1800 : : * Now, add all non-redundant clauses to the keeping relation.
1801 : : * Contradictory operation. On the one side, we reduce the length of
1802 : : * restrict lists that can impact planning or executing time.
1803 : : * Additionally, we improve the accuracy of cardinality estimation. On the
1804 : : * other side, it is one more place that can make planning time much
1805 : : * longer in specific cases. It would have been better to avoid calling
1806 : : * the equal() function here, but it's the only way to detect duplicated
1807 : : * inequality expressions.
1808 : : */
1809 [ + - + + : 760 : foreach(lc, binfo_candidates)
+ + ]
1810 : : {
1811 : 447 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1812 : 447 : ListCell *olc = NULL;
1813 : 447 : bool is_redundant = false;
1814 : :
1815 [ - + ]: 447 : Assert(!bms_is_member(toRemove->relid, rinfo->required_relids));
1816 : :
1817 [ + + + + : 629 : foreach(olc, toKeep->baserestrictinfo)
+ + ]
1818 : : {
1819 : 281 : RestrictInfo *src = lfirst_node(RestrictInfo, olc);
1820 : :
1821 [ + + ]: 281 : if (!bms_equal(src->clause_relids, rinfo->clause_relids))
1822 : : /* Const and non-const expressions can't be equal */
1823 : 9 : continue;
1824 : :
1825 [ + - ]: 272 : if (src == rinfo ||
1826 [ + + ]: 272 : (rinfo->parent_ec != NULL
1827 [ + + ]: 152 : && src->parent_ec == rinfo->parent_ec)
99 1828 [ + + ]: 269 : || restrict_infos_logically_equal(rinfo, src))
1829 : : {
172 1830 : 99 : is_redundant = true;
1831 : 99 : break;
1832 : : }
1833 : : }
1834 [ + + ]: 447 : if (!is_redundant)
1835 : 348 : distribute_restrictinfo_to_rels(root, rinfo);
1836 : : }
1837 [ + + + + : 343 : foreach(lc, jinfo_candidates)
+ + ]
1838 : : {
1839 : 30 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1840 : 30 : ListCell *olc = NULL;
1841 : 30 : bool is_redundant = false;
1842 : :
1843 [ - + ]: 30 : Assert(!bms_is_member(toRemove->relid, rinfo->required_relids));
1844 : :
1845 [ + + + + : 39 : foreach(olc, toKeep->joininfo)
+ + ]
1846 : : {
1847 : 9 : RestrictInfo *src = lfirst_node(RestrictInfo, olc);
1848 : :
1849 [ - + ]: 9 : if (!bms_equal(src->clause_relids, rinfo->clause_relids))
1850 : : /* Can't compare trivially different clauses */
172 akorotkov@postgresql 1851 :UNC 0 : continue;
1852 : :
172 akorotkov@postgresql 1853 [ + - ]:GNC 9 : if (src == rinfo ||
1854 [ - + ]: 9 : (rinfo->parent_ec != NULL
172 akorotkov@postgresql 1855 [ # # ]:UNC 0 : && src->parent_ec == rinfo->parent_ec)
99 akorotkov@postgresql 1856 [ - + ]:GNC 9 : || restrict_infos_logically_equal(rinfo, src))
1857 : : {
172 akorotkov@postgresql 1858 :UNC 0 : is_redundant = true;
1859 : 0 : break;
1860 : : }
1861 : : }
172 akorotkov@postgresql 1862 [ + - ]:GNC 30 : if (!is_redundant)
1863 : 30 : distribute_restrictinfo_to_rels(root, rinfo);
1864 : : }
1865 : 313 : list_free(binfo_candidates);
1866 : 313 : list_free(jinfo_candidates);
1867 : :
1868 : : /*
1869 : : * Arrange equivalence classes, mentioned removing a table, with the
1870 : : * keeping one: varno of removing table should be replaced in members and
1871 : : * sources lists. Also, remove duplicated elements if this replacement
1872 : : * procedure created them.
1873 : : */
1874 : 313 : i = -1;
1875 [ + + ]: 779 : while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
1876 : : {
1877 : 466 : EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
1878 : :
1879 : 466 : update_eclasses(ec, toRemove->relid, toKeep->relid);
1880 : 466 : toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
1881 : : }
1882 : :
1883 : : /*
1884 : : * Transfer the targetlist and attr_needed flags.
1885 : : */
1886 : :
1887 [ + - + + : 1242 : foreach(lc, toRemove->reltarget->exprs)
+ + ]
1888 : : {
1889 : 929 : Node *node = lfirst(lc);
1890 : :
1891 : 929 : replace_varno(node, toRemove->relid, toKeep->relid);
1892 [ + + ]: 929 : if (!list_member(toKeep->reltarget->exprs, node))
1893 : 113 : toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
1894 : : }
1895 : :
1896 [ + + ]: 4093 : for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
1897 : : {
1898 : 3780 : int attno = i - toKeep->min_attr;
1899 : :
1900 : 7560 : toRemove->attr_needed[attno] = replace_relid(toRemove->attr_needed[attno],
1901 : 3780 : toRemove->relid, toKeep->relid);
1902 : 3780 : toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
1903 : 3780 : toRemove->attr_needed[attno]);
1904 : : }
1905 : :
1906 : : /*
1907 : : * If the removed relation has a row mark, transfer it to the remaining
1908 : : * one.
1909 : : *
1910 : : * If both rels have row marks, just keep the one corresponding to the
1911 : : * remaining relation, because we verified earlier that they have the same
1912 : : * strength.
1913 : : */
1914 [ + + ]: 313 : if (rmark)
1915 : : {
1916 [ + - ]: 40 : if (kmark)
1917 : : {
1918 [ - + ]: 40 : Assert(kmark->markType == rmark->markType);
1919 : :
1920 : 40 : root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
1921 : : }
1922 : : else
1923 : : {
1924 : : /* Shouldn't have inheritance children here. */
172 akorotkov@postgresql 1925 [ # # ]:UNC 0 : Assert(rmark->rti == rmark->prti);
1926 : :
1927 : 0 : rmark->rti = rmark->prti = toKeep->relid;
1928 : : }
1929 : : }
1930 : :
1931 : : /* Replace varno in all the query structures */
50 akorotkov@postgresql 1932 :GNC 313 : replace_varno((Node *) root->parse, toRemove->relid, toKeep->relid);
1933 : :
1934 : : /* See remove_self_joins_one_group() */
96 1935 [ - + ]: 313 : Assert(root->parse->resultRelation != toRemove->relid);
1936 [ - + ]: 313 : Assert(root->parse->resultRelation != toKeep->relid);
1937 : :
1938 : : /* Replace links in the planner info */
172 1939 : 313 : remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
1940 : :
1941 : : /* At last, replace varno in root targetlist and HAVING clause */
1942 : 313 : replace_varno((Node *) root->processed_tlist,
1943 : 313 : toRemove->relid, toKeep->relid);
1944 : 313 : replace_varno((Node *) root->processed_groupClause,
1945 : 313 : toRemove->relid, toKeep->relid);
103 1946 : 313 : replace_relid(root->all_result_relids, toRemove->relid, toKeep->relid);
1947 : 313 : replace_relid(root->leaf_result_relids, toRemove->relid, toKeep->relid);
1948 : :
1949 : :
1950 : : /*
1951 : : * There may be references to the rel in root->fkey_list, but if so,
1952 : : * match_foreign_keys_to_quals() will get rid of them.
1953 : : */
1954 : :
1955 : : /*
1956 : : * Finally, remove the rel from the baserel array to prevent it from being
1957 : : * referenced again. (We can't do this earlier because
1958 : : * remove_join_clause_from_rels will touch it.)
1959 : : */
172 1960 : 313 : root->simple_rel_array[toRemove->relid] = NULL;
1961 : :
1962 : : /* And nuke the RelOptInfo, just in case there's another access path */
1963 : 313 : pfree(toRemove);
1964 : 313 : }
1965 : :
1966 : : /*
1967 : : * split_selfjoin_quals
1968 : : * Processes 'joinquals' by building two lists: one containing the quals
1969 : : * where the columns/exprs are on either side of the join match, and
1970 : : * another one containing the remaining quals.
1971 : : *
1972 : : * 'joinquals' must only contain quals for a RTE_RELATION being joined to
1973 : : * itself.
1974 : : */
1975 : : static void
1976 : 2811 : split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
1977 : : List **otherjoinquals, int from, int to)
1978 : : {
1979 : : ListCell *lc;
1980 : 2811 : List *sjoinquals = NIL;
1981 : 2811 : List *ojoinquals = NIL;
1982 : :
1983 [ + + + + : 3815 : foreach(lc, joinquals)
+ + ]
1984 : : {
1985 : 1004 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1986 : : OpExpr *expr;
1987 : : Node *leftexpr;
1988 : : Node *rightexpr;
1989 : :
1990 : : /* In general, clause looks like F(arg1) = G(arg2) */
1991 [ + - + - ]: 2008 : if (!rinfo->mergeopfamilies ||
1992 [ + - ]: 2008 : bms_num_members(rinfo->clause_relids) != 2 ||
1993 [ - + ]: 2008 : bms_membership(rinfo->left_relids) != BMS_SINGLETON ||
1994 : 1004 : bms_membership(rinfo->right_relids) != BMS_SINGLETON)
1995 : : {
172 akorotkov@postgresql 1996 :UNC 0 : ojoinquals = lappend(ojoinquals, rinfo);
1997 : 0 : continue;
1998 : : }
1999 : :
172 akorotkov@postgresql 2000 :GNC 1004 : expr = (OpExpr *) rinfo->clause;
2001 : :
2002 [ + - - + ]: 1004 : if (!IsA(expr, OpExpr) || list_length(expr->args) != 2)
2003 : : {
172 akorotkov@postgresql 2004 :UNC 0 : ojoinquals = lappend(ojoinquals, rinfo);
2005 : 0 : continue;
2006 : : }
2007 : :
172 akorotkov@postgresql 2008 :GNC 1004 : leftexpr = get_leftop(rinfo->clause);
2009 : 1004 : rightexpr = copyObject(get_rightop(rinfo->clause));
2010 : :
2011 [ + - + + ]: 1004 : if (leftexpr && IsA(leftexpr, RelabelType))
2012 : 6 : leftexpr = (Node *) ((RelabelType *) leftexpr)->arg;
2013 [ + - + + ]: 1004 : if (rightexpr && IsA(rightexpr, RelabelType))
2014 : 3 : rightexpr = (Node *) ((RelabelType *) rightexpr)->arg;
2015 : :
2016 : : /*
2017 : : * Quite an expensive operation, narrowing the use case. For example,
2018 : : * when we have cast of the same var to different (but compatible)
2019 : : * types.
2020 : : */
2021 : 1004 : replace_varno(rightexpr, bms_singleton_member(rinfo->right_relids),
2022 : 1004 : bms_singleton_member(rinfo->left_relids));
2023 : :
2024 [ + + ]: 1004 : if (equal(leftexpr, rightexpr))
2025 : 773 : sjoinquals = lappend(sjoinquals, rinfo);
2026 : : else
2027 : 231 : ojoinquals = lappend(ojoinquals, rinfo);
2028 : : }
2029 : :
2030 : 2811 : *selfjoinquals = sjoinquals;
2031 : 2811 : *otherjoinquals = ojoinquals;
2032 : 2811 : }
2033 : :
2034 : : /*
2035 : : * Check for a case when uniqueness is at least partly derived from a
2036 : : * baserestrictinfo clause. In this case, we have a chance to return only
2037 : : * one row (if such clauses on both sides of SJ are equal) or nothing (if they
2038 : : * are different).
2039 : : */
2040 : : static bool
2041 : 353 : match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
2042 : : Index relid)
2043 : : {
2044 : : ListCell *lc;
2045 : :
2046 [ + + + + : 417 : foreach(lc, uclauses)
+ + ]
2047 : : {
2048 : 104 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
2049 : : Expr *clause;
2050 : : Node *iclause;
2051 : : Node *c1;
2052 : 104 : bool matched = false;
2053 : : ListCell *olc;
2054 : :
2055 [ + - - + ]: 104 : Assert(outer->relid > 0 && relid > 0);
2056 : :
2057 : : /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
2058 [ - + ]: 104 : Assert(bms_is_empty(rinfo->left_relids) ^
2059 : : bms_is_empty(rinfo->right_relids));
2060 : :
2061 : 104 : clause = (Expr *) copyObject(rinfo->clause);
2062 : 104 : replace_varno((Node *) clause, relid, outer->relid);
2063 : :
2064 [ + + ]: 104 : iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) :
2065 : 101 : get_leftop(clause);
2066 [ + + ]: 104 : c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) :
2067 : 101 : get_rightop(clause);
2068 : :
2069 : : /*
2070 : : * Compare these left and right sides with the corresponding sides of
2071 : : * the outer's filters. If no one is detected - return immediately.
2072 : : */
2073 [ + + + + : 159 : foreach(olc, outer->baserestrictinfo)
+ + ]
2074 : : {
2075 : 119 : RestrictInfo *orinfo = lfirst_node(RestrictInfo, olc);
2076 : : Node *oclause;
2077 : : Node *c2;
2078 : :
2079 [ + + ]: 119 : if (orinfo->mergeopfamilies == NIL)
2080 : : /* Don't consider clauses which aren't similar to 'F(X)=G(Y)' */
2081 : 30 : continue;
2082 : :
2083 [ - + ]: 89 : Assert(is_opclause(orinfo->clause));
2084 : :
2085 : 178 : oclause = bms_is_empty(orinfo->left_relids) ?
2086 [ + + ]: 89 : get_rightop(orinfo->clause) : get_leftop(orinfo->clause);
2087 : 178 : c2 = (bms_is_empty(orinfo->left_relids) ?
2088 [ + + ]: 89 : get_leftop(orinfo->clause) : get_rightop(orinfo->clause));
2089 : :
2090 [ + + + + ]: 89 : if (equal(iclause, oclause) && equal(c1, c2))
2091 : : {
2092 : 64 : matched = true;
2093 : 64 : break;
2094 : : }
2095 : : }
2096 : :
2097 [ + + ]: 104 : if (!matched)
2098 : 40 : return false;
2099 : : }
2100 : :
2101 : 313 : return true;
2102 : : }
2103 : :
2104 : : /*
2105 : : * Find and remove unique self joins in a group of base relations that have
2106 : : * the same Oid.
2107 : : *
2108 : : * Returns a set of relids that were removed.
2109 : : */
2110 : : static Relids
2111 : 6283 : remove_self_joins_one_group(PlannerInfo *root, Relids relids)
2112 : : {
2113 : 6283 : Relids result = NULL;
2114 : : int k; /* Index of kept relation */
2115 : 6283 : int r = -1; /* Index of removed relation */
2116 : :
2117 [ + + ]: 20458 : while ((r = bms_next_member(relids, r)) > 0)
2118 : : {
2119 : 14175 : RelOptInfo *inner = root->simple_rel_array[r];
2120 : :
2121 : : /*
2122 : : * We don't accept result relation as either source or target relation
2123 : : * of SJE, because result relation has different behavior in
2124 : : * EvalPlanQual() and RETURNING clause.
2125 : : */
96 2126 [ + + ]: 14175 : if (root->parse->resultRelation == r)
2127 : 126 : continue;
2128 : :
172 2129 : 14049 : k = r;
2130 : :
2131 [ + + ]: 23207 : while ((k = bms_next_member(relids, k)) > 0)
2132 : : {
2133 : 9471 : Relids joinrelids = NULL;
2134 : 9471 : RelOptInfo *outer = root->simple_rel_array[k];
2135 : : List *restrictlist;
2136 : : List *selfjoinquals;
2137 : : List *otherjoinquals;
2138 : : ListCell *lc;
2139 : 9471 : bool jinfo_check = true;
2140 : 9471 : PlanRowMark *omark = NULL;
2141 : 9471 : PlanRowMark *imark = NULL;
2142 : 9471 : List *uclauses = NIL;
2143 : :
96 2144 [ + + ]: 9471 : if (root->parse->resultRelation == k)
2145 : 9158 : continue;
2146 : :
2147 : : /* A sanity check: the relations have the same Oid. */
172 2148 [ - + ]: 9462 : Assert(root->simple_rte_array[k]->relid ==
2149 : : root->simple_rte_array[r]->relid);
2150 : :
2151 : : /*
2152 : : * It is impossible to eliminate join of two relations if they
2153 : : * belong to different rules of order. Otherwise planner can't be
2154 : : * able to find any variants of correct query plan.
2155 : : */
2156 [ + + + + : 12132 : foreach(lc, root->join_info_list)
+ + ]
2157 : : {
2158 : 9295 : SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
2159 : :
2160 [ + + ]: 18590 : if ((bms_is_member(k, info->syn_lefthand) ^
2161 [ + + ]: 13758 : bms_is_member(r, info->syn_lefthand)) ||
2162 : 4463 : (bms_is_member(k, info->syn_righthand) ^
2163 : 4463 : bms_is_member(r, info->syn_righthand)))
2164 : : {
2165 : 6625 : jinfo_check = false;
2166 : 6625 : break;
2167 : : }
2168 : : }
2169 [ + + ]: 9462 : if (!jinfo_check)
2170 : 6625 : continue;
2171 : :
2172 : : /*
2173 : : * Check Row Marks equivalence. We can't remove the join if the
2174 : : * relations have row marks of different strength (e.g. one is
2175 : : * locked FOR UPDATE and another just has ROW_MARK_REFERENCE for
2176 : : * EvalPlanQual rechecking).
2177 : : */
2178 [ + + + - : 2929 : foreach(lc, root->rowMarks)
+ + ]
2179 : : {
2180 : 172 : PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
2181 : :
2182 [ + + ]: 172 : if (rowMark->rti == k)
2183 : : {
2184 [ - + ]: 80 : Assert(imark == NULL);
2185 : 80 : imark = rowMark;
2186 : : }
2187 [ + + ]: 92 : else if (rowMark->rti == r)
2188 : : {
2189 [ - + ]: 80 : Assert(omark == NULL);
2190 : 80 : omark = rowMark;
2191 : : }
2192 : :
2193 [ + + + + ]: 172 : if (omark && imark)
2194 : 80 : break;
2195 : : }
2196 [ + + + - : 2837 : if (omark && imark && omark->markType != imark->markType)
+ + ]
2197 : 26 : continue;
2198 : :
2199 : : /*
2200 : : * We only deal with base rels here, so their relids bitset
2201 : : * contains only one member -- their relid.
2202 : : */
2203 : 2811 : joinrelids = bms_add_member(joinrelids, r);
2204 : 2811 : joinrelids = bms_add_member(joinrelids, k);
2205 : :
2206 : : /*
2207 : : * PHVs should not impose any constraints on removing self joins.
2208 : : */
2209 : :
2210 : : /*
2211 : : * At this stage, joininfo lists of inner and outer can contain
2212 : : * only clauses, required for a superior outer join that can't
2213 : : * influence this optimization. So, we can avoid to call the
2214 : : * build_joinrel_restrictlist() routine.
2215 : : */
2216 : 2811 : restrictlist = generate_join_implied_equalities(root, joinrelids,
2217 : : inner->relids,
2218 : : outer, NULL);
2219 : :
2220 : : /*
2221 : : * Process restrictlist to separate the self join quals out of the
2222 : : * other quals. e.g x = x goes to selfjoinquals and a = b to
2223 : : * otherjoinquals.
2224 : : */
2225 : 2811 : split_selfjoin_quals(root, restrictlist, &selfjoinquals,
2226 : 2811 : &otherjoinquals, inner->relid, outer->relid);
2227 : :
2228 : : /*
2229 : : * To enable SJE for the only degenerate case without any self
2230 : : * join clauses at all, add baserestrictinfo to this list. The
2231 : : * degenerate case works only if both sides have the same clause.
2232 : : * So doesn't matter which side to add.
2233 : : */
2234 : 2811 : selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo);
2235 : :
2236 : : /*
2237 : : * Determine if the inner table can duplicate outer rows. We must
2238 : : * bypass the unique rel cache here since we're possibly using a
2239 : : * subset of join quals. We can use 'force_cache' == true when all
2240 : : * join quals are self-join quals. Otherwise, we could end up
2241 : : * putting false negatives in the cache.
2242 : : */
2243 [ + + ]: 2811 : if (!innerrel_is_unique_ext(root, joinrelids, inner->relids,
2244 : : outer, JOIN_INNER, selfjoinquals,
2245 : 2811 : list_length(otherjoinquals) == 0,
2246 : : &uclauses))
2247 : 2458 : continue;
2248 : :
2249 : : /*
2250 : : * We have proven that for both relations, the same unique index
2251 : : * guarantees that there is at most one row where columns equal
2252 : : * given values. These values must be the same for both relations,
2253 : : * or else we won't match the same row on each side of the join.
2254 : : */
2255 [ + + ]: 353 : if (!match_unique_clauses(root, inner, uclauses, outer->relid))
2256 : 40 : continue;
2257 : :
2258 : : /*
2259 : : * We can remove either relation, so remove the inner one in order
2260 : : * to simplify this loop.
2261 : : */
2262 : 313 : remove_self_join_rel(root, omark, imark, outer, inner, restrictlist);
2263 : :
2264 : 313 : result = bms_add_member(result, r);
2265 : :
2266 : : /* We have removed the outer relation, try the next one. */
2267 : 313 : break;
2268 : : }
2269 : : }
2270 : :
2271 : 6283 : return result;
2272 : : }
2273 : :
2274 : : /*
2275 : : * Gather indexes of base relations from the joinlist and try to eliminate self
2276 : : * joins.
2277 : : */
2278 : : static Relids
2279 : 42489 : remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
2280 : : {
2281 : : ListCell *jl;
2282 : 42489 : Relids relids = NULL;
2283 : 42489 : SelfJoinCandidate *candidates = NULL;
2284 : : int i;
2285 : : int j;
2286 : : int numRels;
2287 : :
2288 : : /* Collect indexes of base relations of the join tree */
2289 [ + - + + : 141899 : foreach(jl, joinlist)
+ + ]
2290 : : {
2291 : 99410 : Node *jlnode = (Node *) lfirst(jl);
2292 : :
2293 [ + + ]: 99410 : if (IsA(jlnode, RangeTblRef))
2294 : : {
2295 : 97769 : RangeTblRef *ref = (RangeTblRef *) jlnode;
2296 : 97769 : RangeTblEntry *rte = root->simple_rte_array[ref->rtindex];
2297 : :
2298 : : /*
2299 : : * We only care about base relations from which we select
2300 : : * something.
2301 : : */
2302 [ + + ]: 97769 : if (rte->rtekind == RTE_RELATION &&
2303 [ + + ]: 85790 : rte->relkind == RELKIND_RELATION &&
2304 [ + - ]: 83411 : root->simple_rel_array[ref->rtindex] != NULL)
2305 : : {
2306 [ - + ]: 83411 : Assert(!bms_is_member(ref->rtindex, relids));
2307 : 83411 : relids = bms_add_member(relids, ref->rtindex);
2308 : : }
2309 : : }
2310 [ + - ]: 1641 : else if (IsA(jlnode, List))
2311 : : /* Recursively go inside the sub-joinlist */
2312 : 1641 : toRemove = remove_self_joins_recurse(root, (List *) jlnode,
2313 : : toRemove);
2314 : : else
172 akorotkov@postgresql 2315 [ # # ]:UNC 0 : elog(ERROR, "unrecognized joinlist node type: %d",
2316 : : (int) nodeTag(jlnode));
2317 : : }
2318 : :
172 akorotkov@postgresql 2319 :GNC 42489 : numRels = bms_num_members(relids);
2320 : :
2321 : : /* Need at least two relations for the join */
2322 [ + + ]: 42489 : if (numRels < 2)
2323 : 11853 : return toRemove;
2324 : :
2325 : : /*
2326 : : * In order to find relations with the same oid we first build an array of
2327 : : * candidates and then sort it by oid.
2328 : : */
2329 : 30636 : candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) *
2330 : : numRels);
2331 : 30636 : i = -1;
2332 : 30636 : j = 0;
2333 [ + + ]: 105790 : while ((i = bms_next_member(relids, i)) >= 0)
2334 : : {
2335 : 75154 : candidates[j].relid = i;
2336 : 75154 : candidates[j].reloid = root->simple_rte_array[i]->relid;
2337 : 75154 : j++;
2338 : : }
2339 : :
58 nathan@postgresql.or 2340 : 30636 : qsort(candidates, numRels, sizeof(SelfJoinCandidate),
2341 : : self_join_candidates_cmp);
2342 : :
2343 : : /*
2344 : : * Iteratively form a group of relation indexes with the same oid and
2345 : : * launch the routine that detects self-joins in this group and removes
2346 : : * excessive range table entries.
2347 : : *
2348 : : * At the end of the iteration, exclude the group from the overall relids
2349 : : * list. So each next iteration of the cycle will involve less and less
2350 : : * value of relids.
2351 : : */
172 akorotkov@postgresql 2352 : 30636 : i = 0;
2353 [ + + ]: 105790 : for (j = 1; j < numRels + 1; j++)
2354 : : {
2355 [ + + + + ]: 75154 : if (j == numRels || candidates[j].reloid != candidates[i].reloid)
2356 : : {
2357 [ + + ]: 67295 : if (j - i >= 2)
2358 : : {
2359 : : /* Create a group of relation indexes with the same oid */
2360 : 6253 : Relids group = NULL;
2361 : : Relids removed;
2362 : :
2363 [ + + ]: 20365 : while (i < j)
2364 : : {
2365 : 14112 : group = bms_add_member(group, candidates[i].relid);
2366 : 14112 : i++;
2367 : : }
2368 : :
2369 : 6253 : relids = bms_del_members(relids, group);
2370 : :
2371 : : /*
2372 : : * Try to remove self-joins from a group of identical entries.
2373 : : * Make the next attempt iteratively - if something is deleted
2374 : : * from a group, changes in clauses and equivalence classes
2375 : : * can give us a chance to find more candidates.
2376 : : */
2377 : : do
2378 : : {
2379 [ - + ]: 6283 : Assert(!bms_overlap(group, toRemove));
2380 : 6283 : removed = remove_self_joins_one_group(root, group);
2381 : 6283 : toRemove = bms_add_members(toRemove, removed);
2382 : 6283 : group = bms_del_members(group, removed);
2383 [ + + + + ]: 6584 : } while (!bms_is_empty(removed) &&
2384 : 301 : bms_membership(group) == BMS_MULTIPLE);
2385 : 6253 : bms_free(removed);
2386 : 6253 : bms_free(group);
2387 : : }
2388 : : else
2389 : : {
2390 : : /* Single relation, just remove it from the set */
2391 : 61042 : relids = bms_del_member(relids, candidates[i].relid);
2392 : 61042 : i = j;
2393 : : }
2394 : : }
2395 : : }
2396 : :
2397 [ - + ]: 30636 : Assert(bms_is_empty(relids));
2398 : :
2399 : 30636 : return toRemove;
2400 : : }
2401 : :
2402 : : /*
2403 : : * Compare self-join candidates by their oids.
2404 : : */
2405 : : static int
2406 : 56185 : self_join_candidates_cmp(const void *a, const void *b)
2407 : : {
2408 : 56185 : const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a;
2409 : 56185 : const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b;
2410 : :
2411 [ + + ]: 56185 : if (ca->reloid != cb->reloid)
2412 [ + + ]: 48296 : return (ca->reloid < cb->reloid ? -1 : 1);
2413 : : else
2414 : 7889 : return 0;
2415 : : }
2416 : :
2417 : : /*
2418 : : * Find and remove useless self joins.
2419 : : *
2420 : : * Search for joins where a relation is joined to itself. If the join clause
2421 : : * for each tuple from one side of the join is proven to match the same
2422 : : * physical row (or nothing) on the other side, that self-join can be
2423 : : * eliminated from the query. Suitable join clauses are assumed to be in the
2424 : : * form of X = X, and can be replaced with NOT NULL clauses.
2425 : : *
2426 : : * For the sake of simplicity, we don't apply this optimization to special
2427 : : * joins. Here is a list of what we could do in some particular cases:
2428 : : * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
2429 : : * and then removed normally.
2430 : : * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
2431 : : * (IS NULL on join columns OR NOT inner quals)'.
2432 : : * 'a a1 left join a a2': could simplify to a scan like inner but without
2433 : : * NOT NULL conditions on join columns.
2434 : : * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
2435 : : * can both remove rows and introduce duplicates.
2436 : : *
2437 : : * To search for removable joins, we order all the relations on their Oid,
2438 : : * go over each set with the same Oid, and consider each pair of relations
2439 : : * in this set.
2440 : : *
2441 : : * To remove the join, we mark one of the participating relations as dead
2442 : : * and rewrite all references to it to point to the remaining relation.
2443 : : * This includes modifying RestrictInfos, EquivalenceClasses, and
2444 : : * EquivalenceMembers. We also have to modify the row marks. The join clauses
2445 : : * of the removed relation become either restriction or join clauses, based on
2446 : : * whether they reference any relations not participating in the removed join.
2447 : : *
2448 : : * 'targetlist' is the top-level targetlist of the query. If it has any
2449 : : * references to the removed relations, we update them to point to the
2450 : : * remaining ones.
2451 : : */
2452 : : List *
2453 : 139537 : remove_useless_self_joins(PlannerInfo *root, List *joinlist)
2454 : : {
2455 : 139537 : Relids toRemove = NULL;
2456 : 139537 : int relid = -1;
2457 : :
2458 [ + - + - : 279074 : if (!enable_self_join_removal || joinlist == NIL ||
+ + ]
2459 [ + + ]: 238654 : (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List)))
2460 : 98689 : return joinlist;
2461 : :
2462 : : /*
2463 : : * Merge pairs of relations participated in self-join. Remove unnecessary
2464 : : * range table entries.
2465 : : */
2466 : 40848 : toRemove = remove_self_joins_recurse(root, joinlist, toRemove);
2467 : :
2468 [ + + ]: 40848 : if (unlikely(toRemove != NULL))
2469 : : {
2470 : 295 : int nremoved = 0;
2471 : :
2472 : : /* At the end, remove orphaned relation links */
2473 [ + + ]: 608 : while ((relid = bms_next_member(toRemove, relid)) >= 0)
2474 : 313 : joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved);
2475 : : }
2476 : :
2477 : 40848 : return joinlist;
2478 : : }
|