Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * joinpath.c
4 : : * Routines to find all possible paths for processing a set of joins
5 : : *
6 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/optimizer/path/joinpath.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include <math.h>
18 : :
19 : : #include "executor/executor.h"
20 : : #include "foreign/fdwapi.h"
21 : : #include "nodes/nodeFuncs.h"
22 : : #include "optimizer/cost.h"
23 : : #include "optimizer/optimizer.h"
24 : : #include "optimizer/pathnode.h"
25 : : #include "optimizer/paths.h"
26 : : #include "optimizer/planmain.h"
27 : : #include "utils/typcache.h"
28 : :
29 : : /* Hook for plugins to get control in add_paths_to_joinrel() */
30 : : set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
31 : :
32 : : /*
33 : : * Paths parameterized by a parent rel can be considered to be parameterized
34 : : * by any of its children, when we are performing partitionwise joins. These
35 : : * macros simplify checking for such cases. Beware multiple eval of args.
36 : : */
37 : : #define PATH_PARAM_BY_PARENT(path, rel) \
38 : : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
39 : : (rel)->top_parent_relids))
40 : : #define PATH_PARAM_BY_REL_SELF(path, rel) \
41 : : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
42 : :
43 : : #define PATH_PARAM_BY_REL(path, rel) \
44 : : (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
45 : :
46 : : static void try_partial_mergejoin_path(PlannerInfo *root,
47 : : RelOptInfo *joinrel,
48 : : Path *outer_path,
49 : : Path *inner_path,
50 : : List *pathkeys,
51 : : List *mergeclauses,
52 : : List *outersortkeys,
53 : : List *innersortkeys,
54 : : JoinType jointype,
55 : : JoinPathExtraData *extra);
56 : : static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
57 : : RelOptInfo *outerrel, RelOptInfo *innerrel,
58 : : JoinType jointype, JoinPathExtraData *extra);
59 : : static inline bool clause_sides_match_join(RestrictInfo *rinfo,
60 : : RelOptInfo *outerrel,
61 : : RelOptInfo *innerrel);
62 : : static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
63 : : RelOptInfo *outerrel, RelOptInfo *innerrel,
64 : : JoinType jointype, JoinPathExtraData *extra);
65 : : static void consider_parallel_nestloop(PlannerInfo *root,
66 : : RelOptInfo *joinrel,
67 : : RelOptInfo *outerrel,
68 : : RelOptInfo *innerrel,
69 : : JoinType jointype,
70 : : JoinPathExtraData *extra);
71 : : static void consider_parallel_mergejoin(PlannerInfo *root,
72 : : RelOptInfo *joinrel,
73 : : RelOptInfo *outerrel,
74 : : RelOptInfo *innerrel,
75 : : JoinType jointype,
76 : : JoinPathExtraData *extra,
77 : : Path *inner_cheapest_total);
78 : : static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
79 : : RelOptInfo *outerrel, RelOptInfo *innerrel,
80 : : JoinType jointype, JoinPathExtraData *extra);
81 : : static List *select_mergejoin_clauses(PlannerInfo *root,
82 : : RelOptInfo *joinrel,
83 : : RelOptInfo *outerrel,
84 : : RelOptInfo *innerrel,
85 : : List *restrictlist,
86 : : JoinType jointype,
87 : : bool *mergejoin_allowed);
88 : : static void generate_mergejoin_paths(PlannerInfo *root,
89 : : RelOptInfo *joinrel,
90 : : RelOptInfo *innerrel,
91 : : Path *outerpath,
92 : : JoinType jointype,
93 : : JoinPathExtraData *extra,
94 : : bool useallclauses,
95 : : Path *inner_cheapest_total,
96 : : List *merge_pathkeys,
97 : : bool is_partial);
98 : :
99 : :
100 : : /*
101 : : * add_paths_to_joinrel
102 : : * Given a join relation and two component rels from which it can be made,
103 : : * consider all possible paths that use the two component rels as outer
104 : : * and inner rel respectively. Add these paths to the join rel's pathlist
105 : : * if they survive comparison with other paths (and remove any existing
106 : : * paths that are dominated by these paths).
107 : : *
108 : : * Modifies the pathlist field of the joinrel node to contain the best
109 : : * paths found so far.
110 : : *
111 : : * jointype is not necessarily the same as sjinfo->jointype; it might be
112 : : * "flipped around" if we are considering joining the rels in the opposite
113 : : * direction from what's indicated in sjinfo.
114 : : *
115 : : * Also, this routine and others in this module accept the special JoinTypes
116 : : * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
117 : : * unique-ify the outer or inner relation and then apply a regular inner
118 : : * join. These values are not allowed to propagate outside this module,
119 : : * however. Path cost estimation code may need to recognize that it's
120 : : * dealing with such a case --- the combination of nominal jointype INNER
121 : : * with sjinfo->jointype == JOIN_SEMI indicates that.
122 : : */
123 : : void
6888 tgl@sss.pgh.pa.us 124 :CBC 279894 : add_paths_to_joinrel(PlannerInfo *root,
125 : : RelOptInfo *joinrel,
126 : : RelOptInfo *outerrel,
127 : : RelOptInfo *innerrel,
128 : : JoinType jointype,
129 : : SpecialJoinInfo *sjinfo,
130 : : List *restrictlist)
131 : : {
132 : : JoinPathExtraData extra;
4853 133 : 279894 : bool mergejoin_allowed = true;
134 : : ListCell *lc;
135 : : Relids joinrelids;
136 : :
137 : : /*
138 : : * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
139 : : * between child relations, even if there is a SpecialJoinInfo node for
140 : : * the join between the topmost parents. So, while calculating Relids set
141 : : * representing the restriction, consider relids of topmost parent of
142 : : * partitions.
143 : : */
2382 rhaas@postgresql.org 144 [ + + ]: 279894 : if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
145 : 5294 : joinrelids = joinrel->top_parent_relids;
146 : : else
147 : 274600 : joinrelids = joinrel->relids;
148 : :
3262 tgl@sss.pgh.pa.us 149 : 279894 : extra.restrictlist = restrictlist;
150 : 279894 : extra.mergeclause_list = NIL;
151 : 279894 : extra.sjinfo = sjinfo;
152 : 279894 : extra.param_source_rels = NULL;
153 : :
154 : : /*
155 : : * See if the inner relation is provably unique for this outer rel.
156 : : *
157 : : * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
158 : : * matter since the executor can make the equivalent optimization anyway;
159 : : * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
160 : : * must be considering a semijoin whose inner side is not provably unique
161 : : * (else reduce_unique_semijoins would've simplified it), so there's no
162 : : * point in calling innerrel_is_unique. However, if the LHS covers all of
163 : : * the semijoin's min_lefthand, then it's appropriate to set inner_unique
164 : : * because the path produced by create_unique_path will be unique relative
165 : : * to the LHS. (If we have an LHS that's only part of the min_lefthand,
166 : : * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
167 : : * letting that value escape this module.
168 : : */
2564 169 [ + + + + ]: 279894 : switch (jointype)
170 : : {
171 : 4414 : case JOIN_SEMI:
172 : : case JOIN_ANTI:
173 : :
174 : : /*
175 : : * XXX it may be worth proving this to allow a Memoize to be
176 : : * considered for Nested Loop Semi/Anti Joins.
177 : : */
178 : 4414 : extra.inner_unique = false; /* well, unproven */
179 : 4414 : break;
180 : 2247 : case JOIN_UNIQUE_INNER:
2540 181 : 4494 : extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
182 : 2247 : outerrel->relids);
2564 183 : 2247 : break;
184 : 2247 : case JOIN_UNIQUE_OUTER:
2540 185 : 2247 : extra.inner_unique = innerrel_is_unique(root,
186 : : joinrel->relids,
187 : : outerrel->relids,
188 : : innerrel,
189 : : JOIN_INNER,
190 : : restrictlist,
191 : : false);
2564 192 : 2247 : break;
193 : 270986 : default:
2540 194 : 270986 : extra.inner_unique = innerrel_is_unique(root,
195 : : joinrel->relids,
196 : : outerrel->relids,
197 : : innerrel,
198 : : jointype,
199 : : restrictlist,
200 : : false);
2564 201 : 270986 : break;
202 : : }
203 : :
204 : : /*
205 : : * Find potential mergejoin clauses. We can skip this if we are not
206 : : * interested in doing a mergejoin. However, mergejoin may be our only
207 : : * way of implementing a full outer join, so override enable_mergejoin if
208 : : * it's a full join.
209 : : */
8543 210 [ + + - + ]: 279894 : if (enable_mergejoin || jointype == JOIN_FULL)
3262 211 : 278183 : extra.mergeclause_list = select_mergejoin_clauses(root,
212 : : joinrel,
213 : : outerrel,
214 : : innerrel,
215 : : restrictlist,
216 : : jointype,
217 : : &mergejoin_allowed);
218 : :
219 : : /*
220 : : * If it's SEMI, ANTI, or inner_unique join, compute correction factors
221 : : * for cost estimation. These will be the same for all paths.
222 : : */
2564 223 [ + + + + : 279894 : if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
+ + ]
2186 224 : 89511 : compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
225 : : jointype, sjinfo, restrictlist,
226 : : &extra.semifactors);
227 : :
228 : : /*
229 : : * Decide whether it's sensible to generate parameterized paths for this
230 : : * joinrel, and if so, which relations such paths should require. There
231 : : * is usually no need to create a parameterized result path unless there
232 : : * is a join order restriction that prevents joining one of our input rels
233 : : * directly to the parameter source rel instead of joining to the other
234 : : * input rel. (But see allow_star_schema_join().) This restriction
235 : : * reduces the number of parameterized paths we have to deal with at
236 : : * higher join levels, without compromising the quality of the resulting
237 : : * plan. We express the restriction as a Relids set that must overlap the
238 : : * parameterization of any proposed join path. Note: param_source_rels
239 : : * should contain only baserels, not OJ relids, so starting from
240 : : * all_baserels not all_query_rels is correct.
241 : : */
4461 242 [ + + + + : 612006 : foreach(lc, root->join_info_list)
+ + ]
243 : : {
2699 244 : 332112 : SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
245 : :
246 : : /*
247 : : * SJ is relevant to this join if we have some part of its RHS
248 : : * (possibly not all of it), and haven't yet joined to its LHS. (This
249 : : * test is pretty simplistic, but should be sufficient considering the
250 : : * join has already been proven legal.) If the SJ is relevant, it
251 : : * presents constraints for joining to anything not in its RHS.
252 : : */
2382 rhaas@postgresql.org 253 [ + + ]: 332112 : if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
254 [ + + ]: 218129 : !bms_overlap(joinrelids, sjinfo2->min_lefthand))
3262 tgl@sss.pgh.pa.us 255 : 10200 : extra.param_source_rels = bms_join(extra.param_source_rels,
2489 256 : 10200 : bms_difference(root->all_baserels,
257 : 10200 : sjinfo2->min_righthand));
258 : :
259 : : /* full joins constrain both sides symmetrically */
2699 260 [ + + + + ]: 334528 : if (sjinfo2->jointype == JOIN_FULL &&
2382 rhaas@postgresql.org 261 : 2416 : bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
262 [ + + ]: 2388 : !bms_overlap(joinrelids, sjinfo2->min_righthand))
3262 tgl@sss.pgh.pa.us 263 : 336 : extra.param_source_rels = bms_join(extra.param_source_rels,
2489 264 : 336 : bms_difference(root->all_baserels,
265 : 336 : sjinfo2->min_lefthand));
266 : : }
267 : :
268 : : /*
269 : : * However, when a LATERAL subquery is involved, there will simply not be
270 : : * any paths for the joinrel that aren't parameterized by whatever the
271 : : * subquery is parameterized by, unless its parameterization is resolved
272 : : * within the joinrel. So we might as well allow additional dependencies
273 : : * on whatever residual lateral dependencies the joinrel will have.
274 : : */
3051 275 : 559788 : extra.param_source_rels = bms_add_members(extra.param_source_rels,
276 : 279894 : joinrel->lateral_relids);
277 : :
278 : : /*
279 : : * 1. Consider mergejoin paths where both relations must be explicitly
280 : : * sorted. Skip this if we can't mergejoin.
281 : : */
4853 282 [ + + ]: 279894 : if (mergejoin_allowed)
4854 283 : 275674 : sort_inner_and_outer(root, joinrel, outerrel, innerrel,
284 : : jointype, &extra);
285 : :
286 : : /*
287 : : * 2. Consider paths where the outer relation need not be explicitly
288 : : * sorted. This includes both nestloops and mergejoins where the outer
289 : : * path is already ordered. Again, skip this if we can't mergejoin.
290 : : * (That's okay because we know that nestloop can't handle
291 : : * right/right-anti/full joins at all, so it wouldn't work in the
292 : : * prohibited cases either.)
293 : : */
4853 294 [ + + ]: 279894 : if (mergejoin_allowed)
4854 295 : 275674 : match_unsorted_outer(root, joinrel, outerrel, innerrel,
296 : : jointype, &extra);
297 : :
298 : : #ifdef NOT_USED
299 : :
300 : : /*
301 : : * 3. Consider paths where the inner relation need not be explicitly
302 : : * sorted. This includes mergejoins only (nestloops were already built in
303 : : * match_unsorted_outer).
304 : : *
305 : : * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
306 : : * significant difference between the inner and outer side of a mergejoin,
307 : : * so match_unsorted_inner creates no paths that aren't equivalent to
308 : : * those made by match_unsorted_outer when add_paths_to_joinrel() is
309 : : * invoked with the two rels given in the other order.
310 : : */
311 : : if (mergejoin_allowed)
312 : : match_unsorted_inner(root, joinrel, outerrel, innerrel,
313 : : jointype, &extra);
314 : : #endif
315 : :
316 : : /*
317 : : * 4. Consider paths where both outer and inner relations must be hashed
318 : : * before being joined. As above, disregard enable_hashjoin for full
319 : : * joins, because there may be no other alternative.
320 : : */
321 [ + + + + ]: 279894 : if (enable_hashjoin || jointype == JOIN_FULL)
8825 322 : 277462 : hash_inner_and_outer(root, joinrel, outerrel, innerrel,
323 : : jointype, &extra);
324 : :
325 : : /*
326 : : * 5. If inner and outer relations are foreign tables (or joins) belonging
327 : : * to the same server and assigned to the same user to check access
328 : : * permissions as, give the FDW a chance to push down joins.
329 : : */
3271 rhaas@postgresql.org 330 [ + + ]: 279894 : if (joinrel->fdwroutine &&
243 efujita@postgresql.o 331 [ + + ]:GNC 1182 : joinrel->fdwroutine->GetForeignJoinPaths)
3271 rhaas@postgresql.org 332 :CBC 1180 : joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
333 : : outerrel, innerrel,
334 : : jointype, &extra);
335 : :
336 : : /*
337 : : * 6. Finally, give extensions a chance to manipulate the path list. They
338 : : * could add new paths (such as CustomPaths) by calling add_path(), or
339 : : * add_partial_path() if parallel aware. They could also delete or modify
340 : : * paths added by the core code.
341 : : */
243 efujita@postgresql.o 342 [ - + ]:GNC 279893 : if (set_join_pathlist_hook)
3271 rhaas@postgresql.org 343 :UBC 0 : set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
344 : : jointype, &extra);
4461 tgl@sss.pgh.pa.us 345 :CBC 279893 : }
346 : :
347 : : /*
348 : : * We override the param_source_rels heuristic to accept nestloop paths in
349 : : * which the outer rel satisfies some but not all of the inner path's
350 : : * parameterization. This is necessary to get good plans for star-schema
351 : : * scenarios, in which a parameterized path for a large table may require
352 : : * parameters from multiple small tables that will not get joined directly to
353 : : * each other. We can handle that by stacking nestloops that have the small
354 : : * tables on the outside; but this breaks the rule the param_source_rels
355 : : * heuristic is based on, namely that parameters should not be passed down
356 : : * across joins unless there's a join-order-constraint-based reason to do so.
357 : : * So we ignore the param_source_rels restriction when this case applies.
358 : : *
359 : : * allow_star_schema_join() returns true if the param_source_rels restriction
360 : : * should be overridden, ie, it's okay to perform this join.
361 : : */
362 : : static inline bool
3176 363 : 104812 : allow_star_schema_join(PlannerInfo *root,
364 : : Relids outerrelids,
365 : : Relids inner_paramrels)
366 : : {
367 : : /*
368 : : * It's a star-schema case if the outer rel provides some but not all of
369 : : * the inner rel's parameterization.
370 : : */
2434 rhaas@postgresql.org 371 [ + + + + ]: 121487 : return (bms_overlap(inner_paramrels, outerrelids) &&
372 : 16675 : bms_nonempty_difference(inner_paramrels, outerrelids));
373 : : }
374 : :
375 : : /*
376 : : * If the parameterization is only partly satisfied by the outer rel,
377 : : * the unsatisfied part can't include any outer-join relids that could
378 : : * null rels of the satisfied part. That would imply that we're trying
379 : : * to use a clause involving a Var with nonempty varnullingrels at
380 : : * a join level where that value isn't yet computable.
381 : : *
382 : : * In practice, this test never finds a problem because earlier join order
383 : : * restrictions prevent us from attempting a join that would cause a problem.
384 : : * (That's unsurprising, because the code worked before we ever added
385 : : * outer-join relids to expression relids.) It still seems worth checking
386 : : * as a backstop, but we only do so in assert-enabled builds.
387 : : */
388 : : #ifdef USE_ASSERT_CHECKING
389 : : static inline bool
440 tgl@sss.pgh.pa.us 390 : 1117195 : have_unsafe_outer_join_ref(PlannerInfo *root,
391 : : Relids outerrelids,
392 : : Relids inner_paramrels)
393 : : {
394 : 1117195 : bool result = false;
395 : 1117195 : Relids unsatisfied = bms_difference(inner_paramrels, outerrelids);
426 396 : 1117195 : Relids satisfied = bms_intersect(inner_paramrels, outerrelids);
397 : :
398 [ + + ]: 1117195 : if (bms_overlap(unsatisfied, root->outer_join_rels))
399 : : {
400 : : ListCell *lc;
401 : :
440 402 [ + - + + : 90 : foreach(lc, root->join_info_list)
+ + ]
403 : : {
404 : 60 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
405 : :
406 [ + + ]: 60 : if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
407 : 12 : continue; /* not relevant */
426 408 [ + - ]: 48 : if (bms_overlap(satisfied, sjinfo->min_righthand) ||
440 409 [ - + - - ]: 48 : (sjinfo->jointype == JOIN_FULL &&
426 tgl@sss.pgh.pa.us 410 :UBC 0 : bms_overlap(satisfied, sjinfo->min_lefthand)))
411 : : {
440 412 : 0 : result = true; /* doesn't work */
413 : 0 : break;
414 : : }
415 : : }
416 : : }
417 : :
418 : : /* Waste no memory when we reject a path here */
440 tgl@sss.pgh.pa.us 419 :CBC 1117195 : bms_free(unsatisfied);
426 420 : 1117195 : bms_free(satisfied);
421 : :
440 422 : 1117195 : return result;
423 : : }
424 : : #endif /* USE_ASSERT_CHECKING */
425 : :
426 : : /*
427 : : * paraminfo_get_equal_hashops
428 : : * Determine if the clauses in param_info and innerrel's lateral_vars
429 : : * can be hashed.
430 : : * Returns true if hashing is possible, otherwise false.
431 : : *
432 : : * Additionally, on success we collect the outer expressions and the
433 : : * appropriate equality operators for each hashable parameter to innerrel.
434 : : * These are returned in parallel lists in *param_exprs and *operators.
435 : : * We also set *binary_mode to indicate whether strict binary matching is
436 : : * required.
437 : : */
438 : : static bool
1108 drowley@postgresql.o 439 : 161821 : paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
440 : : RelOptInfo *outerrel, RelOptInfo *innerrel,
441 : : List **param_exprs, List **operators,
442 : : bool *binary_mode)
443 : :
444 : : {
445 : : ListCell *lc;
446 : :
447 : 161821 : *param_exprs = NIL;
448 : 161821 : *operators = NIL;
872 449 : 161821 : *binary_mode = false;
450 : :
451 : : /* Add join clauses from param_info to the hash key */
1108 452 [ + - ]: 161821 : if (param_info != NULL)
453 : : {
454 : 161821 : List *clauses = param_info->ppi_clauses;
455 : :
456 [ + + + + : 293902 : foreach(lc, clauses)
+ + ]
457 : : {
458 : 173920 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
459 : : OpExpr *opexpr;
460 : : Node *expr;
461 : : Oid hasheqoperator;
462 : :
888 463 : 173920 : opexpr = (OpExpr *) rinfo->clause;
464 : :
465 : : /*
466 : : * Bail if the rinfo is not compatible. We need a join OpExpr
467 : : * with 2 args.
468 : : */
469 [ + + + - ]: 173920 : if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
1108 470 [ + + ]: 162388 : !clause_sides_match_join(rinfo, outerrel, innerrel))
471 : : {
472 : 41767 : list_free(*operators);
473 : 41767 : list_free(*param_exprs);
474 : 41839 : return false;
475 : : }
476 : :
477 [ + + ]: 132153 : if (rinfo->outer_is_left)
478 : : {
479 : 78657 : expr = (Node *) linitial(opexpr->args);
888 480 : 78657 : hasheqoperator = rinfo->left_hasheqoperator;
481 : : }
482 : : else
483 : : {
1108 484 : 53496 : expr = (Node *) lsecond(opexpr->args);
888 485 : 53496 : hasheqoperator = rinfo->right_hasheqoperator;
486 : : }
487 : :
488 : : /* can't do memoize if we can't hash the outer type */
489 [ + + ]: 132153 : if (!OidIsValid(hasheqoperator))
490 : : {
491 : 72 : list_free(*operators);
492 : 72 : list_free(*param_exprs);
493 : 72 : return false;
494 : : }
495 : :
496 : : /*
497 : : * 'expr' may already exist as a parameter from a previous item in
498 : : * ppi_clauses. No need to include it again, however we'd better
499 : : * ensure we do switch into binary mode if required. See below.
500 : : */
79 drowley@postgresql.o 501 [ + + ]:GNC 132081 : if (!list_member(*param_exprs, expr))
502 : : {
503 : 132078 : *operators = lappend_oid(*operators, hasheqoperator);
504 : 132078 : *param_exprs = lappend(*param_exprs, expr);
505 : : }
506 : :
507 : : /*
508 : : * When the join operator is not hashable then it's possible that
509 : : * the operator will be able to distinguish something that the
510 : : * hash equality operator could not. For example with floating
511 : : * point types -0.0 and +0.0 are classed as equal by the hash
512 : : * function and equality function, but some other operator may be
513 : : * able to tell those values apart. This means that we must put
514 : : * memoize into binary comparison mode so that it does bit-by-bit
515 : : * comparisons rather than a "logical" comparison as it would
516 : : * using the hash equality operator.
517 : : */
872 drowley@postgresql.o 518 [ + + ]:CBC 132081 : if (!OidIsValid(rinfo->hashjoinoperator))
519 : 526 : *binary_mode = true;
520 : : }
521 : : }
522 : :
523 : : /* Now add any lateral vars to the cache key too */
1108 524 [ + + + + : 121776 : foreach(lc, innerrel->lateral_vars)
+ + ]
525 : : {
526 : 1869 : Node *expr = (Node *) lfirst(lc);
527 : : TypeCacheEntry *typentry;
528 : :
529 : : /* Reject if there are any volatile functions in lateral vars */
530 [ - + ]: 1869 : if (contain_volatile_functions(expr))
531 : : {
1108 drowley@postgresql.o 532 :UBC 0 : list_free(*operators);
533 : 0 : list_free(*param_exprs);
534 : 0 : return false;
535 : : }
536 : :
1108 drowley@postgresql.o 537 :CBC 1869 : typentry = lookup_type_cache(exprType(expr),
538 : : TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
539 : :
540 : : /* can't use memoize without a valid hash proc and equals operator */
541 [ + + - + ]: 1869 : if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
542 : : {
543 : 75 : list_free(*operators);
544 : 75 : list_free(*param_exprs);
545 : 75 : return false;
546 : : }
547 : :
548 : : /*
549 : : * 'expr' may already exist as a parameter from the ppi_clauses. No
550 : : * need to include it again, however we'd better ensure we do switch
551 : : * into binary mode.
552 : : */
79 drowley@postgresql.o 553 [ + + ]:GNC 1794 : if (!list_member(*param_exprs, expr))
554 : : {
555 : 1593 : *operators = lappend_oid(*operators, typentry->eq_opr);
556 : 1593 : *param_exprs = lappend(*param_exprs, expr);
557 : : }
558 : :
559 : : /*
560 : : * We must go into binary mode as we don't have too much of an idea of
561 : : * how these lateral Vars are being used. See comment above when we
562 : : * set *binary_mode for the non-lateral Var case. This could be
563 : : * relaxed a bit if we had the RestrictInfos and knew the operators
564 : : * being used, however for cases like Vars that are arguments to
565 : : * functions we must operate in binary mode as we don't have
566 : : * visibility into what the function is doing with the Vars.
567 : : */
872 drowley@postgresql.o 568 :CBC 1794 : *binary_mode = true;
569 : : }
570 : :
571 : : /* We're okay to use memoize */
1108 572 : 119907 : return true;
573 : : }
574 : :
575 : : /*
576 : : * get_memoize_path
577 : : * If possible, make and return a Memoize path atop of 'inner_path'.
578 : : * Otherwise return NULL.
579 : : */
580 : : static Path *
1005 581 : 707533 : get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
582 : : RelOptInfo *outerrel, Path *inner_path,
583 : : Path *outer_path, JoinType jointype,
584 : : JoinPathExtraData *extra)
585 : : {
586 : : List *param_exprs;
587 : : List *hash_operators;
588 : : ListCell *lc;
589 : : bool binary_mode;
590 : :
591 : : /* Obviously not if it's disabled */
592 [ + + ]: 707533 : if (!enable_memoize)
1108 593 : 323 : return NULL;
594 : :
595 : : /*
596 : : * We can safely not bother with all this unless we expect to perform more
597 : : * than one inner scan. The first scan is always going to be a cache
598 : : * miss. This would likely fail later anyway based on costs, so this is
599 : : * really just to save some wasted effort.
600 : : */
601 [ + + ]: 707210 : if (outer_path->parent->rows < 2)
602 : 232121 : return NULL;
603 : :
604 : : /*
605 : : * We can only have a memoize node when there's some kind of cache key,
606 : : * either parameterized path clauses or lateral Vars. No cache key sounds
607 : : * more like something a Materialize node might be more useful for.
608 : : */
609 [ + + ]: 475089 : if ((inner_path->param_info == NULL ||
610 [ + + ]: 197339 : inner_path->param_info->ppi_clauses == NIL) &&
611 [ + + ]: 286023 : innerrel->lateral_vars == NIL)
612 : 284780 : return NULL;
613 : :
614 : : /*
615 : : * Currently we don't do this for SEMI and ANTI joins unless they're
616 : : * marked as inner_unique. This is because nested loop SEMI/ANTI joins
617 : : * don't scan the inner node to completion, which will mean memoize cannot
618 : : * mark the cache entry as complete.
619 : : *
620 : : * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
621 : : * = true. Should we? See add_paths_to_joinrel()
622 : : */
623 [ + + + + : 190309 : if (!extra->inner_unique && (jointype == JOIN_SEMI ||
+ + ]
624 : : jointype == JOIN_ANTI))
625 : 4214 : return NULL;
626 : :
627 : : /*
628 : : * Memoize normally marks cache entries as complete when it runs out of
629 : : * tuples to read from its subplan. However, with unique joins, Nested
630 : : * Loop will skip to the next outer tuple after finding the first matching
631 : : * inner tuple. This means that we may not read the inner side of the
632 : : * join to completion which leaves no opportunity to mark the cache entry
633 : : * as complete. To work around that, when the join is unique we
634 : : * automatically mark cache entries as complete after fetching the first
635 : : * tuple. This works when the entire join condition is parameterized.
636 : : * Otherwise, when the parameterization is only a subset of the join
637 : : * condition, we can't be sure which part of it causes the join to be
638 : : * unique. This means there are no guarantees that only 1 tuple will be
639 : : * read. We cannot mark the cache entry as complete after reading the
640 : : * first tuple without that guarantee. This means the scope of Memoize
641 : : * node's usefulness is limited to only outer rows that have no join
642 : : * partner as this is the only case where Nested Loop would exhaust the
643 : : * inner scan of a unique join. Since the scope is limited to that, we
644 : : * just don't bother making a memoize path in this case.
645 : : *
646 : : * Lateral vars needn't be considered here as they're not considered when
647 : : * determining if the join is unique.
648 : : *
649 : : * XXX this could be enabled if the remaining join quals were made part of
650 : : * the inner scan's filter instead of the join filter. Maybe it's worth
651 : : * considering doing that?
652 : : */
1058 653 [ + + ]: 186095 : if (extra->inner_unique &&
1056 654 [ + - + + ]: 222544 : (inner_path->param_info == NULL ||
83 655 : 111272 : bms_num_members(inner_path->param_info->ppi_serials) <
1056 656 : 111272 : list_length(extra->restrictlist)))
1058 657 : 24267 : return NULL;
658 : :
659 : : /*
660 : : * We can't use a memoize node if there are volatile functions in the
661 : : * inner rel's target list or restrict list. A cache hit could reduce the
662 : : * number of calls to these functions.
663 : : */
1108 664 [ - + ]: 161828 : if (contain_volatile_functions((Node *) innerrel->reltarget))
1108 drowley@postgresql.o 665 :UBC 0 : return NULL;
666 : :
1108 drowley@postgresql.o 667 [ + + + + :CBC 265825 : foreach(lc, innerrel->baserestrictinfo)
+ + ]
668 : : {
669 : 104003 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
670 : :
671 [ + + ]: 104003 : if (contain_volatile_functions((Node *) rinfo))
1108 drowley@postgresql.o 672 :GBC 6 : return NULL;
673 : : }
674 : :
675 : : /*
676 : : * Also check the parameterized path restrictinfos for volatile functions.
677 : : * Indexed functions must be immutable so shouldn't have any volatile
678 : : * functions, however, with a lateral join the inner scan may not be an
679 : : * index scan.
680 : : */
251 drowley@postgresql.o 681 [ + - ]:CBC 161822 : if (inner_path->param_info != NULL)
682 : : {
683 [ + + + + : 349413 : foreach(lc, inner_path->param_info->ppi_clauses)
+ + ]
684 : : {
685 : 187592 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
686 : :
687 [ + + ]: 187592 : if (contain_volatile_functions((Node *) rinfo))
688 : 1 : return NULL;
689 : : }
690 : : }
691 : :
692 : : /* Check if we have hash ops for each parameter to the path */
1108 693 [ + + ]: 161821 : if (paraminfo_get_equal_hashops(root,
694 : : inner_path->param_info,
496 tgl@sss.pgh.pa.us 695 [ + + ]: 161821 : outerrel->top_parent ?
696 : : outerrel->top_parent : outerrel,
697 : : innerrel,
698 : : ¶m_exprs,
699 : : &hash_operators,
700 : : &binary_mode))
701 : : {
1005 drowley@postgresql.o 702 : 119907 : return (Path *) create_memoize_path(root,
703 : : innerrel,
704 : : inner_path,
705 : : param_exprs,
706 : : hash_operators,
707 : 119907 : extra->inner_unique,
708 : : binary_mode,
709 : : outer_path->rows);
710 : : }
711 : :
1108 712 : 41914 : return NULL;
713 : : }
714 : :
715 : : /*
716 : : * try_nestloop_path
717 : : * Consider a nestloop join path; if it appears useful, push it into
718 : : * the joinrel's pathlist via add_path().
719 : : */
720 : : static void
4461 tgl@sss.pgh.pa.us 721 : 1221901 : try_nestloop_path(PlannerInfo *root,
722 : : RelOptInfo *joinrel,
723 : : Path *outer_path,
724 : : Path *inner_path,
725 : : List *pathkeys,
726 : : JoinType jointype,
727 : : JoinPathExtraData *extra)
728 : : {
729 : : Relids required_outer;
730 : : JoinCostWorkspace workspace;
2434 rhaas@postgresql.org 731 : 1221901 : RelOptInfo *innerrel = inner_path->parent;
732 : 1221901 : RelOptInfo *outerrel = outer_path->parent;
733 : : Relids innerrelids;
734 : : Relids outerrelids;
735 [ + + ]: 1221901 : Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
736 [ + + ]: 1221901 : Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
737 : :
738 : : /*
739 : : * If we are forming an outer join at this join, it's nonsensical to use
740 : : * an input path that uses the outer join as part of its parameterization.
741 : : * (This can happen despite our join order restrictions, since those apply
742 : : * to what is in an input relation not what its parameters are.)
743 : : */
290 tgl@sss.pgh.pa.us 744 [ + + + - ]: 1535824 : if (extra->sjinfo->ojrelid != 0 &&
745 [ + + ]: 627846 : (bms_is_member(extra->sjinfo->ojrelid, inner_paramrels) ||
746 : 313923 : bms_is_member(extra->sjinfo->ojrelid, outer_paramrels)))
747 : 104706 : return;
748 : :
749 : : /*
750 : : * Any parameterization of the input paths refers to topmost parents of
751 : : * the relevant relations, because reparameterize_path_by_child() hasn't
752 : : * been called yet. So we must consider topmost parents of the relations
753 : : * being joined, too, while determining parameterization of the result and
754 : : * checking for disallowed parameterization cases.
755 : : */
2382 rhaas@postgresql.org 756 [ + + ]: 1221841 : if (innerrel->top_parent_relids)
757 : 16538 : innerrelids = innerrel->top_parent_relids;
758 : : else
759 : 1205303 : innerrelids = innerrel->relids;
760 : :
761 [ + + ]: 1221841 : if (outerrel->top_parent_relids)
762 : 16538 : outerrelids = outerrel->top_parent_relids;
763 : : else
764 : 1205303 : outerrelids = outerrel->relids;
765 : :
766 : : /*
767 : : * Check to see if proposed path is still parameterized, and reject if the
768 : : * parameterization wouldn't be sensible --- unless allow_star_schema_join
769 : : * says to allow it anyway. Also, we must reject if have_dangerous_phv
770 : : * doesn't like the look of it, which could only happen if the nestloop is
771 : : * still parameterized.
772 : : */
2434 773 : 1221841 : required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
774 : : innerrelids, inner_paramrels);
4461 tgl@sss.pgh.pa.us 775 [ + + ]: 1221841 : if (required_outer &&
3170 776 [ + + ]: 121180 : ((!bms_overlap(required_outer, extra->param_source_rels) &&
2434 rhaas@postgresql.org 777 [ + + + + ]: 121394 : !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
778 : 16582 : have_dangerous_phv(root, outerrelids, inner_paramrels)))
779 : : {
780 : : /* Waste no memory when we reject a path here */
3176 tgl@sss.pgh.pa.us 781 : 104646 : bms_free(required_outer);
782 : 104646 : return;
783 : : }
784 : :
785 : : /* If we got past that, we shouldn't have any unsafe outer-join refs */
426 786 [ - + ]: 1117195 : Assert(!have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels));
787 : :
788 : : /*
789 : : * If the inner path is parameterized, it is parameterized by the topmost
790 : : * parent of the outer rel, not the outer rel itself. We will need to
791 : : * translate the parameterization, if this path is chosen, during
792 : : * create_plan(). Here we just check whether we will be able to perform
793 : : * the translation, and if not avoid creating a nestloop path.
794 : : */
26 tgl@sss.pgh.pa.us 795 [ + + + - :GNC 1117195 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
+ + ]
796 [ - + ]: 5336 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
797 : : {
26 tgl@sss.pgh.pa.us 798 :UNC 0 : bms_free(required_outer);
799 : 0 : return;
800 : : }
801 : :
802 : : /*
803 : : * Do a precheck to quickly eliminate obviously-inferior paths. We
804 : : * calculate a cheap lower bound on the path's cost and then use
805 : : * add_path_precheck() to see if the path is clearly going to be dominated
806 : : * by some existing path for the joinrel. If not, do the full pushup with
807 : : * creating a fully valid path structure and submitting it to add_path().
808 : : * The latter two steps are expensive enough to make this two-phase
809 : : * methodology worthwhile.
810 : : */
4461 tgl@sss.pgh.pa.us 811 :CBC 1117195 : initial_cost_nestloop(root, &workspace, jointype,
812 : : outer_path, inner_path, extra);
813 : :
814 [ + + ]: 1117195 : if (add_path_precheck(joinrel,
815 : : workspace.startup_cost, workspace.total_cost,
816 : : pathkeys, required_outer))
817 : : {
818 : 565025 : add_path(joinrel, (Path *)
819 : 565025 : create_nestloop_path(root,
820 : : joinrel,
821 : : jointype,
822 : : &workspace,
823 : : extra,
824 : : outer_path,
825 : : inner_path,
826 : : extra->restrictlist,
827 : : pathkeys,
828 : : required_outer));
829 : : }
830 : : else
831 : : {
832 : : /* Waste no memory when we reject a path here */
833 : 552170 : bms_free(required_outer);
834 : : }
835 : : }
836 : :
837 : : /*
838 : : * try_partial_nestloop_path
839 : : * Consider a partial nestloop join path; if it appears useful, push it into
840 : : * the joinrel's partial_pathlist via add_partial_path().
841 : : */
842 : : static void
3007 rhaas@postgresql.org 843 : 13119 : try_partial_nestloop_path(PlannerInfo *root,
844 : : RelOptInfo *joinrel,
845 : : Path *outer_path,
846 : : Path *inner_path,
847 : : List *pathkeys,
848 : : JoinType jointype,
849 : : JoinPathExtraData *extra)
850 : : {
851 : : JoinCostWorkspace workspace;
852 : :
853 : : /*
854 : : * If the inner path is parameterized, the parameterization must be fully
855 : : * satisfied by the proposed outer path. Parameterized partial paths are
856 : : * not supported. The caller should already have verified that no lateral
857 : : * rels are required here.
858 : : */
859 [ - + ]: 13119 : Assert(bms_is_empty(joinrel->lateral_relids));
860 [ + + ]: 13119 : if (inner_path->param_info != NULL)
861 : : {
862 : 6674 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
2382 863 : 6674 : RelOptInfo *outerrel = outer_path->parent;
864 : : Relids outerrelids;
865 : :
866 : : /*
867 : : * The inner and outer paths are parameterized, if at all, by the top
868 : : * level parents, not the child relations, so we must use those relids
869 : : * for our parameterization tests.
870 : : */
871 [ + + ]: 6674 : if (outerrel->top_parent_relids)
872 : 4986 : outerrelids = outerrel->top_parent_relids;
873 : : else
874 : 1688 : outerrelids = outerrel->relids;
875 : :
876 [ + + ]: 6674 : if (!bms_is_subset(inner_paramrels, outerrelids))
3007 877 : 9203 : return;
878 : : }
879 : :
880 : : /*
881 : : * If the inner path is parameterized, it is parameterized by the topmost
882 : : * parent of the outer rel, not the outer rel itself. We will need to
883 : : * translate the parameterization, if this path is chosen, during
884 : : * create_plan(). Here we just check whether we will be able to perform
885 : : * the translation, and if not avoid creating a nestloop path.
886 : : */
26 tgl@sss.pgh.pa.us 887 [ + + + - :GNC 12596 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
+ + ]
888 [ - + ]: 4590 : !path_is_reparameterizable_by_child(inner_path, outer_path->parent))
26 tgl@sss.pgh.pa.us 889 :UNC 0 : return;
890 : :
891 : : /*
892 : : * Before creating a path, get a quick lower bound on what it is likely to
893 : : * cost. Bail out right away if it looks terrible.
894 : : */
3007 rhaas@postgresql.org 895 :CBC 12596 : initial_cost_nestloop(root, &workspace, jointype,
896 : : outer_path, inner_path, extra);
897 [ + + ]: 12596 : if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
898 : 8680 : return;
899 : :
900 : : /* Might be good enough to be worth trying, so let's try it. */
901 : 3916 : add_partial_path(joinrel, (Path *)
2930 tgl@sss.pgh.pa.us 902 : 3916 : create_nestloop_path(root,
903 : : joinrel,
904 : : jointype,
905 : : &workspace,
906 : : extra,
907 : : outer_path,
908 : : inner_path,
909 : : extra->restrictlist,
910 : : pathkeys,
911 : : NULL));
912 : : }
913 : :
914 : : /*
915 : : * try_mergejoin_path
916 : : * Consider a merge join path; if it appears useful, push it into
917 : : * the joinrel's pathlist via add_path().
918 : : */
919 : : static void
4461 920 : 513921 : try_mergejoin_path(PlannerInfo *root,
921 : : RelOptInfo *joinrel,
922 : : Path *outer_path,
923 : : Path *inner_path,
924 : : List *pathkeys,
925 : : List *mergeclauses,
926 : : List *outersortkeys,
927 : : List *innersortkeys,
928 : : JoinType jointype,
929 : : JoinPathExtraData *extra,
930 : : bool is_partial)
931 : : {
932 : : Relids required_outer;
933 : : JoinCostWorkspace workspace;
934 : :
2595 rhaas@postgresql.org 935 [ + + ]: 513921 : if (is_partial)
936 : : {
937 : 3015 : try_partial_mergejoin_path(root,
938 : : joinrel,
939 : : outer_path,
940 : : inner_path,
941 : : pathkeys,
942 : : mergeclauses,
943 : : outersortkeys,
944 : : innersortkeys,
945 : : jointype,
946 : : extra);
947 : 16179 : return;
948 : : }
949 : :
950 : : /*
951 : : * If we are forming an outer join at this join, it's nonsensical to use
952 : : * an input path that uses the outer join as part of its parameterization.
953 : : * (This can happen despite our join order restrictions, since those apply
954 : : * to what is in an input relation not what its parameters are.)
955 : : */
290 tgl@sss.pgh.pa.us 956 [ + + + - ]: 703913 : if (extra->sjinfo->ojrelid != 0 &&
957 [ + + + + ]: 386014 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
958 [ + + ]: 193007 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
959 : 6 : return;
960 : :
961 : : /*
962 : : * Check to see if proposed path is still parameterized, and reject if the
963 : : * parameterization wouldn't be sensible.
964 : : */
4461 965 : 510900 : required_outer = calc_non_nestloop_required_outer(outer_path,
966 : : inner_path);
967 [ + + ]: 510900 : if (required_outer &&
3262 968 [ + + ]: 14394 : !bms_overlap(required_outer, extra->param_source_rels))
969 : : {
970 : : /* Waste no memory when we reject a path here */
4461 971 : 13158 : bms_free(required_outer);
972 : 13158 : return;
973 : : }
974 : :
975 : : /*
976 : : * If the given paths are already well enough ordered, we can skip doing
977 : : * an explicit sort.
978 : : */
979 [ + + + + ]: 739947 : if (outersortkeys &&
980 : 242205 : pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
981 : 6566 : outersortkeys = NIL;
982 [ + + + + ]: 898995 : if (innersortkeys &&
983 : 401253 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
984 : 10920 : innersortkeys = NIL;
985 : :
986 : : /*
987 : : * See comments in try_nestloop_path().
988 : : */
989 : 497742 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
990 : : outer_path, inner_path,
991 : : outersortkeys, innersortkeys,
992 : : extra);
993 : :
994 [ + + ]: 497742 : if (add_path_precheck(joinrel,
995 : : workspace.startup_cost, workspace.total_cost,
996 : : pathkeys, required_outer))
997 : : {
998 : 123675 : add_path(joinrel, (Path *)
999 : 123675 : create_mergejoin_path(root,
1000 : : joinrel,
1001 : : jointype,
1002 : : &workspace,
1003 : : extra,
1004 : : outer_path,
1005 : : inner_path,
1006 : : extra->restrictlist,
1007 : : pathkeys,
1008 : : required_outer,
1009 : : mergeclauses,
1010 : : outersortkeys,
1011 : : innersortkeys));
1012 : : }
1013 : : else
1014 : : {
1015 : : /* Waste no memory when we reject a path here */
1016 : 374067 : bms_free(required_outer);
1017 : : }
1018 : : }
1019 : :
1020 : : /*
1021 : : * try_partial_mergejoin_path
1022 : : * Consider a partial merge join path; if it appears useful, push it into
1023 : : * the joinrel's pathlist via add_partial_path().
1024 : : */
1025 : : static void
2595 rhaas@postgresql.org 1026 : 9090 : try_partial_mergejoin_path(PlannerInfo *root,
1027 : : RelOptInfo *joinrel,
1028 : : Path *outer_path,
1029 : : Path *inner_path,
1030 : : List *pathkeys,
1031 : : List *mergeclauses,
1032 : : List *outersortkeys,
1033 : : List *innersortkeys,
1034 : : JoinType jointype,
1035 : : JoinPathExtraData *extra)
1036 : : {
1037 : : JoinCostWorkspace workspace;
1038 : :
1039 : : /*
1040 : : * See comments in try_partial_hashjoin_path().
1041 : : */
1042 [ - + ]: 9090 : Assert(bms_is_empty(joinrel->lateral_relids));
1043 [ - + ]: 9090 : if (inner_path->param_info != NULL)
1044 : : {
2595 rhaas@postgresql.org 1045 :UBC 0 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
1046 : :
1047 [ # # ]: 0 : if (!bms_is_empty(inner_paramrels))
1048 : 0 : return;
1049 : : }
1050 : :
1051 : : /*
1052 : : * If the given paths are already well enough ordered, we can skip doing
1053 : : * an explicit sort.
1054 : : */
2595 rhaas@postgresql.org 1055 [ + + + + ]:CBC 15165 : if (outersortkeys &&
1056 : 6075 : pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
1057 : 54 : outersortkeys = NIL;
1058 [ + + + + ]: 16956 : if (innersortkeys &&
1059 : 7866 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1060 : 126 : innersortkeys = NIL;
1061 : :
1062 : : /*
1063 : : * See comments in try_partial_nestloop_path().
1064 : : */
1065 : 9090 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1066 : : outer_path, inner_path,
1067 : : outersortkeys, innersortkeys,
1068 : : extra);
1069 : :
1070 [ + + ]: 9090 : if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
1071 : 4537 : return;
1072 : :
1073 : : /* Might be good enough to be worth trying, so let's try it. */
1074 : 4553 : add_partial_path(joinrel, (Path *)
1075 : 4553 : create_mergejoin_path(root,
1076 : : joinrel,
1077 : : jointype,
1078 : : &workspace,
1079 : : extra,
1080 : : outer_path,
1081 : : inner_path,
1082 : : extra->restrictlist,
1083 : : pathkeys,
1084 : : NULL,
1085 : : mergeclauses,
1086 : : outersortkeys,
1087 : : innersortkeys));
1088 : : }
1089 : :
1090 : : /*
1091 : : * try_hashjoin_path
1092 : : * Consider a hash join path; if it appears useful, push it into
1093 : : * the joinrel's pathlist via add_path().
1094 : : */
1095 : : static void
4461 tgl@sss.pgh.pa.us 1096 : 300127 : try_hashjoin_path(PlannerInfo *root,
1097 : : RelOptInfo *joinrel,
1098 : : Path *outer_path,
1099 : : Path *inner_path,
1100 : : List *hashclauses,
1101 : : JoinType jointype,
1102 : : JoinPathExtraData *extra)
1103 : : {
1104 : : Relids required_outer;
1105 : : JoinCostWorkspace workspace;
1106 : :
1107 : : /*
1108 : : * If we are forming an outer join at this join, it's nonsensical to use
1109 : : * an input path that uses the outer join as part of its parameterization.
1110 : : * (This can happen despite our join order restrictions, since those apply
1111 : : * to what is in an input relation not what its parameters are.)
1112 : : */
290 1113 [ + + + + ]: 416637 : if (extra->sjinfo->ojrelid != 0 &&
1114 [ + + + + ]: 233005 : (bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(inner_path)) ||
1115 [ + + ]: 116495 : bms_is_member(extra->sjinfo->ojrelid, PATH_REQ_OUTER(outer_path))))
1116 : 40127 : return;
1117 : :
1118 : : /*
1119 : : * Check to see if proposed path is still parameterized, and reject if the
1120 : : * parameterization wouldn't be sensible.
1121 : : */
4461 1122 : 300097 : required_outer = calc_non_nestloop_required_outer(outer_path,
1123 : : inner_path);
1124 [ + + ]: 300097 : if (required_outer &&
3262 1125 [ + + ]: 46453 : !bms_overlap(required_outer, extra->param_source_rels))
1126 : : {
1127 : : /* Waste no memory when we reject a path here */
4461 1128 : 40097 : bms_free(required_outer);
1129 : 40097 : return;
1130 : : }
1131 : :
1132 : : /*
1133 : : * See comments in try_nestloop_path(). Also note that hashjoin paths
1134 : : * never have any output pathkeys, per comments in create_hashjoin_path.
1135 : : */
1136 : 260000 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1137 : : outer_path, inner_path, extra, false);
1138 : :
1139 [ + + ]: 260000 : if (add_path_precheck(joinrel,
1140 : : workspace.startup_cost, workspace.total_cost,
1141 : : NIL, required_outer))
1142 : : {
1143 : 106826 : add_path(joinrel, (Path *)
1144 : 106826 : create_hashjoin_path(root,
1145 : : joinrel,
1146 : : jointype,
1147 : : &workspace,
1148 : : extra,
1149 : : outer_path,
1150 : : inner_path,
1151 : : false, /* parallel_hash */
1152 : : extra->restrictlist,
1153 : : required_outer,
1154 : : hashclauses));
1155 : : }
1156 : : else
1157 : : {
1158 : : /* Waste no memory when we reject a path here */
1159 : 153174 : bms_free(required_outer);
1160 : : }
1161 : : }
1162 : :
1163 : : /*
1164 : : * try_partial_hashjoin_path
1165 : : * Consider a partial hashjoin join path; if it appears useful, push it into
1166 : : * the joinrel's partial_pathlist via add_partial_path().
1167 : : * The outer side is partial. If parallel_hash is true, then the inner path
1168 : : * must be partial and will be run in parallel to create one or more shared
1169 : : * hash tables; otherwise the inner path must be complete and a copy of it
1170 : : * is run in every process to create separate identical private hash tables.
1171 : : */
1172 : : static void
3007 rhaas@postgresql.org 1173 : 10053 : try_partial_hashjoin_path(PlannerInfo *root,
1174 : : RelOptInfo *joinrel,
1175 : : Path *outer_path,
1176 : : Path *inner_path,
1177 : : List *hashclauses,
1178 : : JoinType jointype,
1179 : : JoinPathExtraData *extra,
1180 : : bool parallel_hash)
1181 : : {
1182 : : JoinCostWorkspace workspace;
1183 : :
1184 : : /*
1185 : : * If the inner path is parameterized, the parameterization must be fully
1186 : : * satisfied by the proposed outer path. Parameterized partial paths are
1187 : : * not supported. The caller should already have verified that no lateral
1188 : : * rels are required here.
1189 : : */
1190 [ - + ]: 10053 : Assert(bms_is_empty(joinrel->lateral_relids));
1191 [ - + ]: 10053 : if (inner_path->param_info != NULL)
1192 : : {
3007 rhaas@postgresql.org 1193 :UBC 0 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
1194 : :
1195 [ # # ]: 0 : if (!bms_is_empty(inner_paramrels))
1196 : 0 : return;
1197 : : }
1198 : :
1199 : : /*
1200 : : * Before creating a path, get a quick lower bound on what it is likely to
1201 : : * cost. Bail out right away if it looks terrible.
1202 : : */
3007 rhaas@postgresql.org 1203 :CBC 10053 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1204 : : outer_path, inner_path, extra, parallel_hash);
1205 [ + + ]: 10053 : if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
1206 : 4767 : return;
1207 : :
1208 : : /* Might be good enough to be worth trying, so let's try it. */
1209 : 5286 : add_partial_path(joinrel, (Path *)
2930 tgl@sss.pgh.pa.us 1210 : 5286 : create_hashjoin_path(root,
1211 : : joinrel,
1212 : : jointype,
1213 : : &workspace,
1214 : : extra,
1215 : : outer_path,
1216 : : inner_path,
1217 : : parallel_hash,
1218 : : extra->restrictlist,
1219 : : NULL,
1220 : : hashclauses));
1221 : : }
1222 : :
1223 : : /*
1224 : : * clause_sides_match_join
1225 : : * Determine whether a join clause is of the right form to use in this join.
1226 : : *
1227 : : * We already know that the clause is a binary opclause referencing only the
1228 : : * rels in the current join. The point here is to check whether it has the
1229 : : * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
1230 : : * rather than mixing outer and inner vars on either side. If it matches,
1231 : : * we set the transient flag outer_is_left to identify which side is which.
1232 : : */
1233 : : static inline bool
5321 1234 : 656317 : clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
1235 : : RelOptInfo *innerrel)
1236 : : {
5322 1237 [ + + + + ]: 983147 : if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
1238 : 326830 : bms_is_subset(rinfo->right_relids, innerrel->relids))
1239 : : {
1240 : : /* lefthand side is outer */
1241 : 326656 : rinfo->outer_is_left = true;
1242 : 326656 : return true;
1243 : : }
1244 [ + + + + ]: 650254 : else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
1245 : 320593 : bms_is_subset(rinfo->right_relids, outerrel->relids))
1246 : : {
1247 : : /* righthand side is outer */
1248 : 298970 : rinfo->outer_is_left = false;
1249 : 298970 : return true;
1250 : : }
1251 : 30691 : return false; /* no good for these input relations */
1252 : : }
1253 : :
1254 : : /*
1255 : : * sort_inner_and_outer
1256 : : * Create mergejoin join paths by explicitly sorting both the outer and
1257 : : * inner join relations on each available merge ordering.
1258 : : *
1259 : : * 'joinrel' is the join relation
1260 : : * 'outerrel' is the outer join relation
1261 : : * 'innerrel' is the inner join relation
1262 : : * 'jointype' is the type of join to do
1263 : : * 'extra' contains additional input values
1264 : : */
1265 : : static void
6888 1266 : 275674 : sort_inner_and_outer(PlannerInfo *root,
1267 : : RelOptInfo *joinrel,
1268 : : RelOptInfo *outerrel,
1269 : : RelOptInfo *innerrel,
1270 : : JoinType jointype,
1271 : : JoinPathExtraData *extra)
1272 : : {
2595 rhaas@postgresql.org 1273 : 275674 : JoinType save_jointype = jointype;
1274 : : Path *outer_path;
1275 : : Path *inner_path;
1276 : 275674 : Path *cheapest_partial_outer = NULL;
1277 : 275674 : Path *cheapest_safe_inner = NULL;
1278 : : List *all_pathkeys;
1279 : : ListCell *l;
1280 : :
1281 : : /*
1282 : : * We only consider the cheapest-total-cost input paths, since we are
1283 : : * assuming here that a sort is required. We will consider
1284 : : * cheapest-startup-cost input paths later, and only if they don't need a
1285 : : * sort.
1286 : : *
1287 : : * This function intentionally does not consider parameterized input
1288 : : * paths, except when the cheapest-total is parameterized. If we did so,
1289 : : * we'd have a combinatorial explosion of mergejoin paths of dubious
1290 : : * value. This interacts with decisions elsewhere that also discriminate
1291 : : * against mergejoins with parameterized inputs; see comments in
1292 : : * src/backend/optimizer/README.
1293 : : */
7755 tgl@sss.pgh.pa.us 1294 : 275674 : outer_path = outerrel->cheapest_total_path;
1295 : 275674 : inner_path = innerrel->cheapest_total_path;
1296 : :
1297 : : /*
1298 : : * If either cheapest-total path is parameterized by the other rel, we
1299 : : * can't use a mergejoin. (There's no use looking for alternative input
1300 : : * paths, since these should already be the least-parameterized available
1301 : : * paths.)
1302 : : */
4246 1303 [ + + + - : 275674 : if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
+ + + + +
- + + ]
1304 [ + + + - : 270420 : PATH_PARAM_BY_REL(inner_path, outerrel))
+ + + + +
- + + ]
4268 1305 : 10532 : return;
1306 : :
1307 : : /*
1308 : : * If unique-ification is requested, do it and then handle as a plain
1309 : : * inner join.
1310 : : */
7755 1311 [ + + ]: 265142 : if (jointype == JOIN_UNIQUE_OUTER)
1312 : : {
5722 1313 : 2241 : outer_path = (Path *) create_unique_path(root, outerrel,
1314 : : outer_path, extra->sjinfo);
1315 [ - + ]: 2241 : Assert(outer_path);
7755 1316 : 2241 : jointype = JOIN_INNER;
1317 : : }
1318 [ + + ]: 262901 : else if (jointype == JOIN_UNIQUE_INNER)
1319 : : {
5722 1320 : 2241 : inner_path = (Path *) create_unique_path(root, innerrel,
1321 : : inner_path, extra->sjinfo);
1322 [ - + ]: 2241 : Assert(inner_path);
7755 1323 : 2241 : jointype = JOIN_INNER;
1324 : : }
1325 : :
1326 : : /*
1327 : : * If the joinrel is parallel-safe, we may be able to consider a partial
1328 : : * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
1329 : : * outer path will be partial, and therefore we won't be able to properly
1330 : : * guarantee uniqueness. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT
1331 : : * and JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1332 : : * Also, the resulting path must not be parameterized.
1333 : : */
2595 rhaas@postgresql.org 1334 [ + + + + ]: 265142 : if (joinrel->consider_parallel &&
1335 [ + + ]: 230008 : save_jointype != JOIN_UNIQUE_OUTER &&
1336 [ + + ]: 228776 : save_jointype != JOIN_FULL &&
1337 [ + + ]: 188363 : save_jointype != JOIN_RIGHT &&
375 tgl@sss.pgh.pa.us 1338 : 187712 : save_jointype != JOIN_RIGHT_ANTI &&
2595 rhaas@postgresql.org 1339 [ + + ]: 187712 : outerrel->partial_pathlist != NIL &&
1340 [ + - ]: 4909 : bms_is_empty(joinrel->lateral_relids))
1341 : : {
1342 : 4909 : cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
1343 : :
1344 [ + + ]: 4909 : if (inner_path->parallel_safe)
1345 : 4714 : cheapest_safe_inner = inner_path;
1346 [ + + ]: 195 : else if (save_jointype != JOIN_UNIQUE_INNER)
1347 : : cheapest_safe_inner =
1348 : 192 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1349 : : }
1350 : :
1351 : : /*
1352 : : * Each possible ordering of the available mergejoin clauses will generate
1353 : : * a differently-sorted result path at essentially the same cost. We have
1354 : : * no basis for choosing one over another at this level of joining, but
1355 : : * some sort orders may be more useful than others for higher-level
1356 : : * mergejoins, so it's worth considering multiple orderings.
1357 : : *
1358 : : * Actually, it's not quite true that every mergeclause ordering will
1359 : : * generate a different path order, because some of the clauses may be
1360 : : * partially redundant (refer to the same EquivalenceClasses). Therefore,
1361 : : * what we do is convert the mergeclause list to a list of canonical
1362 : : * pathkeys, and then consider different orderings of the pathkeys.
1363 : : *
1364 : : * Generating a path for *every* permutation of the pathkeys doesn't seem
1365 : : * like a winning strategy; the cost in planning time is too high. For
1366 : : * now, we generate one path for each pathkey, listing that pathkey first
1367 : : * and the rest in random order. This should allow at least a one-clause
1368 : : * mergejoin without re-sorting against any other possible mergejoin
1369 : : * partner path. But if we've not guessed the right ordering of secondary
1370 : : * keys, we may end up evaluating clauses as qpquals when they could have
1371 : : * been done as mergeclauses. (In practice, it's rare that there's more
1372 : : * than two or three mergeclauses, so expending a huge amount of thought
1373 : : * on that is probably not worth it.)
1374 : : *
1375 : : * The pathkey order returned by select_outer_pathkeys_for_merge() has
1376 : : * some heuristics behind it (see that function), so be sure to try it
1377 : : * exactly as-is as well as making variants.
1378 : : */
6294 tgl@sss.pgh.pa.us 1379 : 265142 : all_pathkeys = select_outer_pathkeys_for_merge(root,
1380 : : extra->mergeclause_list,
1381 : : joinrel);
1382 : :
7263 neilc@samurai.com 1383 [ + + + + : 507347 : foreach(l, all_pathkeys)
+ + ]
1384 : : {
703 tgl@sss.pgh.pa.us 1385 : 242205 : PathKey *front_pathkey = (PathKey *) lfirst(l);
1386 : : List *cur_mergeclauses;
1387 : : List *outerkeys;
1388 : : List *innerkeys;
1389 : : List *merge_pathkeys;
1390 : :
1391 : : /* Make a pathkey list with this guy first */
7263 neilc@samurai.com 1392 [ + + ]: 242205 : if (l != list_head(all_pathkeys))
6294 tgl@sss.pgh.pa.us 1393 : 21622 : outerkeys = lcons(front_pathkey,
1394 : : list_delete_nth_cell(list_copy(all_pathkeys),
1395 : : foreach_current_index(l)));
1396 : : else
5995 bruce@momjian.us 1397 : 220583 : outerkeys = all_pathkeys; /* no work at first one... */
1398 : :
1399 : : /* Sort the mergeclauses into the corresponding ordering */
1400 : : cur_mergeclauses =
2242 tgl@sss.pgh.pa.us 1401 : 242205 : find_mergeclauses_for_outer_pathkeys(root,
1402 : : outerkeys,
1403 : : extra->mergeclause_list);
1404 : :
1405 : : /* Should have used them all... */
3262 1406 [ - + ]: 242205 : Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1407 : :
1408 : : /* Build sort pathkeys for the inner side */
6294 1409 : 242205 : innerkeys = make_inner_pathkeys_for_merge(root,
1410 : : cur_mergeclauses,
1411 : : outerkeys);
1412 : :
1413 : : /* Build pathkeys representing output sort order */
7021 1414 : 242205 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1415 : : outerkeys);
1416 : :
1417 : : /*
1418 : : * And now we can make the path.
1419 : : *
1420 : : * Note: it's possible that the cheapest paths will already be sorted
1421 : : * properly. try_mergejoin_path will detect that case and suppress an
1422 : : * explicit sort step, so we needn't do so here.
1423 : : */
4461 1424 : 242205 : try_mergejoin_path(root,
1425 : : joinrel,
1426 : : outer_path,
1427 : : inner_path,
1428 : : merge_pathkeys,
1429 : : cur_mergeclauses,
1430 : : outerkeys,
1431 : : innerkeys,
1432 : : jointype,
1433 : : extra,
1434 : : false);
1435 : :
1436 : : /*
1437 : : * If we have partial outer and parallel safe inner path then try
1438 : : * partial mergejoin path.
1439 : : */
2595 rhaas@postgresql.org 1440 [ + + + + ]: 242205 : if (cheapest_partial_outer && cheapest_safe_inner)
1441 : 6075 : try_partial_mergejoin_path(root,
1442 : : joinrel,
1443 : : cheapest_partial_outer,
1444 : : cheapest_safe_inner,
1445 : : merge_pathkeys,
1446 : : cur_mergeclauses,
1447 : : outerkeys,
1448 : : innerkeys,
1449 : : jointype,
1450 : : extra);
1451 : : }
1452 : : }
1453 : :
1454 : : /*
1455 : : * generate_mergejoin_paths
1456 : : * Creates possible mergejoin paths for input outerpath.
1457 : : *
1458 : : * We generate mergejoins if mergejoin clauses are available. We have
1459 : : * two ways to generate the inner path for a mergejoin: sort the cheapest
1460 : : * inner path, or use an inner path that is already suitably ordered for the
1461 : : * merge. If we have several mergeclauses, it could be that there is no inner
1462 : : * path (or only a very expensive one) for the full list of mergeclauses, but
1463 : : * better paths exist if we truncate the mergeclause list (thereby discarding
1464 : : * some sort key requirements). So, we consider truncations of the
1465 : : * mergeclause list as well as the full list. (Ideally we'd consider all
1466 : : * subsets of the mergeclause list, but that seems way too expensive.)
1467 : : */
1468 : : static void
2671 1469 : 521810 : generate_mergejoin_paths(PlannerInfo *root,
1470 : : RelOptInfo *joinrel,
1471 : : RelOptInfo *innerrel,
1472 : : Path *outerpath,
1473 : : JoinType jointype,
1474 : : JoinPathExtraData *extra,
1475 : : bool useallclauses,
1476 : : Path *inner_cheapest_total,
1477 : : List *merge_pathkeys,
1478 : : bool is_partial)
1479 : : {
1480 : : List *mergeclauses;
1481 : : List *innersortkeys;
1482 : : List *trialsortkeys;
1483 : : Path *cheapest_startup_inner;
1484 : : Path *cheapest_total_inner;
1485 : 521810 : JoinType save_jointype = jointype;
1486 : : int num_sortkeys;
1487 : : int sortkeycnt;
1488 : :
1489 [ + - + + ]: 521810 : if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
1490 : 4651 : jointype = JOIN_INNER;
1491 : :
1492 : : /* Look for useful mergeclauses (if any) */
1493 : : mergeclauses =
2242 tgl@sss.pgh.pa.us 1494 : 521810 : find_mergeclauses_for_outer_pathkeys(root,
1495 : : outerpath->pathkeys,
1496 : : extra->mergeclause_list);
1497 : :
1498 : : /*
1499 : : * Done with this outer path if no chance for a mergejoin.
1500 : : *
1501 : : * Special corner case: for "x FULL JOIN y ON true", there will be no join
1502 : : * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1503 : : * but since mergejoin is our only join type that supports FULL JOIN
1504 : : * without any join clauses, it's necessary to generate a clauseless
1505 : : * mergejoin path instead.
1506 : : */
2671 rhaas@postgresql.org 1507 [ + + ]: 521810 : if (mergeclauses == NIL)
1508 : : {
1509 [ + + ]: 349240 : if (jointype == JOIN_FULL)
1510 : : /* okay to try for mergejoin */ ;
1511 : : else
1512 : 347566 : return;
1513 : : }
1514 [ + + + + ]: 216859 : if (useallclauses &&
1515 : 42615 : list_length(mergeclauses) != list_length(extra->mergeclause_list))
1516 : 5678 : return;
1517 : :
1518 : : /* Compute the required ordering of the inner path */
1519 : 168566 : innersortkeys = make_inner_pathkeys_for_merge(root,
1520 : : mergeclauses,
1521 : : outerpath->pathkeys);
1522 : :
1523 : : /*
1524 : : * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1525 : : * a sort will be needed, only cheapest total cost matters. (But
1526 : : * try_mergejoin_path will do the right thing if inner_cheapest_total is
1527 : : * already correctly sorted.)
1528 : : */
1529 : 168566 : try_mergejoin_path(root,
1530 : : joinrel,
1531 : : outerpath,
1532 : : inner_cheapest_total,
1533 : : merge_pathkeys,
1534 : : mergeclauses,
1535 : : NIL,
1536 : : innersortkeys,
1537 : : jointype,
1538 : : extra,
1539 : : is_partial);
1540 : :
1541 : : /* Can't do anything else if inner path needs to be unique'd */
1542 [ + + ]: 168566 : if (save_jointype == JOIN_UNIQUE_INNER)
1543 : 901 : return;
1544 : :
1545 : : /*
1546 : : * Look for presorted inner paths that satisfy the innersortkey list ---
1547 : : * or any truncation thereof, if we are allowed to build a mergejoin using
1548 : : * a subset of the merge clauses. Here, we consider both cheap startup
1549 : : * cost and cheap total cost.
1550 : : *
1551 : : * Currently we do not consider parameterized inner paths here. This
1552 : : * interacts with decisions elsewhere that also discriminate against
1553 : : * mergejoins with parameterized inputs; see comments in
1554 : : * src/backend/optimizer/README.
1555 : : *
1556 : : * As we shorten the sortkey list, we should consider only paths that are
1557 : : * strictly cheaper than (in particular, not the same as) any path found
1558 : : * in an earlier iteration. Otherwise we'd be intentionally using fewer
1559 : : * merge keys than a given path allows (treating the rest as plain
1560 : : * joinquals), which is unlikely to be a good idea. Also, eliminating
1561 : : * paths here on the basis of compare_path_costs is a lot cheaper than
1562 : : * building the mergejoin path only to throw it away.
1563 : : *
1564 : : * If inner_cheapest_total is well enough sorted to have not required a
1565 : : * sort in the path made above, we shouldn't make a duplicate path with
1566 : : * it, either. We handle that case with the same logic that handles the
1567 : : * previous consideration, by initializing the variables that track
1568 : : * cheapest-so-far properly. Note that we do NOT reject
1569 : : * inner_cheapest_total if we find it matches some shorter set of
1570 : : * pathkeys. That case corresponds to using fewer mergekeys to avoid
1571 : : * sorting inner_cheapest_total, whereas we did sort it above, so the
1572 : : * plans being considered are different.
1573 : : */
1574 [ + + ]: 167665 : if (pathkeys_contained_in(innersortkeys,
1575 : : inner_cheapest_total->pathkeys))
1576 : : {
1577 : : /* inner_cheapest_total didn't require a sort */
1578 : 4884 : cheapest_startup_inner = inner_cheapest_total;
1579 : 4884 : cheapest_total_inner = inner_cheapest_total;
1580 : : }
1581 : : else
1582 : : {
1583 : : /* it did require a sort, at least for the full set of keys */
1584 : 162781 : cheapest_startup_inner = NULL;
1585 : 162781 : cheapest_total_inner = NULL;
1586 : : }
1587 : 167665 : num_sortkeys = list_length(innersortkeys);
1588 [ + + + + ]: 167665 : if (num_sortkeys > 1 && !useallclauses)
2489 tgl@sss.pgh.pa.us 1589 : 4647 : trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1590 : : else
2671 rhaas@postgresql.org 1591 : 163018 : trialsortkeys = innersortkeys; /* won't really truncate */
1592 : :
1593 [ + + ]: 303108 : for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1594 : : {
1595 : : Path *innerpath;
1596 : 172308 : List *newclauses = NIL;
1597 : :
1598 : : /*
1599 : : * Look for an inner path ordered well enough for the first
1600 : : * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1601 : : * destructively, which is why we made a copy...
1602 : : */
1603 : 172308 : trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1604 : 172308 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1605 : : trialsortkeys,
1606 : : NULL,
1607 : : TOTAL_COST,
1608 : : is_partial);
1609 [ + + + + ]: 172308 : if (innerpath != NULL &&
1610 [ + + ]: 7381 : (cheapest_total_inner == NULL ||
1611 : 7381 : compare_path_costs(innerpath, cheapest_total_inner,
1612 : : TOTAL_COST) < 0))
1613 : : {
1614 : : /* Found a cheap (or even-cheaper) sorted path */
1615 : : /* Select the right mergeclauses, if we didn't already */
1616 [ + + ]: 102636 : if (sortkeycnt < num_sortkeys)
1617 : : {
1618 : : newclauses =
2242 tgl@sss.pgh.pa.us 1619 : 1251 : trim_mergeclauses_for_inner_pathkeys(root,
1620 : : mergeclauses,
1621 : : trialsortkeys);
2671 rhaas@postgresql.org 1622 [ - + ]: 1251 : Assert(newclauses != NIL);
1623 : : }
1624 : : else
1625 : 101385 : newclauses = mergeclauses;
1626 : 102636 : try_mergejoin_path(root,
1627 : : joinrel,
1628 : : outerpath,
1629 : : innerpath,
1630 : : merge_pathkeys,
1631 : : newclauses,
1632 : : NIL,
1633 : : NIL,
1634 : : jointype,
1635 : : extra,
1636 : : is_partial);
1637 : 102636 : cheapest_total_inner = innerpath;
1638 : : }
1639 : : /* Same on the basis of cheapest startup cost ... */
1640 : 172308 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1641 : : trialsortkeys,
1642 : : NULL,
1643 : : STARTUP_COST,
1644 : : is_partial);
1645 [ + + + + ]: 172308 : if (innerpath != NULL &&
1646 [ + + ]: 7381 : (cheapest_startup_inner == NULL ||
1647 : 7381 : compare_path_costs(innerpath, cheapest_startup_inner,
1648 : : STARTUP_COST) < 0))
1649 : : {
1650 : : /* Found a cheap (or even-cheaper) sorted path */
1651 [ + + ]: 101969 : if (innerpath != cheapest_total_inner)
1652 : : {
1653 : : /*
1654 : : * Avoid rebuilding clause list if we already made one; saves
1655 : : * memory in big join trees...
1656 : : */
1657 [ + + ]: 514 : if (newclauses == NIL)
1658 : : {
1659 [ - + ]: 1 : if (sortkeycnt < num_sortkeys)
1660 : : {
1661 : : newclauses =
2242 tgl@sss.pgh.pa.us 1662 :UBC 0 : trim_mergeclauses_for_inner_pathkeys(root,
1663 : : mergeclauses,
1664 : : trialsortkeys);
2671 rhaas@postgresql.org 1665 [ # # ]: 0 : Assert(newclauses != NIL);
1666 : : }
1667 : : else
2671 rhaas@postgresql.org 1668 :CBC 1 : newclauses = mergeclauses;
1669 : : }
1670 : 514 : try_mergejoin_path(root,
1671 : : joinrel,
1672 : : outerpath,
1673 : : innerpath,
1674 : : merge_pathkeys,
1675 : : newclauses,
1676 : : NIL,
1677 : : NIL,
1678 : : jointype,
1679 : : extra,
1680 : : is_partial);
1681 : : }
1682 : 101969 : cheapest_startup_inner = innerpath;
1683 : : }
1684 : :
1685 : : /*
1686 : : * Don't consider truncated sortkeys if we need all clauses.
1687 : : */
1688 [ + + ]: 172308 : if (useallclauses)
1689 : 36865 : break;
1690 : : }
1691 : : }
1692 : :
1693 : : /*
1694 : : * match_unsorted_outer
1695 : : * Creates possible join paths for processing a single join relation
1696 : : * 'joinrel' by employing either iterative substitution or
1697 : : * mergejoining on each of its possible outer paths (considering
1698 : : * only outer paths that are already ordered well enough for merging).
1699 : : *
1700 : : * We always generate a nestloop path for each available outer path.
1701 : : * In fact we may generate as many as five: one on the cheapest-total-cost
1702 : : * inner path, one on the same with materialization, one on the
1703 : : * cheapest-startup-cost inner path (if different), one on the
1704 : : * cheapest-total inner-indexscan path (if any), and one on the
1705 : : * cheapest-startup inner-indexscan path (if different).
1706 : : *
1707 : : * We also consider mergejoins if mergejoin clauses are available. See
1708 : : * detailed comments in generate_mergejoin_paths.
1709 : : *
1710 : : * 'joinrel' is the join relation
1711 : : * 'outerrel' is the outer join relation
1712 : : * 'innerrel' is the inner join relation
1713 : : * 'jointype' is the type of join to do
1714 : : * 'extra' contains additional input values
1715 : : */
1716 : : static void
6888 tgl@sss.pgh.pa.us 1717 : 275674 : match_unsorted_outer(PlannerInfo *root,
1718 : : RelOptInfo *joinrel,
1719 : : RelOptInfo *outerrel,
1720 : : RelOptInfo *innerrel,
1721 : : JoinType jointype,
1722 : : JoinPathExtraData *extra)
1723 : : {
7755 1724 : 275674 : JoinType save_jointype = jointype;
1725 : : bool nestjoinOK;
1726 : : bool useallclauses;
1727 : 275674 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
7806 1728 : 275674 : Path *matpath = NULL;
1729 : : ListCell *lc1;
1730 : :
1731 : : /*
1732 : : * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1733 : : * are doing a right, right-anti or full mergejoin, we must use *all* the
1734 : : * mergeclauses as join clauses, else we will not have a valid plan.
1735 : : * (Although these two flags are currently inverses, keep them separate
1736 : : * for clarity and possible future changes.)
1737 : : */
8615 1738 [ + + + - ]: 275674 : switch (jointype)
1739 : : {
1740 : 225093 : case JOIN_INNER:
1741 : : case JOIN_LEFT:
1742 : : case JOIN_SEMI:
1743 : : case JOIN_ANTI:
1744 : 225093 : nestjoinOK = true;
8400 1745 : 225093 : useallclauses = false;
8615 1746 : 225093 : break;
8400 1747 : 46087 : case JOIN_RIGHT:
1748 : : case JOIN_RIGHT_ANTI:
1749 : : case JOIN_FULL:
8615 1750 : 46087 : nestjoinOK = false;
8400 1751 : 46087 : useallclauses = true;
1752 : 46087 : break;
5722 1753 : 4494 : case JOIN_UNIQUE_OUTER:
1754 : : case JOIN_UNIQUE_INNER:
1755 : 4494 : jointype = JOIN_INNER;
1756 : 4494 : nestjoinOK = true;
1757 : 4494 : useallclauses = false;
1758 : 4494 : break;
8400 tgl@sss.pgh.pa.us 1759 :UBC 0 : default:
7569 1760 [ # # ]: 0 : elog(ERROR, "unrecognized join type: %d",
1761 : : (int) jointype);
1762 : : nestjoinOK = false; /* keep compiler quiet */
1763 : : useallclauses = false;
1764 : : break;
1765 : : }
1766 : :
1767 : : /*
1768 : : * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1769 : : * we will consider it below as a member of cheapest_parameterized_paths,
1770 : : * but the other possibilities considered in this routine aren't usable.
1771 : : */
4246 tgl@sss.pgh.pa.us 1772 [ + + + - :CBC 275674 : if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
+ + + + +
- + + ]
1773 : 5278 : inner_cheapest_total = NULL;
1774 : :
1775 : : /*
1776 : : * If we need to unique-ify the inner path, we will consider only the
1777 : : * cheapest-total inner.
1778 : : */
5722 1779 [ + + ]: 275674 : if (save_jointype == JOIN_UNIQUE_INNER)
1780 : : {
1781 : : /* No way to do this with an inner path parameterized by outer rel */
4268 1782 [ + + ]: 2247 : if (inner_cheapest_total == NULL)
4268 tgl@sss.pgh.pa.us 1783 :GBC 6 : return;
1784 : : inner_cheapest_total = (Path *)
3262 tgl@sss.pgh.pa.us 1785 :CBC 2241 : create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
5722 1786 [ - + ]: 2241 : Assert(inner_cheapest_total);
1787 : : }
7755 1788 [ + + ]: 273427 : else if (nestjoinOK)
1789 : : {
1790 : : /*
1791 : : * Consider materializing the cheapest inner path, unless
1792 : : * enable_material is off or the path in question materializes its
1793 : : * output anyway.
1794 : : */
4268 1795 [ + + + + ]: 227340 : if (enable_material && inner_cheapest_total != NULL &&
5109 rhaas@postgresql.org 1796 [ + + ]: 221855 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1797 : : matpath = (Path *)
7755 tgl@sss.pgh.pa.us 1798 : 211091 : create_material_path(innerrel, inner_cheapest_total);
1799 : : }
1800 : :
4461 1801 [ + - + + : 903083 : foreach(lc1, outerrel->pathlist)
+ + ]
1802 : : {
1803 : 627415 : Path *outerpath = (Path *) lfirst(lc1);
1804 : : List *merge_pathkeys;
1805 : :
1806 : : /*
1807 : : * We cannot use an outer path that is parameterized by the inner rel.
1808 : : */
4246 1809 [ + + + - : 627415 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
+ + + + +
- + + ]
4461 1810 : 103932 : continue;
1811 : :
1812 : : /*
1813 : : * If we need to unique-ify the outer path, it's pointless to consider
1814 : : * any but the cheapest outer. (XXX we don't consider parameterized
1815 : : * outers, nor inners, for unique-ified cases. Should we?)
1816 : : */
7755 1817 [ + + ]: 523483 : if (save_jointype == JOIN_UNIQUE_OUTER)
1818 : : {
1819 [ + + ]: 2821 : if (outerpath != outerrel->cheapest_total_path)
1820 : 580 : continue;
5722 1821 : 2241 : outerpath = (Path *) create_unique_path(root, outerrel,
1822 : : outerpath, extra->sjinfo);
1823 [ - + ]: 2241 : Assert(outerpath);
1824 : : }
1825 : :
1826 : : /*
1827 : : * The result will have this sort order (even if it is implemented as
1828 : : * a nestloop, and even if some of the mergeclauses are implemented by
1829 : : * qpquals rather than as true mergeclauses):
1830 : : */
7021 1831 : 522903 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1832 : : outerpath->pathkeys);
1833 : :
4461 1834 [ + + ]: 522903 : if (save_jointype == JOIN_UNIQUE_INNER)
1835 : : {
1836 : : /*
1837 : : * Consider nestloop join, but only with the unique-ified cheapest
1838 : : * inner path
1839 : : */
1840 : 4225 : try_nestloop_path(root,
1841 : : joinrel,
1842 : : outerpath,
1843 : : inner_cheapest_total,
1844 : : merge_pathkeys,
1845 : : jointype,
1846 : : extra);
1847 : : }
1848 [ + + ]: 518678 : else if (nestjoinOK)
1849 : : {
1850 : : /*
1851 : : * Consider nestloop joins using this outer path and various
1852 : : * available paths for the inner relation. We consider the
1853 : : * cheapest-total paths for each available parameterization of the
1854 : : * inner relation, including the unparameterized case.
1855 : : */
1856 : : ListCell *lc2;
1857 : :
1858 [ + - + + : 1126596 : foreach(lc2, innerrel->cheapest_parameterized_paths)
+ + ]
1859 : : {
1860 : 697065 : Path *innerpath = (Path *) lfirst(lc2);
1861 : : Path *mpath;
1862 : :
1863 : 697065 : try_nestloop_path(root,
1864 : : joinrel,
1865 : : outerpath,
1866 : : innerpath,
1867 : : merge_pathkeys,
1868 : : jointype,
1869 : : extra);
1870 : :
1871 : : /*
1872 : : * Try generating a memoize path and see if that makes the
1873 : : * nested loop any cheaper.
1874 : : */
1005 drowley@postgresql.o 1875 : 697065 : mpath = get_memoize_path(root, innerrel, outerrel,
1876 : : innerpath, outerpath, jointype,
1877 : : extra);
1878 [ + + ]: 697065 : if (mpath != NULL)
1108 1879 : 117256 : try_nestloop_path(root,
1880 : : joinrel,
1881 : : outerpath,
1882 : : mpath,
1883 : : merge_pathkeys,
1884 : : jointype,
1885 : : extra);
1886 : : }
1887 : :
1888 : : /* Also consider materialized form of the cheapest inner path */
7806 tgl@sss.pgh.pa.us 1889 [ + + ]: 429531 : if (matpath != NULL)
4461 1890 : 403355 : try_nestloop_path(root,
1891 : : joinrel,
1892 : : outerpath,
1893 : : matpath,
1894 : : merge_pathkeys,
1895 : : jointype,
1896 : : extra);
1897 : : }
1898 : :
1899 : : /* Can't do anything else if outer path needs to be unique'd */
7755 1900 [ + + ]: 522903 : if (save_jointype == JOIN_UNIQUE_OUTER)
1901 : 2241 : continue;
1902 : :
1903 : : /* Can't do anything else if inner rel is parameterized by outer */
4246 1904 [ + + ]: 520662 : if (inner_cheapest_total == NULL)
4268 1905 : 5492 : continue;
1906 : :
1907 : : /* Generate merge join paths */
2671 rhaas@postgresql.org 1908 : 515170 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1909 : : save_jointype, extra, useallclauses,
1910 : : inner_cheapest_total, merge_pathkeys,
1911 : : false);
1912 : : }
1913 : :
1914 : : /*
1915 : : * Consider partial nestloop and mergejoin plan if outerrel has any
1916 : : * partial path and the joinrel is parallel-safe. However, we can't
1917 : : * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
1918 : : * therefore we won't be able to properly guarantee uniqueness. Nor can
1919 : : * we handle joins needing lateral rels, since partial paths must not be
1920 : : * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
1921 : : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1922 : : */
2595 1923 [ + + + + ]: 275668 : if (joinrel->consider_parallel &&
3007 1924 [ + + ]: 232637 : save_jointype != JOIN_UNIQUE_OUTER &&
2595 1925 [ + + ]: 231405 : save_jointype != JOIN_FULL &&
1926 [ + + ]: 190833 : save_jointype != JOIN_RIGHT &&
375 tgl@sss.pgh.pa.us 1927 : 190182 : save_jointype != JOIN_RIGHT_ANTI &&
2595 rhaas@postgresql.org 1928 [ + + ]: 190182 : outerrel->partial_pathlist != NIL &&
3007 1929 [ + - ]: 5110 : bms_is_empty(joinrel->lateral_relids))
1930 : : {
2595 1931 [ + - ]: 5110 : if (nestjoinOK)
1932 : 5110 : consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1933 : : save_jointype, extra);
1934 : :
1935 : : /*
1936 : : * If inner_cheapest_total is NULL or non parallel-safe then find the
1937 : : * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
1938 : : * can't use any alternative inner path.
1939 : : */
1940 [ + + ]: 5110 : if (inner_cheapest_total == NULL ||
1941 [ + + ]: 4909 : !inner_cheapest_total->parallel_safe)
1942 : : {
1943 [ + + ]: 396 : if (save_jointype == JOIN_UNIQUE_INNER)
1944 : 3 : return;
1945 : :
1536 alvherre@alvh.no-ip. 1946 : 393 : inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1947 : : }
1948 : :
2595 rhaas@postgresql.org 1949 [ + + ]: 5107 : if (inner_cheapest_total)
1950 : 4897 : consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
1951 : : save_jointype, extra,
1952 : : inner_cheapest_total);
1953 : : }
1954 : : }
1955 : :
1956 : : /*
1957 : : * consider_parallel_mergejoin
1958 : : * Try to build partial paths for a joinrel by joining a partial path
1959 : : * for the outer relation to a complete path for the inner relation.
1960 : : *
1961 : : * 'joinrel' is the join relation
1962 : : * 'outerrel' is the outer join relation
1963 : : * 'innerrel' is the inner join relation
1964 : : * 'jointype' is the type of join to do
1965 : : * 'extra' contains additional input values
1966 : : * 'inner_cheapest_total' cheapest total path for innerrel
1967 : : */
1968 : : static void
1969 : 4897 : consider_parallel_mergejoin(PlannerInfo *root,
1970 : : RelOptInfo *joinrel,
1971 : : RelOptInfo *outerrel,
1972 : : RelOptInfo *innerrel,
1973 : : JoinType jointype,
1974 : : JoinPathExtraData *extra,
1975 : : Path *inner_cheapest_total)
1976 : : {
1977 : : ListCell *lc1;
1978 : :
1979 : : /* generate merge join path for each partial outer path */
1980 [ + - + + : 11537 : foreach(lc1, outerrel->partial_pathlist)
+ + ]
1981 : : {
1982 : 6640 : Path *outerpath = (Path *) lfirst(lc1);
1983 : : List *merge_pathkeys;
1984 : :
1985 : : /*
1986 : : * Figure out what useful ordering any paths we create will have.
1987 : : */
1988 : 6640 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1989 : : outerpath->pathkeys);
1990 : :
1991 : 6640 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
1992 : : extra, false, inner_cheapest_total,
1993 : : merge_pathkeys, true);
1994 : : }
3007 1995 : 4897 : }
1996 : :
1997 : : /*
1998 : : * consider_parallel_nestloop
1999 : : * Try to build partial paths for a joinrel by joining a partial path for the
2000 : : * outer relation to a complete path for the inner relation.
2001 : : *
2002 : : * 'joinrel' is the join relation
2003 : : * 'outerrel' is the outer join relation
2004 : : * 'innerrel' is the inner join relation
2005 : : * 'jointype' is the type of join to do
2006 : : * 'extra' contains additional input values
2007 : : */
2008 : : static void
2009 : 5110 : consider_parallel_nestloop(PlannerInfo *root,
2010 : : RelOptInfo *joinrel,
2011 : : RelOptInfo *outerrel,
2012 : : RelOptInfo *innerrel,
2013 : : JoinType jointype,
2014 : : JoinPathExtraData *extra)
2015 : : {
2693 tgl@sss.pgh.pa.us 2016 : 5110 : JoinType save_jointype = jointype;
2017 : : ListCell *lc1;
2018 : :
2019 [ + + ]: 5110 : if (jointype == JOIN_UNIQUE_INNER)
2020 : 285 : jointype = JOIN_INNER;
2021 : :
3007 rhaas@postgresql.org 2022 [ + - + + : 12032 : foreach(lc1, outerrel->partial_pathlist)
+ + ]
2023 : : {
2024 : 6922 : Path *outerpath = (Path *) lfirst(lc1);
2025 : : List *pathkeys;
2026 : : ListCell *lc2;
2027 : :
2028 : : /* Figure out what useful ordering any paths we create will have. */
2029 : 6922 : pathkeys = build_join_pathkeys(root, joinrel, jointype,
2030 : : outerpath->pathkeys);
2031 : :
2032 : : /*
2033 : : * Try the cheapest parameterized paths; only those which will produce
2034 : : * an unparameterized path when joined to this outerrel will survive
2035 : : * try_partial_nestloop_path. The cheapest unparameterized path is
2036 : : * also in this list.
2037 : : */
2038 [ + - + + : 18050 : foreach(lc2, innerrel->cheapest_parameterized_paths)
+ + ]
2039 : : {
2040 : 11128 : Path *innerpath = (Path *) lfirst(lc2);
2041 : : Path *mpath;
2042 : :
2043 : : /* Can't join to an inner path that is not parallel-safe */
2044 [ + + ]: 11128 : if (!innerpath->parallel_safe)
2045 : 219 : continue;
2046 : :
2047 : : /*
2048 : : * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
2049 : : * cheapest_total_path, and we have to unique-ify it. (We might
2050 : : * be able to relax this to allow other safe, unparameterized
2051 : : * inner paths, but right now create_unique_path is not on board
2052 : : * with that.)
2053 : : */
2693 tgl@sss.pgh.pa.us 2054 [ + + ]: 10909 : if (save_jointype == JOIN_UNIQUE_INNER)
2055 : : {
2056 [ + + ]: 867 : if (innerpath != innerrel->cheapest_total_path)
3007 rhaas@postgresql.org 2057 : 441 : continue;
2058 : 426 : innerpath = (Path *) create_unique_path(root, innerrel,
2059 : : innerpath,
2060 : : extra->sjinfo);
3000 2061 [ - + ]: 426 : Assert(innerpath);
2062 : : }
2063 : :
3007 2064 : 10468 : try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
2065 : : pathkeys, jointype, extra);
2066 : :
2067 : : /*
2068 : : * Try generating a memoize path and see if that makes the nested
2069 : : * loop any cheaper.
2070 : : */
1005 drowley@postgresql.o 2071 : 10468 : mpath = get_memoize_path(root, innerrel, outerrel,
2072 : : innerpath, outerpath, jointype,
2073 : : extra);
2074 [ + + ]: 10468 : if (mpath != NULL)
2075 : 2651 : try_partial_nestloop_path(root, joinrel, outerpath, mpath,
2076 : : pathkeys, jointype, extra);
2077 : : }
2078 : : }
10141 scrappy@hub.org 2079 : 5110 : }
2080 : :
2081 : : /*
2082 : : * hash_inner_and_outer
2083 : : * Create hashjoin join paths by explicitly hashing both the outer and
2084 : : * inner keys of each available hash clause.
2085 : : *
2086 : : * 'joinrel' is the join relation
2087 : : * 'outerrel' is the outer join relation
2088 : : * 'innerrel' is the inner join relation
2089 : : * 'jointype' is the type of join to do
2090 : : * 'extra' contains additional input values
2091 : : */
2092 : : static void
6888 tgl@sss.pgh.pa.us 2093 : 277462 : hash_inner_and_outer(PlannerInfo *root,
2094 : : RelOptInfo *joinrel,
2095 : : RelOptInfo *outerrel,
2096 : : RelOptInfo *innerrel,
2097 : : JoinType jointype,
2098 : : JoinPathExtraData *extra)
2099 : : {
2693 2100 : 277462 : JoinType save_jointype = jointype;
4854 2101 : 277462 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2102 : : List *hashclauses;
2103 : : ListCell *l;
2104 : :
2105 : : /*
2106 : : * We need to build only one hashclauses list for any given pair of outer
2107 : : * and inner relations; all of the hashable clauses will be used as keys.
2108 : : *
2109 : : * Scan the join's restrictinfo list to find hashjoinable clauses that are
2110 : : * usable with this pair of sub-relations.
2111 : : */
7806 2112 : 277462 : hashclauses = NIL;
3262 2113 [ + + + + : 568414 : foreach(l, extra->restrictlist)
+ + ]
2114 : : {
7263 neilc@samurai.com 2115 : 290952 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2116 : :
2117 : : /*
2118 : : * If processing an outer join, only use its own join clauses for
2119 : : * hashing. For inner joins we need not be so picky.
2120 : : */
2186 tgl@sss.pgh.pa.us 2121 [ + + + + : 290952 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
- + ]
8615 2122 : 4518 : continue;
2123 : :
5322 2124 [ + + ]: 286434 : if (!restrictinfo->can_join ||
2125 [ + + ]: 255803 : restrictinfo->hashjoinoperator == InvalidOid)
2126 : 40104 : continue; /* not hashjoinable */
2127 : :
2128 : : /*
2129 : : * Check if clause has the form "outer op inner" or "inner op outer".
2130 : : */
5321 2131 [ + + ]: 246330 : if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
8833 2132 : 72 : continue; /* no good for these input relations */
2133 : :
7806 2134 : 246258 : hashclauses = lappend(hashclauses, restrictinfo);
2135 : : }
2136 : :
2137 : : /* If we found any usable hashclauses, make paths */
2138 [ + + ]: 277462 : if (hashclauses)
2139 : : {
2140 : : /*
2141 : : * We consider both the cheapest-total-cost and cheapest-startup-cost
2142 : : * outer paths. There's no need to consider any but the
2143 : : * cheapest-total-cost inner path, however.
2144 : : */
7559 bruce@momjian.us 2145 : 224350 : Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
2146 : 224350 : Path *cheapest_total_outer = outerrel->cheapest_total_path;
2147 : 224350 : Path *cheapest_total_inner = innerrel->cheapest_total_path;
2148 : :
2149 : : /*
2150 : : * If either cheapest-total path is parameterized by the other rel, we
2151 : : * can't use a hashjoin. (There's no use looking for alternative
2152 : : * input paths, since these should already be the least-parameterized
2153 : : * available paths.)
2154 : : */
4246 tgl@sss.pgh.pa.us 2155 [ + + + - : 224350 : if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
+ + + + +
- + + ]
2156 [ + + + - : 224018 : PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
+ + + + +
- + + ]
4268 2157 : 670 : return;
2158 : :
2159 : : /* Unique-ify if need be; we ignore parameterized possibilities */
7755 2160 [ + + ]: 223680 : if (jointype == JOIN_UNIQUE_OUTER)
2161 : : {
2162 : : cheapest_total_outer = (Path *)
5722 2163 : 1311 : create_unique_path(root, outerrel,
2164 : : cheapest_total_outer, extra->sjinfo);
2165 [ - + ]: 1311 : Assert(cheapest_total_outer);
7755 2166 : 1311 : jointype = JOIN_INNER;
4461 2167 : 1311 : try_hashjoin_path(root,
2168 : : joinrel,
2169 : : cheapest_total_outer,
2170 : : cheapest_total_inner,
2171 : : hashclauses,
2172 : : jointype,
2173 : : extra);
2174 : : /* no possibility of cheap startup here */
2175 : : }
7755 2176 [ + + ]: 222369 : else if (jointype == JOIN_UNIQUE_INNER)
2177 : : {
2178 : : cheapest_total_inner = (Path *)
5722 2179 : 1311 : create_unique_path(root, innerrel,
2180 : : cheapest_total_inner, extra->sjinfo);
2181 [ - + ]: 1311 : Assert(cheapest_total_inner);
7755 2182 : 1311 : jointype = JOIN_INNER;
4461 2183 : 1311 : try_hashjoin_path(root,
2184 : : joinrel,
2185 : : cheapest_total_outer,
2186 : : cheapest_total_inner,
2187 : : hashclauses,
2188 : : jointype,
2189 : : extra);
4246 2190 [ + - + + ]: 1311 : if (cheapest_startup_outer != NULL &&
2191 : : cheapest_startup_outer != cheapest_total_outer)
4461 2192 : 158 : try_hashjoin_path(root,
2193 : : joinrel,
2194 : : cheapest_startup_outer,
2195 : : cheapest_total_inner,
2196 : : hashclauses,
2197 : : jointype,
2198 : : extra);
2199 : : }
2200 : : else
2201 : : {
2202 : : /*
2203 : : * For other jointypes, we consider the cheapest startup outer
2204 : : * together with the cheapest total inner, and then consider
2205 : : * pairings of cheapest-total paths including parameterized ones.
2206 : : * There is no use in generating parameterized paths on the basis
2207 : : * of possibly cheap startup cost, so this is sufficient.
2208 : : */
2209 : : ListCell *lc1;
2210 : : ListCell *lc2;
2211 : :
4246 2212 [ + + ]: 221058 : if (cheapest_startup_outer != NULL)
2213 : 220749 : try_hashjoin_path(root,
2214 : : joinrel,
2215 : : cheapest_startup_outer,
2216 : : cheapest_total_inner,
2217 : : hashclauses,
2218 : : jointype,
2219 : : extra);
2220 : :
4461 2221 [ + - + + : 562692 : foreach(lc1, outerrel->cheapest_parameterized_paths)
+ + ]
2222 : : {
2223 : 341634 : Path *outerpath = (Path *) lfirst(lc1);
2224 : :
2225 : : /*
2226 : : * We cannot use an outer path that is parameterized by the
2227 : : * inner rel.
2228 : : */
4246 2229 [ + + + - : 341634 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
+ + + + +
- + + ]
4461 2230 : 100176 : continue;
2231 : :
2232 [ + - + + : 620201 : foreach(lc2, innerrel->cheapest_parameterized_paths)
+ + ]
2233 : : {
2234 : 378743 : Path *innerpath = (Path *) lfirst(lc2);
2235 : :
2236 : : /*
2237 : : * We cannot use an inner path that is parameterized by
2238 : : * the outer rel, either.
2239 : : */
4246 2240 [ + + + - : 378743 : if (PATH_PARAM_BY_REL(innerpath, outerrel))
+ + + + +
- + + ]
4461 2241 : 111739 : continue;
2242 : :
2243 [ + + + + ]: 267004 : if (outerpath == cheapest_startup_outer &&
2244 : : innerpath == cheapest_total_inner)
2489 2245 : 190406 : continue; /* already tried it */
2246 : :
4461 2247 : 76598 : try_hashjoin_path(root,
2248 : : joinrel,
2249 : : outerpath,
2250 : : innerpath,
2251 : : hashclauses,
2252 : : jointype,
2253 : : extra);
2254 : : }
2255 : : }
2256 : : }
2257 : :
2258 : : /*
2259 : : * If the joinrel is parallel-safe, we may be able to consider a
2260 : : * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
2261 : : * because the outer path will be partial, and therefore we won't be
2262 : : * able to properly guarantee uniqueness. Also, the resulting path
2263 : : * must not be parameterized.
2264 : : */
2916 rhaas@postgresql.org 2265 [ + + + + ]: 223680 : if (joinrel->consider_parallel &&
2693 tgl@sss.pgh.pa.us 2266 : 200042 : save_jointype != JOIN_UNIQUE_OUTER &&
3007 rhaas@postgresql.org 2267 [ + + ]: 200042 : outerrel->partial_pathlist != NIL &&
2268 [ + - ]: 6345 : bms_is_empty(joinrel->lateral_relids))
2269 : : {
2270 : : Path *cheapest_partial_outer;
2307 andres@anarazel.de 2271 : 6345 : Path *cheapest_partial_inner = NULL;
2930 tgl@sss.pgh.pa.us 2272 : 6345 : Path *cheapest_safe_inner = NULL;
2273 : :
3007 rhaas@postgresql.org 2274 : 6345 : cheapest_partial_outer =
2275 : 6345 : (Path *) linitial(outerrel->partial_pathlist);
2276 : :
2277 : : /*
2278 : : * Can we use a partial inner plan too, so that we can build a
2279 : : * shared hash table in parallel? We can't handle
2280 : : * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
2281 : : */
1762 tmunro@postgresql.or 2282 [ + + + + ]: 6345 : if (innerrel->partial_pathlist != NIL &&
2283 [ + + ]: 5925 : save_jointype != JOIN_UNIQUE_INNER &&
2284 : : enable_parallel_hash)
2285 : : {
2307 andres@anarazel.de 2286 : 5787 : cheapest_partial_inner =
2287 : 5787 : (Path *) linitial(innerrel->partial_pathlist);
2288 : 5787 : try_partial_hashjoin_path(root, joinrel,
2289 : : cheapest_partial_outer,
2290 : : cheapest_partial_inner,
2291 : : hashclauses, jointype, extra,
2292 : : true /* parallel_hash */ );
2293 : : }
2294 : :
2295 : : /*
2296 : : * Normally, given that the joinrel is parallel-safe, the cheapest
2297 : : * total inner path will also be parallel-safe, but if not, we'll
2298 : : * have to search for the cheapest safe, unparameterized inner
2299 : : * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
2300 : : * inner path. If full, right, or right-anti join, we can't use
2301 : : * parallelism (building the hash table in each backend) because
2302 : : * no one process has all the match bits.
2303 : : */
375 tgl@sss.pgh.pa.us 2304 [ + + + + ]: 6345 : if (save_jointype == JOIN_FULL ||
2305 [ + + ]: 4437 : save_jointype == JOIN_RIGHT ||
2306 : : save_jointype == JOIN_RIGHT_ANTI)
380 tmunro@postgresql.or 2307 : 2073 : cheapest_safe_inner = NULL;
2308 [ + + ]: 4272 : else if (cheapest_total_inner->parallel_safe)
3007 rhaas@postgresql.org 2309 : 4158 : cheapest_safe_inner = cheapest_total_inner;
2693 tgl@sss.pgh.pa.us 2310 [ + + ]: 114 : else if (save_jointype != JOIN_UNIQUE_INNER)
2311 : : cheapest_safe_inner =
2595 rhaas@postgresql.org 2312 : 111 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2313 : :
3007 2314 [ + + ]: 6345 : if (cheapest_safe_inner != NULL)
2315 : 4266 : try_partial_hashjoin_path(root, joinrel,
2316 : : cheapest_partial_outer,
2317 : : cheapest_safe_inner,
2318 : : hashclauses, jointype, extra,
2319 : : false /* parallel_hash */ );
2320 : : }
2321 : : }
2322 : : }
2323 : :
2324 : : /*
2325 : : * select_mergejoin_clauses
2326 : : * Select mergejoin clauses that are usable for a particular join.
2327 : : * Returns a list of RestrictInfo nodes for those clauses.
2328 : : *
2329 : : * *mergejoin_allowed is normally set to true, but it is set to false if
2330 : : * this is a right/right-anti/full join and there are nonmergejoinable join
2331 : : * clauses. The executor's mergejoin machinery cannot handle such cases, so
2332 : : * we have to avoid generating a mergejoin plan. (Note that this flag does
2333 : : * NOT consider whether there are actually any mergejoinable clauses. This is
2334 : : * correct because in some cases we need to build a clauseless mergejoin.
2335 : : * Simply returning NIL is therefore not enough to distinguish safe from
2336 : : * unsafe cases.)
2337 : : *
2338 : : * We also mark each selected RestrictInfo to show which side is currently
2339 : : * being considered as outer. These are transient markings that are only
2340 : : * good for the duration of the current add_paths_to_joinrel() call!
2341 : : *
2342 : : * We examine each restrictinfo clause known for the join to see
2343 : : * if it is mergejoinable and involves vars from the two sub-relations
2344 : : * currently of interest.
2345 : : */
2346 : : static List *
5940 tgl@sss.pgh.pa.us 2347 : 278183 : select_mergejoin_clauses(PlannerInfo *root,
2348 : : RelOptInfo *joinrel,
2349 : : RelOptInfo *outerrel,
2350 : : RelOptInfo *innerrel,
2351 : : List *restrictlist,
2352 : : JoinType jointype,
2353 : : bool *mergejoin_allowed)
2354 : : {
9008 2355 : 278183 : List *result_list = NIL;
8615 2356 : 278183 : bool isouterjoin = IS_OUTER_JOIN(jointype);
4853 2357 : 278183 : bool have_nonmergeable_joinclause = false;
2358 : : ListCell *l;
2359 : :
7263 neilc@samurai.com 2360 [ + + + + : 569860 : foreach(l, restrictlist)
+ + ]
2361 : : {
2362 : 291677 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2363 : :
2364 : : /*
2365 : : * If processing an outer join, only use its own join clauses in the
2366 : : * merge. For inner joins we can use pushed-down clauses too. (Note:
2367 : : * we don't set have_nonmergeable_joinclause here because pushed-down
2368 : : * clauses will become otherquals not joinquals.)
2369 : : */
2186 tgl@sss.pgh.pa.us 2370 [ + + + + : 291677 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
- + ]
6746 2371 : 4530 : continue;
2372 : :
2373 : : /* Check that clause is a mergeable operator clause */
7405 2374 [ + + ]: 287147 : if (!restrictinfo->can_join ||
6294 2375 [ + + ]: 256516 : restrictinfo->mergeopfamilies == NIL)
2376 : : {
2377 : : /*
2378 : : * The executor can handle extra joinquals that are constants, but
2379 : : * not anything else, when doing right/right-anti/full merge join.
2380 : : * (The reason to support constants is so we can do FULL JOIN ON
2381 : : * FALSE.)
2382 : : */
5213 2383 [ + - + + ]: 39548 : if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
4853 2384 : 35078 : have_nonmergeable_joinclause = true;
8833 2385 : 39548 : continue; /* not mergejoinable */
2386 : : }
2387 : :
2388 : : /*
2389 : : * Check if clause has the form "outer op inner" or "inner op outer".
2390 : : */
5321 2391 [ + + ]: 247599 : if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
2392 : : {
4853 2393 : 384 : have_nonmergeable_joinclause = true;
7760 2394 : 384 : continue; /* no good for these input relations */
2395 : : }
2396 : :
2397 : : /*
2398 : : * Insist that each side have a non-redundant eclass. This
2399 : : * restriction is needed because various bits of the planner expect
2400 : : * that each clause in a merge be associable with some pathkey in a
2401 : : * canonical pathkey list, but redundant eclasses can't appear in
2402 : : * canonical sort orderings. (XXX it might be worth relaxing this,
2403 : : * but not enough time to address it for 8.3.)
2404 : : */
4916 2405 : 247215 : update_mergeclause_eclasses(root, restrictinfo);
2406 : :
5940 2407 [ + + ]: 247215 : if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2408 [ + + ]: 247191 : EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2409 : : {
4853 2410 : 64 : have_nonmergeable_joinclause = true;
5940 2411 : 64 : continue; /* can't handle redundant eclasses */
2412 : : }
2413 : :
6294 2414 : 247151 : result_list = lappend(result_list, restrictinfo);
2415 : : }
2416 : :
2417 : : /*
2418 : : * Report whether mergejoin is allowed (see comment at top of function).
2419 : : */
4854 2420 [ + + ]: 278183 : switch (jointype)
2421 : : {
2422 : 50196 : case JOIN_RIGHT:
2423 : : case JOIN_RIGHT_ANTI:
2424 : : case JOIN_FULL:
4853 2425 : 50196 : *mergejoin_allowed = !have_nonmergeable_joinclause;
4854 2426 : 50196 : break;
2427 : 227987 : default:
4853 2428 : 227987 : *mergejoin_allowed = true;
4854 2429 : 227987 : break;
2430 : : }
2431 : :
9008 2432 : 278183 : return result_list;
2433 : : }
|