Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * joinpath.c
4 : * Routines to find all possible paths for processing a set of joins
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/path/joinpath.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <math.h>
18 :
19 : #include "executor/executor.h"
20 : #include "foreign/fdwapi.h"
21 : #include "nodes/nodeFuncs.h"
22 : #include "optimizer/cost.h"
23 : #include "optimizer/optimizer.h"
24 : #include "optimizer/pathnode.h"
25 : #include "optimizer/paths.h"
26 : #include "optimizer/planmain.h"
27 : #include "utils/typcache.h"
28 :
29 : /* Hook for plugins to get control in add_paths_to_joinrel() */
30 : set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
31 :
32 : /*
33 : * Paths parameterized by the parent can be considered to be parameterized by
34 : * any of its child.
35 : */
36 : #define PATH_PARAM_BY_PARENT(path, rel) \
37 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
38 : (rel)->top_parent_relids))
39 : #define PATH_PARAM_BY_REL_SELF(path, rel) \
40 : ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
41 :
42 : #define PATH_PARAM_BY_REL(path, rel) \
43 : (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
44 :
45 : static void try_partial_mergejoin_path(PlannerInfo *root,
46 : RelOptInfo *joinrel,
47 : Path *outer_path,
48 : Path *inner_path,
49 : List *pathkeys,
50 : List *mergeclauses,
51 : List *outersortkeys,
52 : List *innersortkeys,
53 : JoinType jointype,
54 : JoinPathExtraData *extra);
55 : static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
56 : RelOptInfo *outerrel, RelOptInfo *innerrel,
57 : JoinType jointype, JoinPathExtraData *extra);
58 : static inline bool clause_sides_match_join(RestrictInfo *rinfo,
59 : RelOptInfo *outerrel,
60 : RelOptInfo *innerrel);
61 : static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
62 : RelOptInfo *outerrel, RelOptInfo *innerrel,
63 : JoinType jointype, JoinPathExtraData *extra);
64 : static void consider_parallel_nestloop(PlannerInfo *root,
65 : RelOptInfo *joinrel,
66 : RelOptInfo *outerrel,
67 : RelOptInfo *innerrel,
68 : JoinType jointype,
69 : JoinPathExtraData *extra);
70 : static void consider_parallel_mergejoin(PlannerInfo *root,
71 : RelOptInfo *joinrel,
72 : RelOptInfo *outerrel,
73 : RelOptInfo *innerrel,
74 : JoinType jointype,
75 : JoinPathExtraData *extra,
76 : Path *inner_cheapest_total);
77 : static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
78 : RelOptInfo *outerrel, RelOptInfo *innerrel,
79 : JoinType jointype, JoinPathExtraData *extra);
80 : static List *select_mergejoin_clauses(PlannerInfo *root,
81 : RelOptInfo *joinrel,
82 : RelOptInfo *outerrel,
83 : RelOptInfo *innerrel,
84 : List *restrictlist,
85 : JoinType jointype,
86 : bool *mergejoin_allowed);
87 : static void generate_mergejoin_paths(PlannerInfo *root,
88 : RelOptInfo *joinrel,
89 : RelOptInfo *innerrel,
90 : Path *outerpath,
91 : JoinType jointype,
92 : JoinPathExtraData *extra,
93 : bool useallclauses,
94 : Path *inner_cheapest_total,
95 : List *merge_pathkeys,
96 : bool is_partial);
97 :
98 :
99 : /*
100 : * add_paths_to_joinrel
101 : * Given a join relation and two component rels from which it can be made,
102 : * consider all possible paths that use the two component rels as outer
103 : * and inner rel respectively. Add these paths to the join rel's pathlist
104 : * if they survive comparison with other paths (and remove any existing
105 : * paths that are dominated by these paths).
106 : *
107 : * Modifies the pathlist field of the joinrel node to contain the best
108 : * paths found so far.
109 : *
110 : * jointype is not necessarily the same as sjinfo->jointype; it might be
111 : * "flipped around" if we are considering joining the rels in the opposite
112 : * direction from what's indicated in sjinfo.
113 : *
114 : * Also, this routine and others in this module accept the special JoinTypes
115 : * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
116 : * unique-ify the outer or inner relation and then apply a regular inner
117 : * join. These values are not allowed to propagate outside this module,
118 : * however. Path cost estimation code may need to recognize that it's
119 : * dealing with such a case --- the combination of nominal jointype INNER
120 : * with sjinfo->jointype == JOIN_SEMI indicates that.
121 : */
122 : void
6517 tgl 123 CBC 233694 : add_paths_to_joinrel(PlannerInfo *root,
124 : RelOptInfo *joinrel,
125 : RelOptInfo *outerrel,
126 : RelOptInfo *innerrel,
127 : JoinType jointype,
128 : SpecialJoinInfo *sjinfo,
129 : List *restrictlist)
130 : {
131 : JoinPathExtraData extra;
4482 132 233694 : bool mergejoin_allowed = true;
133 : ListCell *lc;
134 : Relids joinrelids;
135 :
136 : /*
137 : * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
138 : * between child relations, even if there is a SpecialJoinInfo node for
139 : * the join between the topmost parents. So, while calculating Relids set
140 : * representing the restriction, consider relids of topmost parent of
141 : * partitions.
142 : */
2011 rhaas 143 233694 : if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
144 4934 : joinrelids = joinrel->top_parent_relids;
145 : else
146 228760 : joinrelids = joinrel->relids;
147 :
2891 tgl 148 233694 : extra.restrictlist = restrictlist;
149 233694 : extra.mergeclause_list = NIL;
150 233694 : extra.sjinfo = sjinfo;
151 233694 : extra.param_source_rels = NULL;
152 :
153 : /*
154 : * See if the inner relation is provably unique for this outer rel.
155 : *
156 : * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
157 : * matter since the executor can make the equivalent optimization anyway;
158 : * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
159 : * must be considering a semijoin whose inner side is not provably unique
160 : * (else reduce_unique_semijoins would've simplified it), so there's no
161 : * point in calling innerrel_is_unique. However, if the LHS covers all of
162 : * the semijoin's min_lefthand, then it's appropriate to set inner_unique
163 : * because the path produced by create_unique_path will be unique relative
164 : * to the LHS. (If we have an LHS that's only part of the min_lefthand,
165 : * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
166 : * letting that value escape this module.
167 : */
2193 168 233694 : switch (jointype)
169 : {
170 5374 : case JOIN_SEMI:
171 : case JOIN_ANTI:
172 :
173 : /*
174 : * XXX it may be worth proving this to allow a Memoize to be
175 : * considered for Nested Loop Semi/Anti Joins.
176 : */
177 5374 : extra.inner_unique = false; /* well, unproven */
178 5374 : break;
179 1649 : case JOIN_UNIQUE_INNER:
2169 180 3298 : extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
181 1649 : outerrel->relids);
2193 182 1649 : break;
183 1649 : case JOIN_UNIQUE_OUTER:
2169 184 1649 : extra.inner_unique = innerrel_is_unique(root,
185 : joinrel->relids,
186 : outerrel->relids,
187 : innerrel,
188 : JOIN_INNER,
189 : restrictlist,
190 : false);
2193 191 1649 : break;
192 225022 : default:
2169 193 225022 : extra.inner_unique = innerrel_is_unique(root,
194 : joinrel->relids,
195 : outerrel->relids,
196 : innerrel,
197 : jointype,
198 : restrictlist,
199 : false);
2193 200 225022 : break;
201 : }
202 :
203 : /*
204 : * Find potential mergejoin clauses. We can skip this if we are not
205 : * interested in doing a mergejoin. However, mergejoin may be our only
206 : * way of implementing a full outer join, so override enable_mergejoin if
207 : * it's a full join.
208 : */
8172 209 233694 : if (enable_mergejoin || jointype == JOIN_FULL)
2891 210 233164 : extra.mergeclause_list = select_mergejoin_clauses(root,
211 : joinrel,
212 : outerrel,
213 : innerrel,
214 : restrictlist,
215 : jointype,
216 : &mergejoin_allowed);
217 :
218 : /*
219 : * If it's SEMI, ANTI, or inner_unique join, compute correction factors
220 : * for cost estimation. These will be the same for all paths.
221 : */
2193 222 233694 : if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
1815 223 77000 : compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
224 : jointype, sjinfo, restrictlist,
225 : &extra.semifactors);
226 :
227 : /*
228 : * Decide whether it's sensible to generate parameterized paths for this
229 : * joinrel, and if so, which relations such paths should require. There
230 : * is usually no need to create a parameterized result path unless there
231 : * is a join order restriction that prevents joining one of our input rels
232 : * directly to the parameter source rel instead of joining to the other
233 : * input rel. (But see allow_star_schema_join().) This restriction
234 : * reduces the number of parameterized paths we have to deal with at
235 : * higher join levels, without compromising the quality of the resulting
236 : * plan. We express the restriction as a Relids set that must overlap the
237 : * parameterization of any proposed join path. Note: param_source_rels
238 : * should contain only baserels, not OJ relids, so starting from
239 : * all_baserels not all_query_rels is correct.
240 : */
4090 tgl 241 GIC 501648 : foreach(lc, root->join_info_list)
242 : {
2328 tgl 243 CBC 267954 : SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
244 :
4090 tgl 245 ECB : /*
246 : * SJ is relevant to this join if we have some part of its RHS
247 : * (possibly not all of it), and haven't yet joined to its LHS. (This
248 : * test is pretty simplistic, but should be sufficient considering the
249 : * join has already been proven legal.) If the SJ is relevant, it
250 : * presents constraints for joining to anything not in its RHS.
251 : */
2011 rhaas 252 GIC 267954 : if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
253 175329 : !bms_overlap(joinrelids, sjinfo2->min_lefthand))
2891 tgl 254 CBC 8497 : extra.param_source_rels = bms_join(extra.param_source_rels,
2118 255 8497 : bms_difference(root->all_baserels,
256 8497 : sjinfo2->min_righthand));
4090 tgl 257 ECB :
258 : /* full joins constrain both sides symmetrically */
2328 tgl 259 GIC 270238 : if (sjinfo2->jointype == JOIN_FULL &&
2011 rhaas 260 2284 : bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
2011 rhaas 261 CBC 2256 : !bms_overlap(joinrelids, sjinfo2->min_righthand))
2891 tgl 262 336 : extra.param_source_rels = bms_join(extra.param_source_rels,
2118 263 336 : bms_difference(root->all_baserels,
264 336 : sjinfo2->min_lefthand));
4090 tgl 265 ECB : }
266 :
267 : /*
268 : * However, when a LATERAL subquery is involved, there will simply not be
269 : * any paths for the joinrel that aren't parameterized by whatever the
270 : * subquery is parameterized by, unless its parameterization is resolved
271 : * within the joinrel. So we might as well allow additional dependencies
272 : * on whatever residual lateral dependencies the joinrel will have.
273 : */
2680 tgl 274 GIC 467388 : extra.param_source_rels = bms_add_members(extra.param_source_rels,
275 233694 : joinrel->lateral_relids);
3522 tgl 276 ECB :
8462 277 : /*
278 : * 1. Consider mergejoin paths where both relations must be explicitly
279 : * sorted. Skip this if we can't mergejoin.
280 : */
4482 tgl 281 GIC 233694 : if (mergejoin_allowed)
4483 282 230857 : sort_inner_and_outer(root, joinrel, outerrel, innerrel,
2891 tgl 283 ECB : jointype, &extra);
9345 bruce 284 :
285 : /*
286 : * 2. Consider paths where the outer relation need not be explicitly
287 : * sorted. This includes both nestloops and mergejoins where the outer
288 : * path is already ordered. Again, skip this if we can't mergejoin.
289 : * (That's okay because we know that nestloop can't handle
290 : * right/right-anti/full joins at all, so it wouldn't work in the
291 : * prohibited cases either.)
292 : */
4482 tgl 293 GIC 233694 : if (mergejoin_allowed)
4483 294 230857 : match_unsorted_outer(root, joinrel, outerrel, innerrel,
295 : jointype, &extra);
9345 bruce 296 ECB :
8454 tgl 297 : #ifdef NOT_USED
298 :
299 : /*
300 : * 3. Consider paths where the inner relation need not be explicitly
301 : * sorted. This includes mergejoins only (nestloops were already built in
302 : * match_unsorted_outer).
303 : *
304 : * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
305 : * significant difference between the inner and outer side of a mergejoin,
306 : * so match_unsorted_inner creates no paths that aren't equivalent to
307 : * those made by match_unsorted_outer when add_paths_to_joinrel() is
308 : * invoked with the two rels given in the other order.
309 : */
310 : if (mergejoin_allowed)
311 : match_unsorted_inner(root, joinrel, outerrel, innerrel,
312 : jointype, &extra);
313 : #endif
314 :
315 : /*
316 : * 4. Consider paths where both outer and inner relations must be hashed
317 : * before being joined. As above, disregard enable_hashjoin for full
318 : * joins, because there may be no other alternative.
319 : */
4483 tgl 320 GIC 233694 : if (enable_hashjoin || jointype == JOIN_FULL)
8454 321 232482 : hash_inner_and_outer(root, joinrel, outerrel, innerrel,
322 : jointype, &extra);
2900 rhaas 323 ECB :
324 : /*
325 : * 5. If inner and outer relations are foreign tables (or joins) belonging
326 : * to the same server and assigned to the same user to check access
327 : * permissions as, give the FDW a chance to push down joins.
328 : */
2900 rhaas 329 GIC 233694 : if (joinrel->fdwroutine &&
330 611 : joinrel->fdwroutine->GetForeignJoinPaths)
331 609 : joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
2900 rhaas 332 ECB : outerrel, innerrel,
2891 tgl 333 : jointype, &extra);
334 :
335 : /*
336 : * 6. Finally, give extensions a chance to manipulate the path list.
337 : */
2900 rhaas 338 GIC 233694 : if (set_join_pathlist_hook)
2900 rhaas 339 UIC 0 : set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
340 : jointype, &extra);
4090 tgl 341 CBC 233694 : }
4090 tgl 342 EUB :
343 : /*
2805 tgl 344 ECB : * We override the param_source_rels heuristic to accept nestloop paths in
345 : * which the outer rel satisfies some but not all of the inner path's
346 : * parameterization. This is necessary to get good plans for star-schema
347 : * scenarios, in which a parameterized path for a large table may require
348 : * parameters from multiple small tables that will not get joined directly to
349 : * each other. We can handle that by stacking nestloops that have the small
350 : * tables on the outside; but this breaks the rule the param_source_rels
351 : * heuristic is based on, namely that parameters should not be passed down
352 : * across joins unless there's a join-order-constraint-based reason to do so.
353 : * So we ignore the param_source_rels restriction when this case applies.
354 : *
355 : * allow_star_schema_join() returns true if the param_source_rels restriction
356 : * should be overridden, ie, it's okay to perform this join.
357 : */
358 : static inline bool
2805 tgl 359 GIC 89797 : allow_star_schema_join(PlannerInfo *root,
360 : Relids outerrelids,
361 : Relids inner_paramrels)
2805 tgl 362 ECB : {
363 : /*
364 : * It's a star-schema case if the outer rel provides some but not all of
365 : * the inner rel's parameterization.
366 : */
2063 rhaas 367 GIC 104256 : return (bms_overlap(inner_paramrels, outerrelids) &&
368 14459 : bms_nonempty_difference(inner_paramrels, outerrelids));
369 : }
2799 tgl 370 ECB :
371 : /*
372 : * If the parameterization is only partly satisfied by the outer rel,
373 : * the unsatisfied part can't include any outer-join relids that could
374 : * null rels of the satisfied part. That would imply that we're trying
375 : * to use a clause involving a Var with nonempty varnullingrels at
376 : * a join level where that value isn't yet computable.
377 : *
378 : * In practice, this test never finds a problem because earlier join order
379 : * restrictions prevent us from attempting a join that would cause a problem.
380 : * (That's unsurprising, because the code worked before we ever added
381 : * outer-join relids to expression relids.) It still seems worth checking
382 : * as a backstop, but we only do so in assert-enabled builds.
383 : */
384 : #ifdef USE_ASSERT_CHECKING
385 : static inline bool
69 tgl 386 GNC 908723 : have_unsafe_outer_join_ref(PlannerInfo *root,
387 : Relids outerrelids,
388 : Relids inner_paramrels)
389 : {
390 908723 : bool result = false;
391 908723 : Relids unsatisfied = bms_difference(inner_paramrels, outerrelids);
55 392 908723 : Relids satisfied = bms_intersect(inner_paramrels, outerrelids);
393 :
394 908723 : if (bms_overlap(unsatisfied, root->outer_join_rels))
395 : {
396 : ListCell *lc;
397 :
69 398 36 : foreach(lc, root->join_info_list)
399 : {
400 24 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
401 :
402 24 : if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
403 12 : continue; /* not relevant */
55 404 12 : if (bms_overlap(satisfied, sjinfo->min_righthand) ||
69 405 12 : (sjinfo->jointype == JOIN_FULL &&
55 tgl 406 UNC 0 : bms_overlap(satisfied, sjinfo->min_lefthand)))
407 : {
69 408 0 : result = true; /* doesn't work */
409 0 : break;
410 : }
411 : }
412 : }
413 :
414 : /* Waste no memory when we reject a path here */
69 tgl 415 GNC 908723 : bms_free(unsatisfied);
55 416 908723 : bms_free(satisfied);
417 :
69 418 908723 : return result;
419 : }
420 : #endif /* USE_ASSERT_CHECKING */
421 :
737 drowley 422 ECB : /*
423 : * paraminfo_get_equal_hashops
424 : * Determine if param_info and innerrel's lateral_vars can be hashed.
425 : * Returns true the hashing is possible, otherwise return false.
426 : *
427 : * Additionally we also collect the outer exprs and the hash operators for
428 : * each parameter to innerrel. These set in 'param_exprs', 'operators' and
429 : * 'binary_mode' when we return true.
430 : */
431 : static bool
737 drowley 432 GIC 130961 : paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info,
433 : RelOptInfo *outerrel, RelOptInfo *innerrel,
434 : List **param_exprs, List **operators,
435 : bool *binary_mode)
436 :
437 : {
438 : ListCell *lc;
439 :
737 drowley 440 CBC 130961 : *param_exprs = NIL;
737 drowley 441 GIC 130961 : *operators = NIL;
501 442 130961 : *binary_mode = false;
443 :
737 drowley 444 CBC 130961 : if (param_info != NULL)
737 drowley 445 ECB : {
737 drowley 446 CBC 130961 : List *clauses = param_info->ppi_clauses;
447 :
448 235961 : foreach(lc, clauses)
449 : {
737 drowley 450 GIC 139782 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
451 : OpExpr *opexpr;
737 drowley 452 ECB : Node *expr;
453 : Oid hasheqoperator;
454 :
517 drowley 455 GIC 139782 : opexpr = (OpExpr *) rinfo->clause;
517 drowley 456 ECB :
457 : /*
458 : * Bail if the rinfo is not compatible. We need a join OpExpr
459 : * with 2 args.
517 drowley 460 EUB : */
517 drowley 461 GIC 139782 : if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
737 drowley 462 GBC 130436 : !clause_sides_match_join(rinfo, outerrel, innerrel))
737 drowley 463 EUB : {
737 drowley 464 GIC 34710 : list_free(*operators);
465 34710 : list_free(*param_exprs);
466 34782 : return false;
467 : }
468 :
737 drowley 469 CBC 105072 : if (rinfo->outer_is_left)
517 drowley 470 ECB : {
737 drowley 471 GIC 61697 : expr = (Node *) linitial(opexpr->args);
517 drowley 472 CBC 61697 : hasheqoperator = rinfo->left_hasheqoperator;
473 : }
474 : else
475 : {
737 drowley 476 GIC 43375 : expr = (Node *) lsecond(opexpr->args);
517 477 43375 : hasheqoperator = rinfo->right_hasheqoperator;
478 : }
479 :
480 : /* can't do memoize if we can't hash the outer type */
481 105072 : if (!OidIsValid(hasheqoperator))
482 : {
483 72 : list_free(*operators);
484 72 : list_free(*param_exprs);
485 72 : return false;
517 drowley 486 ECB : }
487 :
517 drowley 488 GIC 105000 : *operators = lappend_oid(*operators, hasheqoperator);
737 489 105000 : *param_exprs = lappend(*param_exprs, expr);
490 :
491 : /*
492 : * When the join operator is not hashable then it's possible that
493 : * the operator will be able to distinguish something that the
501 drowley 494 ECB : * hash equality operator could not. For example with floating
495 : * point types -0.0 and +0.0 are classed as equal by the hash
496 : * function and equality function, but some other operator may be
497 : * able to tell those values apart. This means that we must put
498 : * memoize into binary comparison mode so that it does bit-by-bit
499 : * comparisons rather than a "logical" comparison as it would
500 : * using the hash equality operator.
501 : */
501 drowley 502 CBC 105000 : if (!OidIsValid(rinfo->hashjoinoperator))
501 drowley 503 GIC 297 : *binary_mode = true;
737 drowley 504 ECB : }
505 : }
506 :
507 : /* Now add any lateral vars to the cache key too */
737 drowley 508 GIC 97653 : foreach(lc, innerrel->lateral_vars)
737 drowley 509 ECB : {
737 drowley 510 GIC 1546 : Node *expr = (Node *) lfirst(lc);
511 : TypeCacheEntry *typentry;
512 :
513 : /* Reject if there are any volatile functions */
514 1546 : if (contain_volatile_functions(expr))
737 drowley 515 ECB : {
737 drowley 516 LBC 0 : list_free(*operators);
737 drowley 517 UIC 0 : list_free(*param_exprs);
737 drowley 518 CBC 72 : return false;
737 drowley 519 ECB : }
520 :
737 drowley 521 GIC 1546 : typentry = lookup_type_cache(exprType(expr),
522 : TYPECACHE_HASH_PROC | TYPECACHE_EQ_OPR);
737 drowley 523 ECB :
524 : /* can't use a memoize node without a valid hash equals operator */
737 drowley 525 CBC 1546 : if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
737 drowley 526 ECB : {
737 drowley 527 GIC 72 : list_free(*operators);
528 72 : list_free(*param_exprs);
529 72 : return false;
737 drowley 530 ECB : }
531 :
737 drowley 532 GIC 1474 : *operators = lappend_oid(*operators, typentry->eq_opr);
533 1474 : *param_exprs = lappend(*param_exprs, expr);
534 :
501 drowley 535 ECB : /*
536 : * We must go into binary mode as we don't have too much of an idea of
537 : * how these lateral Vars are being used. See comment above when we
538 : * set *binary_mode for the non-lateral Var case. This could be
539 : * relaxed a bit if we had the RestrictInfos and knew the operators
540 : * being used, however for cases like Vars that are arguments to
541 : * functions we must operate in binary mode as we don't have
542 : * visibility into what the function is doing with the Vars.
543 : */
501 drowley 544 GIC 1474 : *binary_mode = true;
545 : }
546 :
547 : /* We're okay to use memoize */
737 548 96107 : return true;
549 : }
550 :
551 : /*
552 : * get_memoize_path
553 : * If possible, make and return a Memoize path atop of 'inner_path'.
554 : * Otherwise return NULL.
555 : */
737 drowley 556 ECB : static Path *
634 drowley 557 CBC 583363 : get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
558 : RelOptInfo *outerrel, Path *inner_path,
559 : Path *outer_path, JoinType jointype,
560 : JoinPathExtraData *extra)
561 : {
562 : List *param_exprs;
737 drowley 563 ECB : List *hash_operators;
564 : ListCell *lc;
565 : bool binary_mode;
566 :
567 : /* Obviously not if it's disabled */
634 drowley 568 GIC 583363 : if (!enable_memoize)
737 drowley 569 GBC 194 : return NULL;
737 drowley 570 EUB :
737 drowley 571 ECB : /*
572 : * We can safely not bother with all this unless we expect to perform more
573 : * than one inner scan. The first scan is always going to be a cache
574 : * miss. This would likely fail later anyway based on costs, so this is
575 : * really just to save some wasted effort.
576 : */
737 drowley 577 GIC 583169 : if (outer_path->parent->rows < 2)
737 drowley 578 CBC 186716 : return NULL;
579 :
737 drowley 580 ECB : /*
634 581 : * We can only have a memoize node when there's some kind of cache key,
737 582 : * either parameterized path clauses or lateral Vars. No cache key sounds
583 : * more like something a Materialize node might be more useful for.
584 : */
737 drowley 585 CBC 396453 : if ((inner_path->param_info == NULL ||
586 163199 : inner_path->param_info->ppi_clauses == NIL) &&
737 drowley 587 GIC 239113 : innerrel->lateral_vars == NIL)
588 238019 : return NULL;
589 :
590 : /*
591 : * Currently we don't do this for SEMI and ANTI joins unless they're
592 : * marked as inner_unique. This is because nested loop SEMI/ANTI joins
593 : * don't scan the inner node to completion, which will mean memoize cannot
594 : * mark the cache entry as complete.
595 : *
596 : * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
737 drowley 597 ECB : * = true. Should we? See add_paths_to_joinrel()
598 : */
737 drowley 599 GIC 158434 : if (!extra->inner_unique && (jointype == JOIN_SEMI ||
600 : jointype == JOIN_ANTI))
737 drowley 601 CBC 7865 : return NULL;
602 :
603 : /*
604 : * Memoize normally marks cache entries as complete when it runs out of
605 : * tuples to read from its subplan. However, with unique joins, Nested
606 : * Loop will skip to the next outer tuple after finding the first matching
607 : * inner tuple. This means that we may not read the inner side of the
608 : * join to completion which leaves no opportunity to mark the cache entry
609 : * as complete. To work around that, when the join is unique we
687 drowley 610 ECB : * automatically mark cache entries as complete after fetching the first
611 : * tuple. This works when the entire join condition is parameterized.
612 : * Otherwise, when the parameterization is only a subset of the join
613 : * condition, we can't be sure which part of it causes the join to be
614 : * unique. This means there are no guarantees that only 1 tuple will be
615 : * read. We cannot mark the cache entry as complete after reading the
616 : * first tuple without that guarantee. This means the scope of Memoize
617 : * node's usefulness is limited to only outer rows that have no join
618 : * partner as this is the only case where Nested Loop would exhaust the
619 : * inner scan of a unique join. Since the scope is limited to that, we
620 : * just don't bother making a memoize path in this case.
621 : *
622 : * Lateral vars needn't be considered here as they're not considered when
623 : * determining if the join is unique.
624 : *
625 : * XXX this could be enabled if the remaining join quals were made part of
626 : * the inner scan's filter instead of the join filter. Maybe it's worth
627 : * considering doing that?
628 : */
687 drowley 629 GIC 150569 : if (extra->inner_unique &&
685 drowley 630 CBC 178356 : (inner_path->param_info == NULL ||
631 89178 : list_length(inner_path->param_info->ppi_clauses) <
685 drowley 632 GIC 89178 : list_length(extra->restrictlist)))
687 633 19608 : return NULL;
634 :
635 : /*
636 : * We can't use a memoize node if there are volatile functions in the
637 : * inner rel's target list or restrict list. A cache hit could reduce the
737 drowley 638 ECB : * number of calls to these functions.
639 : */
737 drowley 640 CBC 130961 : if (contain_volatile_functions((Node *) innerrel->reltarget))
737 drowley 641 LBC 0 : return NULL;
642 :
737 drowley 643 GIC 213814 : foreach(lc, innerrel->baserestrictinfo)
644 : {
645 82853 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
646 :
647 82853 : if (contain_volatile_functions((Node *) rinfo))
737 drowley 648 UIC 0 : return NULL;
649 : }
650 :
651 : /* Check if we have hash ops for each parameter to the path */
737 drowley 652 GIC 130961 : if (paraminfo_get_equal_hashops(root,
653 : inner_path->param_info,
125 tgl 654 GNC 130961 : outerrel->top_parent ?
655 : outerrel->top_parent : outerrel,
656 : innerrel,
657 : ¶m_exprs,
658 : &hash_operators,
659 : &binary_mode))
660 : {
634 drowley 661 GIC 96107 : return (Path *) create_memoize_path(root,
662 : innerrel,
663 : inner_path,
664 : param_exprs,
665 : hash_operators,
666 96107 : extra->inner_unique,
667 : binary_mode,
668 : outer_path->rows);
669 : }
670 :
737 671 34854 : return NULL;
737 drowley 672 ECB : }
673 :
4090 tgl 674 : /*
675 : * try_nestloop_path
676 : * Consider a nestloop join path; if it appears useful, push it into
677 : * the joinrel's pathlist via add_path().
678 : */
679 : static void
4090 tgl 680 GIC 998363 : try_nestloop_path(PlannerInfo *root,
681 : RelOptInfo *joinrel,
682 : Path *outer_path,
4090 tgl 683 ECB : Path *inner_path,
2891 tgl 684 EUB : List *pathkeys,
685 : JoinType jointype,
2891 tgl 686 ECB : JoinPathExtraData *extra)
687 : {
4090 688 : Relids required_outer;
689 : JoinCostWorkspace workspace;
2063 rhaas 690 CBC 998363 : RelOptInfo *innerrel = inner_path->parent;
2063 rhaas 691 GBC 998363 : RelOptInfo *outerrel = outer_path->parent;
692 : Relids innerrelids;
693 : Relids outerrelids;
2063 rhaas 694 GIC 998363 : Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
2063 rhaas 695 CBC 998363 : Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
696 :
2011 rhaas 697 ECB : /*
698 : * Paths are parameterized by top-level parents, so run parameterization
699 : * tests on the parent relids.
700 : */
2011 rhaas 701 GIC 998363 : if (innerrel->top_parent_relids)
702 15710 : innerrelids = innerrel->top_parent_relids;
703 : else
2011 rhaas 704 CBC 982653 : innerrelids = innerrel->relids;
705 :
2011 rhaas 706 GIC 998363 : if (outerrel->top_parent_relids)
707 15710 : outerrelids = outerrel->top_parent_relids;
708 : else
2011 rhaas 709 CBC 982653 : outerrelids = outerrel->relids;
710 :
711 : /*
712 : * Check to see if proposed path is still parameterized, and reject if the
713 : * parameterization wouldn't be sensible --- unless allow_star_schema_join
55 tgl 714 ECB : * says to allow it anyway. Also, we must reject if have_dangerous_phv
715 : * doesn't like the look of it, which could only happen if the nestloop is
716 : * still parameterized.
717 : */
2063 rhaas 718 GIC 998363 : required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
719 : innerrelids, inner_paramrels);
4090 tgl 720 998363 : if (required_outer &&
2799 721 102114 : ((!bms_overlap(required_outer, extra->param_source_rels) &&
2063 rhaas 722 102319 : !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
2063 rhaas 723 CBC 12522 : have_dangerous_phv(root, outerrelids, inner_paramrels)))
724 : {
725 : /* Waste no memory when we reject a path here */
2805 tgl 726 GIC 89640 : bms_free(required_outer);
727 89640 : return;
728 : }
729 :
730 : /* If we got past that, we shouldn't have any unsafe outer-join refs */
55 tgl 731 GNC 908723 : Assert(!have_unsafe_outer_join_ref(root, outerrelids, inner_paramrels));
732 :
733 : /*
734 : * Do a precheck to quickly eliminate obviously-inferior paths. We
735 : * calculate a cheap lower bound on the path's cost and then use
4090 tgl 736 ECB : * add_path_precheck() to see if the path is clearly going to be dominated
737 : * by some existing path for the joinrel. If not, do the full pushup with
738 : * creating a fully valid path structure and submitting it to add_path().
739 : * The latter two steps are expensive enough to make this two-phase
740 : * methodology worthwhile.
741 : */
4090 tgl 742 GIC 908723 : initial_cost_nestloop(root, &workspace, jointype,
743 : outer_path, inner_path, extra);
744 :
745 908723 : if (add_path_precheck(joinrel,
746 : workspace.startup_cost, workspace.total_cost,
4090 tgl 747 ECB : pathkeys, required_outer))
748 : {
749 : /*
2011 rhaas 750 : * If the inner path is parameterized, it is parameterized by the
751 : * topmost parent of the outer rel, not the outer rel itself. Fix
752 : * that.
753 : */
2011 rhaas 754 GIC 440509 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
2011 rhaas 755 ECB : {
2011 rhaas 756 GIC 1283 : inner_path = reparameterize_path_by_child(root, inner_path,
757 : outer_path->parent);
758 :
759 : /*
760 : * If we could not translate the path, we can't create nest loop
761 : * path.
762 : */
763 1283 : if (!inner_path)
2011 rhaas 764 ECB : {
2011 rhaas 765 UIC 0 : bms_free(required_outer);
2011 rhaas 766 LBC 0 : return;
2011 rhaas 767 ECB : }
768 : }
769 :
4090 tgl 770 GIC 440509 : add_path(joinrel, (Path *)
771 440509 : create_nestloop_path(root,
4090 tgl 772 ECB : joinrel,
773 : jointype,
774 : &workspace,
775 : extra,
776 : outer_path,
777 : inner_path,
778 : extra->restrictlist,
779 : pathkeys,
780 : required_outer));
781 : }
782 : else
783 : {
784 : /* Waste no memory when we reject a path here */
4090 tgl 785 GIC 468214 : bms_free(required_outer);
786 : }
787 : }
4090 tgl 788 ECB :
789 : /*
790 : * try_partial_nestloop_path
2636 rhaas 791 : * Consider a partial nestloop join path; if it appears useful, push it into
792 : * the joinrel's partial_pathlist via add_partial_path().
793 : */
794 : static void
2636 rhaas 795 GIC 12296 : try_partial_nestloop_path(PlannerInfo *root,
796 : RelOptInfo *joinrel,
797 : Path *outer_path,
798 : Path *inner_path,
799 : List *pathkeys,
2559 tgl 800 ECB : JoinType jointype,
801 : JoinPathExtraData *extra)
2636 rhaas 802 : {
803 : JoinCostWorkspace workspace;
804 :
805 : /*
806 : * If the inner path is parameterized, the parameterization must be fully
807 : * satisfied by the proposed outer path. Parameterized partial paths are
808 : * not supported. The caller should already have verified that no lateral
725 tgl 809 : * rels are required here.
810 : */
2636 rhaas 811 GBC 12296 : Assert(bms_is_empty(joinrel->lateral_relids));
812 12296 : if (inner_path->param_info != NULL)
813 : {
2636 rhaas 814 GIC 6034 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
2011 815 6034 : RelOptInfo *outerrel = outer_path->parent;
2011 rhaas 816 ECB : Relids outerrelids;
2636 817 :
818 : /*
819 : * The inner and outer paths are parameterized, if at all, by the top
820 : * level parents, not the child relations, so we must use those relids
821 : * for our parameterization tests.
822 : */
2011 rhaas 823 GIC 6034 : if (outerrel->top_parent_relids)
824 4482 : outerrelids = outerrel->top_parent_relids;
825 : else
826 1552 : outerrelids = outerrel->relids;
827 :
828 6034 : if (!bms_is_subset(inner_paramrels, outerrelids))
2636 829 8622 : return;
830 : }
2636 rhaas 831 ECB :
832 : /*
833 : * Before creating a path, get a quick lower bound on what it is likely to
834 : * cost. Bail out right away if it looks terrible.
835 : */
2636 rhaas 836 GIC 11777 : initial_cost_nestloop(root, &workspace, jointype,
837 : outer_path, inner_path, extra);
838 11777 : if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
839 8103 : return;
840 :
2011 rhaas 841 ECB : /*
842 : * If the inner path is parameterized, it is parameterized by the topmost
843 : * parent of the outer rel, not the outer rel itself. Fix that.
844 : */
2011 rhaas 845 GIC 3674 : if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
846 : {
847 1263 : inner_path = reparameterize_path_by_child(root, inner_path,
848 : outer_path->parent);
849 :
850 : /*
851 : * If we could not translate the path, we can't create nest loop path.
852 : */
853 1263 : if (!inner_path)
2011 rhaas 854 UIC 0 : return;
855 : }
856 :
2636 rhaas 857 ECB : /* Might be good enough to be worth trying, so let's try it. */
2636 rhaas 858 CBC 3674 : add_partial_path(joinrel, (Path *)
2559 tgl 859 GIC 3674 : create_nestloop_path(root,
2559 tgl 860 ECB : joinrel,
861 : jointype,
862 : &workspace,
863 : extra,
864 : outer_path,
865 : inner_path,
866 : extra->restrictlist,
867 : pathkeys,
868 : NULL));
2636 rhaas 869 : }
870 :
871 : /*
4090 tgl 872 : * try_mergejoin_path
873 : * Consider a merge join path; if it appears useful, push it into
874 : * the joinrel's pathlist via add_path().
875 : */
876 : static void
4090 tgl 877 GIC 436058 : try_mergejoin_path(PlannerInfo *root,
878 : RelOptInfo *joinrel,
879 : Path *outer_path,
880 : Path *inner_path,
881 : List *pathkeys,
4090 tgl 882 ECB : List *mergeclauses,
883 : List *outersortkeys,
2891 884 : List *innersortkeys,
885 : JoinType jointype,
886 : JoinPathExtraData *extra,
887 : bool is_partial)
888 : {
889 : Relids required_outer;
890 : JoinCostWorkspace workspace;
4090 891 :
2224 rhaas 892 GIC 436058 : if (is_partial)
2224 rhaas 893 ECB : {
2224 rhaas 894 GIC 2895 : try_partial_mergejoin_path(root,
895 : joinrel,
896 : outer_path,
897 : inner_path,
898 : pathkeys,
2224 rhaas 899 ECB : mergeclauses,
2224 rhaas 900 EUB : outersortkeys,
901 : innersortkeys,
902 : jointype,
903 : extra);
2224 rhaas 904 CBC 14900 : return;
2224 rhaas 905 ECB : }
906 :
907 : /*
908 : * Check to see if proposed path is still parameterized, and reject if the
909 : * parameterization wouldn't be sensible.
910 : */
4090 tgl 911 GIC 433163 : required_outer = calc_non_nestloop_required_outer(outer_path,
912 : inner_path);
913 433163 : if (required_outer &&
2891 914 13083 : !bms_overlap(required_outer, extra->param_source_rels))
915 : {
916 : /* Waste no memory when we reject a path here */
4090 917 12005 : bms_free(required_outer);
918 12005 : return;
919 : }
920 :
921 : /*
922 : * If the given paths are already well enough ordered, we can skip doing
4090 tgl 923 ECB : * an explicit sort.
924 : */
4090 tgl 925 GIC 626322 : if (outersortkeys &&
926 205164 : pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
927 3948 : outersortkeys = NIL;
928 759748 : if (innersortkeys &&
929 338590 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
930 6574 : innersortkeys = NIL;
931 :
932 : /*
933 : * See comments in try_nestloop_path().
934 : */
935 421158 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
936 : outer_path, inner_path,
937 : outersortkeys, innersortkeys,
2193 tgl 938 ECB : extra);
939 :
4090 tgl 940 CBC 421158 : if (add_path_precheck(joinrel,
941 : workspace.startup_cost, workspace.total_cost,
942 : pathkeys, required_outer))
943 : {
4090 tgl 944 GIC 102384 : add_path(joinrel, (Path *)
945 102384 : create_mergejoin_path(root,
946 : joinrel,
947 : jointype,
948 : &workspace,
949 : extra,
4090 tgl 950 ECB : outer_path,
951 : inner_path,
952 : extra->restrictlist,
953 : pathkeys,
954 : required_outer,
955 : mergeclauses,
956 : outersortkeys,
957 : innersortkeys));
958 : }
959 : else
960 : {
961 : /* Waste no memory when we reject a path here */
4090 tgl 962 GIC 318774 : bms_free(required_outer);
4090 tgl 963 ECB : }
964 : }
965 :
966 : /*
967 : * try_partial_mergejoin_path
968 : * Consider a partial merge join path; if it appears useful, push it into
969 : * the joinrel's pathlist via add_partial_path().
970 : */
2224 rhaas 971 : static void
2224 rhaas 972 CBC 8853 : try_partial_mergejoin_path(PlannerInfo *root,
2224 rhaas 973 ECB : RelOptInfo *joinrel,
974 : Path *outer_path,
975 : Path *inner_path,
976 : List *pathkeys,
977 : List *mergeclauses,
978 : List *outersortkeys,
979 : List *innersortkeys,
980 : JoinType jointype,
981 : JoinPathExtraData *extra)
982 : {
983 : JoinCostWorkspace workspace;
984 :
985 : /*
986 : * See comments in try_partial_hashjoin_path().
987 : */
2224 rhaas 988 GIC 8853 : Assert(bms_is_empty(joinrel->lateral_relids));
989 8853 : if (inner_path->param_info != NULL)
2224 rhaas 990 ECB : {
2224 rhaas 991 LBC 0 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
992 :
2224 rhaas 993 UIC 0 : if (!bms_is_empty(inner_paramrels))
2224 rhaas 994 GIC 4406 : return;
995 : }
996 :
997 : /*
998 : * If the given paths are already well enough ordered, we can skip doing
999 : * an explicit sort.
1000 : */
1001 14811 : if (outersortkeys &&
1002 5958 : pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
1003 48 : outersortkeys = NIL;
1004 16542 : if (innersortkeys &&
1005 7689 : pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1006 126 : innersortkeys = NIL;
1007 :
2224 rhaas 1008 ECB : /*
1009 : * See comments in try_partial_nestloop_path().
1010 : */
2224 rhaas 1011 GIC 8853 : initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1012 : outer_path, inner_path,
1013 : outersortkeys, innersortkeys,
1014 : extra);
1015 :
1016 8853 : if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
1017 4406 : return;
2224 rhaas 1018 ECB :
1019 : /* Might be good enough to be worth trying, so let's try it. */
2224 rhaas 1020 GIC 4447 : add_partial_path(joinrel, (Path *)
1021 4447 : create_mergejoin_path(root,
1022 : joinrel,
1023 : jointype,
1024 : &workspace,
1025 : extra,
1026 : outer_path,
1027 : inner_path,
1028 : extra->restrictlist,
1029 : pathkeys,
1030 : NULL,
1031 : mergeclauses,
1032 : outersortkeys,
1033 : innersortkeys));
2224 rhaas 1034 ECB : }
1035 :
1036 : /*
4090 tgl 1037 EUB : * try_hashjoin_path
1038 : * Consider a hash join path; if it appears useful, push it into
1039 : * the joinrel's pathlist via add_path().
4090 tgl 1040 ECB : */
1041 : static void
4090 tgl 1042 GIC 251361 : try_hashjoin_path(PlannerInfo *root,
1043 : RelOptInfo *joinrel,
1044 : Path *outer_path,
1045 : Path *inner_path,
1046 : List *hashclauses,
2891 tgl 1047 ECB : JoinType jointype,
1048 : JoinPathExtraData *extra)
4090 1049 : {
1050 : Relids required_outer;
3955 bruce 1051 : JoinCostWorkspace workspace;
4090 tgl 1052 :
1053 : /*
1054 : * Check to see if proposed path is still parameterized, and reject if the
1055 : * parameterization wouldn't be sensible.
1056 : */
4090 tgl 1057 CBC 251361 : required_outer = calc_non_nestloop_required_outer(outer_path,
1058 : inner_path);
4090 tgl 1059 GIC 251361 : if (required_outer &&
2891 1060 39222 : !bms_overlap(required_outer, extra->param_source_rels))
1061 : {
4090 tgl 1062 ECB : /* Waste no memory when we reject a path here */
4090 tgl 1063 CBC 34427 : bms_free(required_outer);
4090 tgl 1064 GIC 34427 : return;
1065 : }
4090 tgl 1066 ECB :
1067 : /*
1068 : * See comments in try_nestloop_path(). Also note that hashjoin paths
1069 : * never have any output pathkeys, per comments in create_hashjoin_path.
1070 : */
4090 tgl 1071 GIC 216934 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1072 : outer_path, inner_path, extra, false);
1073 :
1074 216934 : if (add_path_precheck(joinrel,
1075 : workspace.startup_cost, workspace.total_cost,
1076 : NIL, required_outer))
1077 : {
1078 92639 : add_path(joinrel, (Path *)
1079 92639 : create_hashjoin_path(root,
1080 : joinrel,
1081 : jointype,
1082 : &workspace,
1083 : extra,
1084 : outer_path,
1085 : inner_path,
1086 : false, /* parallel_hash */
1087 : extra->restrictlist,
4090 tgl 1088 ECB : required_outer,
1089 : hashclauses));
1090 : }
1091 : else
1092 : {
1093 : /* Waste no memory when we reject a path here */
4090 tgl 1094 GIC 124295 : bms_free(required_outer);
1095 : }
1096 : }
1097 :
1098 : /*
1099 : * try_partial_hashjoin_path
1100 : * Consider a partial hashjoin join path; if it appears useful, push it into
1101 : * the joinrel's partial_pathlist via add_partial_path().
1102 : * The outer side is partial. If parallel_hash is true, then the inner path
1936 andres 1103 ECB : * must be partial and will be run in parallel to create one or more shared
1104 : * hash tables; otherwise the inner path must be complete and a copy of it
1105 : * is run in every process to create separate identical private hash tables.
2636 rhaas 1106 : */
1107 : static void
2636 rhaas 1108 GIC 9744 : try_partial_hashjoin_path(PlannerInfo *root,
2636 rhaas 1109 ECB : RelOptInfo *joinrel,
1110 : Path *outer_path,
1111 : Path *inner_path,
1112 : List *hashclauses,
1113 : JoinType jointype,
1114 : JoinPathExtraData *extra,
1115 : bool parallel_hash)
1116 : {
1117 : JoinCostWorkspace workspace;
1118 :
1119 : /*
1120 : * If the inner path is parameterized, the parameterization must be fully
1121 : * satisfied by the proposed outer path. Parameterized partial paths are
1122 : * not supported. The caller should already have verified that no lateral
1123 : * rels are required here.
1124 : */
2636 rhaas 1125 CBC 9744 : Assert(bms_is_empty(joinrel->lateral_relids));
2636 rhaas 1126 GIC 9744 : if (inner_path->param_info != NULL)
1127 : {
2636 rhaas 1128 UIC 0 : Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
1129 :
1130 0 : if (!bms_is_empty(inner_paramrels))
2636 rhaas 1131 GIC 4655 : return;
1132 : }
1133 :
1134 : /*
1135 : * Before creating a path, get a quick lower bound on what it is likely to
1136 : * cost. Bail out right away if it looks terrible.
1137 : */
1138 9744 : initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1139 : outer_path, inner_path, extra, parallel_hash);
2636 rhaas 1140 CBC 9744 : if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
2636 rhaas 1141 GIC 4655 : return;
1142 :
1143 : /* Might be good enough to be worth trying, so let's try it. */
1144 5089 : add_partial_path(joinrel, (Path *)
2559 tgl 1145 5089 : create_hashjoin_path(root,
1146 : joinrel,
1147 : jointype,
1148 : &workspace,
1149 : extra,
1150 : outer_path,
1151 : inner_path,
1152 : parallel_hash,
1153 : extra->restrictlist,
2559 tgl 1154 ECB : NULL,
1155 : hashclauses));
1156 : }
1157 :
1158 : /*
1159 : * clause_sides_match_join
1160 : * Determine whether a join clause is of the right form to use in this join.
1161 : *
1162 : * We already know that the clause is a binary opclause referencing only the
1163 : * rels in the current join. The point here is to check whether it has the
1164 : * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
1165 : * rather than mixing outer and inner vars on either side. If it matches,
1166 : * we set the transient flag outer_is_left to identify which side is which.
1167 : */
1168 : static inline bool
4950 tgl 1169 GIC 546735 : clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
1170 : RelOptInfo *innerrel)
4951 tgl 1171 ECB : {
4951 tgl 1172 CBC 817398 : if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
4951 tgl 1173 GIC 270663 : bms_is_subset(rinfo->right_relids, innerrel->relids))
4951 tgl 1174 EUB : {
1175 : /* lefthand side is outer */
4951 tgl 1176 GBC 270495 : rinfo->outer_is_left = true;
4951 tgl 1177 CBC 270495 : return true;
1178 : }
4951 tgl 1179 GIC 544920 : else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
1180 268680 : bms_is_subset(rinfo->right_relids, outerrel->relids))
1181 : {
1182 : /* righthand side is outer */
1183 250432 : rinfo->outer_is_left = false;
4951 tgl 1184 CBC 250432 : return true;
1185 : }
1186 25808 : return false; /* no good for these input relations */
4951 tgl 1187 ECB : }
1188 :
1189 : /*
8821 bruce 1190 : * sort_inner_and_outer
9014 1191 : * Create mergejoin join paths by explicitly sorting both the outer and
1192 : * inner join relations on each available merge ordering.
1193 : *
1194 : * 'joinrel' is the join relation
1195 : * 'outerrel' is the outer join relation
1196 : * 'innerrel' is the inner join relation
1197 : * 'jointype' is the type of join to do
1198 : * 'extra' contains additional input values
1199 : */
1200 : static void
6517 tgl 1201 GIC 230857 : sort_inner_and_outer(PlannerInfo *root,
1202 : RelOptInfo *joinrel,
1203 : RelOptInfo *outerrel,
1204 : RelOptInfo *innerrel,
1205 : JoinType jointype,
1206 : JoinPathExtraData *extra)
1207 : {
2224 rhaas 1208 230857 : JoinType save_jointype = jointype;
1209 : Path *outer_path;
1210 : Path *inner_path;
1211 230857 : Path *cheapest_partial_outer = NULL;
1212 230857 : Path *cheapest_safe_inner = NULL;
1213 : List *all_pathkeys;
1214 : ListCell *l;
9345 bruce 1215 ECB :
1216 : /*
1217 : * We only consider the cheapest-total-cost input paths, since we are
7384 tgl 1218 : * assuming here that a sort is required. We will consider
6385 bruce 1219 : * cheapest-startup-cost input paths later, and only if they don't need a
1220 : * sort.
1221 : *
3875 tgl 1222 : * This function intentionally does not consider parameterized input
3260 bruce 1223 : * paths, except when the cheapest-total is parameterized. If we did so,
1224 : * we'd have a combinatorial explosion of mergejoin paths of dubious
3875 tgl 1225 : * value. This interacts with decisions elsewhere that also discriminate
1226 : * against mergejoins with parameterized inputs; see comments in
1227 : * src/backend/optimizer/README.
1228 : */
7384 tgl 1229 CBC 230857 : outer_path = outerrel->cheapest_total_path;
1230 230857 : inner_path = innerrel->cheapest_total_path;
1231 :
3875 tgl 1232 ECB : /*
1233 : * If either cheapest-total path is parameterized by the other rel, we
1234 : * can't use a mergejoin. (There's no use looking for alternative input
1235 : * paths, since these should already be the least-parameterized available
1236 : * paths.)
1237 : */
3875 tgl 1238 GIC 230857 : if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
1239 226711 : PATH_PARAM_BY_REL(inner_path, outerrel))
3897 1240 8298 : return;
1241 :
1242 : /*
1243 : * If unique-ification is requested, do it and then handle as a plain
1244 : * inner join.
1245 : */
7384 1246 222559 : if (jointype == JOIN_UNIQUE_OUTER)
7384 tgl 1247 ECB : {
5351 tgl 1248 GIC 1649 : outer_path = (Path *) create_unique_path(root, outerrel,
1249 : outer_path, extra->sjinfo);
1250 1649 : Assert(outer_path);
7384 1251 1649 : jointype = JOIN_INNER;
1252 : }
1253 220910 : else if (jointype == JOIN_UNIQUE_INNER)
7384 tgl 1254 ECB : {
5351 tgl 1255 GIC 1649 : inner_path = (Path *) create_unique_path(root, innerrel,
1256 : inner_path, extra->sjinfo);
5351 tgl 1257 CBC 1649 : Assert(inner_path);
7384 1258 1649 : jointype = JOIN_INNER;
1259 : }
1260 :
1261 : /*
1262 : * If the joinrel is parallel-safe, we may be able to consider a partial
1263 : * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
1264 : * outer path will be partial, and therefore we won't be able to properly
1265 : * guarantee uniqueness. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT
1266 : * and JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1267 : * Also, the resulting path must not be parameterized.
1268 : */
2224 rhaas 1269 GIC 222559 : if (joinrel->consider_parallel &&
1270 186444 : save_jointype != JOIN_UNIQUE_OUTER &&
1271 185230 : save_jointype != JOIN_FULL &&
1272 153642 : save_jointype != JOIN_RIGHT &&
4 tgl 1273 GNC 153079 : save_jointype != JOIN_RIGHT_ANTI &&
2224 rhaas 1274 GIC 153079 : outerrel->partial_pathlist != NIL &&
1275 4759 : bms_is_empty(joinrel->lateral_relids))
2224 rhaas 1276 ECB : {
2224 rhaas 1277 CBC 4759 : cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
1278 :
2224 rhaas 1279 GIC 4759 : if (inner_path->parallel_safe)
1280 4585 : cheapest_safe_inner = inner_path;
1281 174 : else if (save_jointype != JOIN_UNIQUE_INNER)
1282 : cheapest_safe_inner =
1283 171 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1284 : }
2224 rhaas 1285 ECB :
8637 tgl 1286 : /*
6385 bruce 1287 : * Each possible ordering of the available mergejoin clauses will generate
1288 : * a differently-sorted result path at essentially the same cost. We have
1289 : * no basis for choosing one over another at this level of joining, but
1290 : * some sort orders may be more useful than others for higher-level
1291 : * mergejoins, so it's worth considering multiple orderings.
1292 : *
8151 tgl 1293 : * Actually, it's not quite true that every mergeclause ordering will
1294 : * generate a different path order, because some of the clauses may be
3260 bruce 1295 : * partially redundant (refer to the same EquivalenceClasses). Therefore,
1296 : * what we do is convert the mergeclause list to a list of canonical
5923 tgl 1297 : * pathkeys, and then consider different orderings of the pathkeys.
8151 1298 : *
1299 : * Generating a path for *every* permutation of the pathkeys doesn't seem
8053 bruce 1300 : * like a winning strategy; the cost in planning time is too high. For
1301 : * now, we generate one path for each pathkey, listing that pathkey first
6385 1302 : * and the rest in random order. This should allow at least a one-clause
1303 : * mergejoin without re-sorting against any other possible mergejoin
1304 : * partner path. But if we've not guessed the right ordering of secondary
1305 : * keys, we may end up evaluating clauses as qpquals when they could have
1306 : * been done as mergeclauses. (In practice, it's rare that there's more
1307 : * than two or three mergeclauses, so expending a huge amount of thought
1308 : * on that is probably not worth it.)
1309 : *
1310 : * The pathkey order returned by select_outer_pathkeys_for_merge() has
1311 : * some heuristics behind it (see that function), so be sure to try it
1312 : * exactly as-is as well as making variants.
1313 : */
5923 tgl 1314 GIC 222559 : all_pathkeys = select_outer_pathkeys_for_merge(root,
1315 : extra->mergeclause_list,
5923 tgl 1316 ECB : joinrel);
8151 1317 :
6892 neilc 1318 CBC 427723 : foreach(l, all_pathkeys)
9345 bruce 1319 ECB : {
332 tgl 1320 CBC 205164 : PathKey *front_pathkey = (PathKey *) lfirst(l);
8151 tgl 1321 ECB : List *cur_mergeclauses;
8397 bruce 1322 : List *outerkeys;
1323 : List *innerkeys;
1324 : List *merge_pathkeys;
1325 :
5923 tgl 1326 : /* Make a pathkey list with this guy first */
6892 neilc 1327 CBC 205164 : if (l != list_head(all_pathkeys))
5923 tgl 1328 18867 : outerkeys = lcons(front_pathkey,
1329 : list_delete_nth_cell(list_copy(all_pathkeys),
899 drowley 1330 ECB : foreach_current_index(l)));
1331 : else
5624 bruce 1332 GIC 186297 : outerkeys = all_pathkeys; /* no work at first one... */
1333 :
1334 : /* Sort the mergeclauses into the corresponding ordering */
1335 : cur_mergeclauses =
1871 tgl 1336 205164 : find_mergeclauses_for_outer_pathkeys(root,
1337 : outerkeys,
1338 : extra->mergeclause_list);
1339 :
1340 : /* Should have used them all... */
2891 1341 205164 : Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1342 :
1343 : /* Build sort pathkeys for the inner side */
5923 1344 205164 : innerkeys = make_inner_pathkeys_for_merge(root,
1345 : cur_mergeclauses,
1346 : outerkeys);
1347 :
1348 : /* Build pathkeys representing output sort order */
6650 1349 205164 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1350 : outerkeys);
1351 :
1352 : /*
1353 : * And now we can make the path.
1354 : *
1355 : * Note: it's possible that the cheapest paths will already be sorted
1356 : * properly. try_mergejoin_path will detect that case and suppress an
1357 : * explicit sort step, so we needn't do so here.
1358 : */
4090 1359 205164 : try_mergejoin_path(root,
1360 : joinrel,
4090 tgl 1361 ECB : outer_path,
1362 : inner_path,
1363 : merge_pathkeys,
1364 : cur_mergeclauses,
1365 : outerkeys,
1366 : innerkeys,
2891 1367 : jointype,
1368 : extra,
1369 : false);
1370 :
1371 : /*
1372 : * If we have partial outer and parallel safe inner path then try
1373 : * partial mergejoin path.
2224 rhaas 1374 : */
2224 rhaas 1375 CBC 205164 : if (cheapest_partial_outer && cheapest_safe_inner)
2224 rhaas 1376 GIC 5958 : try_partial_mergejoin_path(root,
1377 : joinrel,
1378 : cheapest_partial_outer,
2224 rhaas 1379 ECB : cheapest_safe_inner,
1380 : merge_pathkeys,
1381 : cur_mergeclauses,
1382 : outerkeys,
1383 : innerkeys,
1384 : jointype,
1385 : extra);
1386 : }
1387 : }
9770 scrappy 1388 :
1389 : /*
1390 : * generate_mergejoin_paths
2300 rhaas 1391 : * Creates possible mergejoin paths for input outerpath.
1392 : *
1393 : * We generate mergejoins if mergejoin clauses are available. We have
1394 : * two ways to generate the inner path for a mergejoin: sort the cheapest
1395 : * inner path, or use an inner path that is already suitably ordered for the
1396 : * merge. If we have several mergeclauses, it could be that there is no inner
1397 : * path (or only a very expensive one) for the full list of mergeclauses, but
1398 : * better paths exist if we truncate the mergeclause list (thereby discarding
1399 : * some sort key requirements). So, we consider truncations of the
1400 : * mergeclause list as well as the full list. (Ideally we'd consider all
1401 : * subsets of the mergeclause list, but that seems way too expensive.)
1402 : */
1403 : static void
2300 rhaas 1404 GIC 427481 : generate_mergejoin_paths(PlannerInfo *root,
1405 : RelOptInfo *joinrel,
2300 rhaas 1406 ECB : RelOptInfo *innerrel,
1407 : Path *outerpath,
1408 : JoinType jointype,
1409 : JoinPathExtraData *extra,
1410 : bool useallclauses,
1411 : Path *inner_cheapest_total,
1412 : List *merge_pathkeys,
1413 : bool is_partial)
1414 : {
1415 : List *mergeclauses;
1416 : List *innersortkeys;
1417 : List *trialsortkeys;
1418 : Path *cheapest_startup_inner;
1419 : Path *cheapest_total_inner;
2300 rhaas 1420 GIC 427481 : JoinType save_jointype = jointype;
1421 : int num_sortkeys;
2300 rhaas 1422 ECB : int sortkeycnt;
1423 :
2300 rhaas 1424 GIC 427481 : if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
1425 2847 : jointype = JOIN_INNER;
1426 :
1427 : /* Look for useful mergeclauses (if any) */
1428 : mergeclauses =
1871 tgl 1429 427481 : find_mergeclauses_for_outer_pathkeys(root,
1430 : outerpath->pathkeys,
1431 : extra->mergeclause_list);
1432 :
1433 : /*
1434 : * Done with this outer path if no chance for a mergejoin.
1435 : *
1436 : * Special corner case: for "x FULL JOIN y ON true", there will be no join
1437 : * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1438 : * but since mergejoin is our only join type that supports FULL JOIN
1439 : * without any join clauses, it's necessary to generate a clauseless
1440 : * mergejoin path instead.
1441 : */
2300 rhaas 1442 427481 : if (mergeclauses == NIL)
1443 : {
1444 282365 : if (jointype == JOIN_FULL)
1445 : /* okay to try for mergejoin */ ;
1446 : else
1447 280799 : return;
1448 : }
1449 181972 : if (useallclauses &&
1450 35290 : list_length(mergeclauses) != list_length(extra->mergeclause_list))
2300 rhaas 1451 CBC 4458 : return;
1452 :
1453 : /* Compute the required ordering of the inner path */
2300 rhaas 1454 GIC 142224 : innersortkeys = make_inner_pathkeys_for_merge(root,
1455 : mergeclauses,
1456 : outerpath->pathkeys);
1457 :
1458 : /*
1459 : * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1460 : * a sort will be needed, only cheapest total cost matters. (But
1461 : * try_mergejoin_path will do the right thing if inner_cheapest_total is
1462 : * already correctly sorted.)
1463 : */
1464 142224 : try_mergejoin_path(root,
1465 : joinrel,
1466 : outerpath,
2300 rhaas 1467 ECB : inner_cheapest_total,
1468 : merge_pathkeys,
1469 : mergeclauses,
1470 : NIL,
1471 : innersortkeys,
1472 : jointype,
1473 : extra,
1474 : is_partial);
1475 :
1476 : /* Can't do anything else if inner path needs to be unique'd */
2300 rhaas 1477 GIC 142224 : if (save_jointype == JOIN_UNIQUE_INNER)
1478 588 : return;
1479 :
1480 : /*
1481 : * Look for presorted inner paths that satisfy the innersortkey list ---
1482 : * or any truncation thereof, if we are allowed to build a mergejoin using
1483 : * a subset of the merge clauses. Here, we consider both cheap startup
1484 : * cost and cheap total cost.
1485 : *
1486 : * Currently we do not consider parameterized inner paths here. This
1487 : * interacts with decisions elsewhere that also discriminate against
1488 : * mergejoins with parameterized inputs; see comments in
2300 rhaas 1489 ECB : * src/backend/optimizer/README.
1490 : *
1491 : * As we shorten the sortkey list, we should consider only paths that are
1492 : * strictly cheaper than (in particular, not the same as) any path found
1493 : * in an earlier iteration. Otherwise we'd be intentionally using fewer
1494 : * merge keys than a given path allows (treating the rest as plain
1495 : * joinquals), which is unlikely to be a good idea. Also, eliminating
1496 : * paths here on the basis of compare_path_costs is a lot cheaper than
1497 : * building the mergejoin path only to throw it away.
1498 : *
1499 : * If inner_cheapest_total is well enough sorted to have not required a
1500 : * sort in the path made above, we shouldn't make a duplicate path with
1501 : * it, either. We handle that case with the same logic that handles the
1502 : * previous consideration, by initializing the variables that track
1503 : * cheapest-so-far properly. Note that we do NOT reject
1504 : * inner_cheapest_total if we find it matches some shorter set of
1505 : * pathkeys. That case corresponds to using fewer mergekeys to avoid
1506 : * sorting inner_cheapest_total, whereas we did sort it above, so the
1507 : * plans being considered are different.
1508 : */
2300 rhaas 1509 GIC 141636 : if (pathkeys_contained_in(innersortkeys,
1510 : inner_cheapest_total->pathkeys))
2300 rhaas 1511 ECB : {
1512 : /* inner_cheapest_total didn't require a sort */
2300 rhaas 1513 GIC 3064 : cheapest_startup_inner = inner_cheapest_total;
1514 3064 : cheapest_total_inner = inner_cheapest_total;
1515 : }
1516 : else
1517 : {
1518 : /* it did require a sort, at least for the full set of keys */
1519 138572 : cheapest_startup_inner = NULL;
1520 138572 : cheapest_total_inner = NULL;
1521 : }
1522 141636 : num_sortkeys = list_length(innersortkeys);
1523 141636 : if (num_sortkeys > 1 && !useallclauses)
2118 tgl 1524 CBC 4255 : trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
2300 rhaas 1525 ECB : else
2300 rhaas 1526 GIC 137381 : trialsortkeys = innersortkeys; /* won't really truncate */
1527 :
1528 256763 : for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1529 : {
1530 : Path *innerpath;
1531 145893 : List *newclauses = NIL;
1532 :
1533 : /*
1534 : * Look for an inner path ordered well enough for the first
1535 : * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1536 : * destructively, which is why we made a copy...
1537 : */
1538 145893 : trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1539 145893 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1540 : trialsortkeys,
1541 : NULL,
1542 : TOTAL_COST,
1543 : is_partial);
1544 145893 : if (innerpath != NULL &&
1545 5387 : (cheapest_total_inner == NULL ||
1546 5387 : compare_path_costs(innerpath, cheapest_total_inner,
1547 : TOTAL_COST) < 0))
1548 : {
1549 : /* Found a cheap (or even-cheaper) sorted path */
1550 : /* Select the right mergeclauses, if we didn't already */
1551 88166 : if (sortkeycnt < num_sortkeys)
1552 : {
1553 : newclauses =
1871 tgl 1554 1199 : trim_mergeclauses_for_inner_pathkeys(root,
1555 : mergeclauses,
1871 tgl 1556 ECB : trialsortkeys);
2300 rhaas 1557 GIC 1199 : Assert(newclauses != NIL);
1558 : }
1559 : else
2300 rhaas 1560 CBC 86967 : newclauses = mergeclauses;
1561 88166 : try_mergejoin_path(root,
1562 : joinrel,
1563 : outerpath,
1564 : innerpath,
1565 : merge_pathkeys,
2300 rhaas 1566 ECB : newclauses,
1567 : NIL,
1568 : NIL,
1569 : jointype,
2224 1570 : extra,
1571 : is_partial);
2300 rhaas 1572 GIC 88166 : cheapest_total_inner = innerpath;
2300 rhaas 1573 ECB : }
1574 : /* Same on the basis of cheapest startup cost ... */
2300 rhaas 1575 CBC 145893 : innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1576 : trialsortkeys,
1577 : NULL,
2224 rhaas 1578 ECB : STARTUP_COST,
1579 : is_partial);
2300 rhaas 1580 GIC 145893 : if (innerpath != NULL &&
1581 5387 : (cheapest_startup_inner == NULL ||
1582 5387 : compare_path_costs(innerpath, cheapest_startup_inner,
1583 : STARTUP_COST) < 0))
1584 : {
2300 rhaas 1585 ECB : /* Found a cheap (or even-cheaper) sorted path */
2300 rhaas 1586 CBC 87526 : if (innerpath != cheapest_total_inner)
1587 : {
1588 : /*
1589 : * Avoid rebuilding clause list if we already made one; saves
1590 : * memory in big join trees...
2300 rhaas 1591 ECB : */
2300 rhaas 1592 CBC 504 : if (newclauses == NIL)
2300 rhaas 1593 ECB : {
2300 rhaas 1594 GIC 9 : if (sortkeycnt < num_sortkeys)
1595 : {
1596 : newclauses =
1871 tgl 1597 UIC 0 : trim_mergeclauses_for_inner_pathkeys(root,
1871 tgl 1598 ECB : mergeclauses,
1599 : trialsortkeys);
2300 rhaas 1600 UIC 0 : Assert(newclauses != NIL);
2300 rhaas 1601 ECB : }
1602 : else
2300 rhaas 1603 GIC 9 : newclauses = mergeclauses;
2300 rhaas 1604 ECB : }
2300 rhaas 1605 GIC 504 : try_mergejoin_path(root,
1606 : joinrel,
2300 rhaas 1607 ECB : outerpath,
1608 : innerpath,
1609 : merge_pathkeys,
1610 : newclauses,
1611 : NIL,
1612 : NIL,
1613 : jointype,
1614 : extra,
1615 : is_partial);
1616 : }
2300 rhaas 1617 GIC 87526 : cheapest_startup_inner = innerpath;
1618 : }
2300 rhaas 1619 ECB :
1620 : /*
1621 : * Don't consider truncated sortkeys if we need all clauses.
1622 : */
2300 rhaas 1623 GIC 145893 : if (useallclauses)
1624 30766 : break;
1625 : }
1626 : }
2300 rhaas 1627 ECB :
9345 bruce 1628 : /*
8821 1629 : * match_unsorted_outer
1630 : * Creates possible join paths for processing a single join relation
1631 : * 'joinrel' by employing either iterative substitution or
1632 : * mergejoining on each of its possible outer paths (considering
8637 tgl 1633 : * only outer paths that are already ordered well enough for merging).
1634 : *
1635 : * We always generate a nestloop path for each available outer path.
1636 : * In fact we may generate as many as five: one on the cheapest-total-cost
1637 : * inner path, one on the same with materialization, one on the
1638 : * cheapest-startup-cost inner path (if different), one on the
5801 1639 : * cheapest-total inner-indexscan path (if any), and one on the
1640 : * cheapest-startup inner-indexscan path (if different).
8637 1641 : *
1642 : * We also consider mergejoins if mergejoin clauses are available. See
1643 : * detailed comments in generate_mergejoin_paths.
9345 bruce 1644 EUB : *
1645 : * 'joinrel' is the join relation
1646 : * 'outerrel' is the outer join relation
9770 scrappy 1647 : * 'innerrel' is the inner join relation
1648 : * 'jointype' is the type of join to do
1649 : * 'extra' contains additional input values
9770 scrappy 1650 ECB : */
1651 : static void
6517 tgl 1652 CBC 230857 : match_unsorted_outer(PlannerInfo *root,
1653 : RelOptInfo *joinrel,
1654 : RelOptInfo *outerrel,
1655 : RelOptInfo *innerrel,
1656 : JoinType jointype,
1657 : JoinPathExtraData *extra)
1658 : {
7384 tgl 1659 GIC 230857 : JoinType save_jointype = jointype;
1660 : bool nestjoinOK;
1661 : bool useallclauses;
1662 230857 : Path *inner_cheapest_total = innerrel->cheapest_total_path;
7435 1663 230857 : Path *matpath = NULL;
4090 tgl 1664 ECB : ListCell *lc1;
1665 :
1666 : /*
1667 : * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1668 : * are doing a right, right-anti or full mergejoin, we must use *all* the
1669 : * mergeclauses as join clauses, else we will not have a valid plan.
1670 : * (Although these two flags are currently inverses, keep them separate
1671 : * for clarity and possible future changes.)
1672 : */
8244 tgl 1673 GIC 230857 : switch (jointype)
1674 : {
1675 189080 : case JOIN_INNER:
1676 : case JOIN_LEFT:
1677 : case JOIN_SEMI:
1678 : case JOIN_ANTI:
1679 189080 : nestjoinOK = true;
8029 1680 189080 : useallclauses = false;
8244 1681 189080 : break;
8029 1682 38479 : case JOIN_RIGHT:
1683 : case JOIN_RIGHT_ANTI:
1684 : case JOIN_FULL:
8244 1685 38479 : nestjoinOK = false;
8029 1686 38479 : useallclauses = true;
1687 38479 : break;
5351 1688 3298 : case JOIN_UNIQUE_OUTER:
1689 : case JOIN_UNIQUE_INNER:
1690 3298 : jointype = JOIN_INNER;
1691 3298 : nestjoinOK = true;
1692 3298 : useallclauses = false;
1693 3298 : break;
8029 tgl 1694 UIC 0 : default:
7198 1695 0 : elog(ERROR, "unrecognized join type: %d",
1696 : (int) jointype);
1697 : nestjoinOK = false; /* keep compiler quiet */
1698 : useallclauses = false;
1699 : break;
8244 tgl 1700 ECB : }
1701 :
1702 : /*
1703 : * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1704 : * we will consider it below as a member of cheapest_parameterized_paths,
1705 : * but the other possibilities considered in this routine aren't usable.
1706 : */
3875 tgl 1707 CBC 230857 : if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
3875 tgl 1708 GIC 4152 : inner_cheapest_total = NULL;
1709 :
7384 tgl 1710 ECB : /*
7188 bruce 1711 : * If we need to unique-ify the inner path, we will consider only the
1712 : * cheapest-total inner.
1713 : */
5351 tgl 1714 GIC 230857 : if (save_jointype == JOIN_UNIQUE_INNER)
1715 : {
1716 : /* No way to do this with an inner path parameterized by outer rel */
3897 1717 1649 : if (inner_cheapest_total == NULL)
3897 tgl 1718 UIC 0 : return;
1719 : inner_cheapest_total = (Path *)
2891 tgl 1720 GIC 1649 : create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
5351 tgl 1721 CBC 1649 : Assert(inner_cheapest_total);
1722 : }
7384 1723 229208 : else if (nestjoinOK)
1724 : {
1725 : /*
1726 : * Consider materializing the cheapest inner path, unless
4738 rhaas 1727 ECB : * enable_material is off or the path in question materializes its
1728 : * output anyway.
7435 tgl 1729 : */
3897 tgl 1730 CBC 190729 : if (enable_material && inner_cheapest_total != NULL &&
4738 rhaas 1731 GIC 186364 : !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1732 : matpath = (Path *)
7384 tgl 1733 CBC 177635 : create_material_path(innerrel, inner_cheapest_total);
7435 tgl 1734 ECB : }
9345 bruce 1735 :
4090 tgl 1736 CBC 747415 : foreach(lc1, outerrel->pathlist)
1737 : {
1738 516558 : Path *outerpath = (Path *) lfirst(lc1);
8637 tgl 1739 ECB : List *merge_pathkeys;
1740 :
4090 1741 : /*
4090 tgl 1742 EUB : * We cannot use an outer path that is parameterized by the inner rel.
1743 : */
3875 tgl 1744 GIC 516558 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
4090 1745 89298 : continue;
1746 :
1747 : /*
1748 : * If we need to unique-ify the outer path, it's pointless to consider
1749 : * any but the cheapest outer. (XXX we don't consider parameterized
1750 : * outers, nor inners, for unique-ified cases. Should we?)
1751 : */
7384 1752 427260 : if (save_jointype == JOIN_UNIQUE_OUTER)
1753 : {
1754 1928 : if (outerpath != outerrel->cheapest_total_path)
7384 tgl 1755 CBC 279 : continue;
5351 1756 1649 : outerpath = (Path *) create_unique_path(root, outerrel,
1757 : outerpath, extra->sjinfo);
5351 tgl 1758 GIC 1649 : Assert(outerpath);
1759 : }
1760 :
1761 : /*
6385 bruce 1762 ECB : * The result will have this sort order (even if it is implemented as
1763 : * a nestloop, and even if some of the mergeclauses are implemented by
1764 : * qpquals rather than as true mergeclauses):
8637 tgl 1765 : */
6650 tgl 1766 GBC 426981 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1767 : outerpath->pathkeys);
8637 tgl 1768 ECB :
4090 tgl 1769 CBC 426981 : if (save_jointype == JOIN_UNIQUE_INNER)
1770 : {
4090 tgl 1771 ECB : /*
1772 : * Consider nestloop join, but only with the unique-ified cheapest
1773 : * inner path
1774 : */
4090 tgl 1775 GIC 2424 : try_nestloop_path(root,
1776 : joinrel,
1777 : outerpath,
4090 tgl 1778 ECB : inner_cheapest_total,
2891 1779 : merge_pathkeys,
1780 : jointype,
1781 : extra);
1782 : }
4090 tgl 1783 GIC 424557 : else if (nestjoinOK)
8244 tgl 1784 ECB : {
1785 : /*
4090 1786 : * Consider nestloop joins using this outer path and various
1787 : * available paths for the inner relation. We consider the
1788 : * cheapest-total paths for each available parameterization of the
1789 : * inner relation, including the unparameterized case.
1790 : */
1791 : ListCell *lc2;
1792 :
4090 tgl 1793 CBC 923536 : foreach(lc2, innerrel->cheapest_parameterized_paths)
1794 : {
4090 tgl 1795 GIC 573437 : Path *innerpath = (Path *) lfirst(lc2);
1796 : Path *mpath;
1797 :
1798 573437 : try_nestloop_path(root,
1799 : joinrel,
4090 tgl 1800 ECB : outerpath,
1801 : innerpath,
2891 1802 : merge_pathkeys,
1803 : jointype,
1804 : extra);
1805 :
737 drowley 1806 : /*
1807 : * Try generating a memoize path and see if that makes the
1808 : * nested loop any cheaper.
1809 : */
634 drowley 1810 GIC 573437 : mpath = get_memoize_path(root, innerrel, outerrel,
1811 : innerpath, outerpath, jointype,
1812 : extra);
1813 573437 : if (mpath != NULL)
737 drowley 1814 CBC 93737 : try_nestloop_path(root,
1815 : joinrel,
1816 : outerpath,
634 drowley 1817 ECB : mpath,
1818 : merge_pathkeys,
1819 : jointype,
1820 : extra);
1821 : }
1822 :
4090 tgl 1823 : /* Also consider materialized form of the cheapest inner path */
7435 tgl 1824 GIC 350099 : if (matpath != NULL)
4090 1825 328765 : try_nestloop_path(root,
1826 : joinrel,
1827 : outerpath,
1828 : matpath,
1829 : merge_pathkeys,
1830 : jointype,
2891 tgl 1831 ECB : extra);
1832 : }
1833 :
1834 : /* Can't do anything else if outer path needs to be unique'd */
7384 tgl 1835 GIC 426981 : if (save_jointype == JOIN_UNIQUE_OUTER)
1836 1649 : continue;
1837 :
1838 : /* Can't do anything else if inner rel is parameterized by outer */
3875 1839 425332 : if (inner_cheapest_total == NULL)
3897 1840 4293 : continue;
3897 tgl 1841 ECB :
1842 : /* Generate merge join paths */
2300 rhaas 1843 CBC 421039 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1844 : save_jointype, extra, useallclauses,
1845 : inner_cheapest_total, merge_pathkeys,
2224 rhaas 1846 ECB : false);
1847 : }
1848 :
1849 : /*
1850 : * Consider partial nestloop and mergejoin plan if outerrel has any
1851 : * partial path and the joinrel is parallel-safe. However, we can't
1852 : * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
1853 : * therefore we won't be able to properly guarantee uniqueness. Nor can
1854 : * we handle joins needing lateral rels, since partial paths must not be
1855 : * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
1856 : * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1857 : */
2224 rhaas 1858 CBC 230857 : if (joinrel->consider_parallel &&
2636 rhaas 1859 GIC 188649 : save_jointype != JOIN_UNIQUE_OUTER &&
2224 1860 187435 : save_jointype != JOIN_FULL &&
2224 rhaas 1861 CBC 155703 : save_jointype != JOIN_RIGHT &&
4 tgl 1862 GNC 155140 : save_jointype != JOIN_RIGHT_ANTI &&
2224 rhaas 1863 CBC 155140 : outerrel->partial_pathlist != NIL &&
2636 rhaas 1864 GIC 4828 : bms_is_empty(joinrel->lateral_relids))
1865 : {
2224 1866 4828 : if (nestjoinOK)
1867 4828 : consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1868 : save_jointype, extra);
1869 :
1870 : /*
1871 : * If inner_cheapest_total is NULL or non parallel-safe then find the
1872 : * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
2224 rhaas 1873 ECB : * can't use any alternative inner path.
1874 : */
2224 rhaas 1875 GIC 4828 : if (inner_cheapest_total == NULL ||
1876 4759 : !inner_cheapest_total->parallel_safe)
1877 : {
1878 243 : if (save_jointype == JOIN_UNIQUE_INNER)
1879 3 : return;
1880 :
1165 alvherre 1881 240 : inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1882 : }
1883 :
2224 rhaas 1884 CBC 4825 : if (inner_cheapest_total)
1885 4753 : consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
1886 : save_jointype, extra,
1887 : inner_cheapest_total);
2224 rhaas 1888 ECB : }
1889 : }
1890 :
1891 : /*
1892 : * consider_parallel_mergejoin
1893 : * Try to build partial paths for a joinrel by joining a partial path
1894 : * for the outer relation to a complete path for the inner relation.
1895 : *
1896 : * 'joinrel' is the join relation
1897 : * 'outerrel' is the outer join relation
1898 : * 'innerrel' is the inner join relation
1899 : * 'jointype' is the type of join to do
1900 : * 'extra' contains additional input values
1901 : * 'inner_cheapest_total' cheapest total path for innerrel
1902 : */
1903 : static void
2224 rhaas 1904 GIC 4753 : consider_parallel_mergejoin(PlannerInfo *root,
1905 : RelOptInfo *joinrel,
1906 : RelOptInfo *outerrel,
2224 rhaas 1907 ECB : RelOptInfo *innerrel,
1908 : JoinType jointype,
1909 : JoinPathExtraData *extra,
1910 : Path *inner_cheapest_total)
1911 : {
1912 : ListCell *lc1;
1913 :
1914 : /* generate merge join path for each partial outer path */
2224 rhaas 1915 CBC 11195 : foreach(lc1, outerrel->partial_pathlist)
2224 rhaas 1916 ECB : {
2224 rhaas 1917 GIC 6442 : Path *outerpath = (Path *) lfirst(lc1);
1918 : List *merge_pathkeys;
1919 :
1920 : /*
1921 : * Figure out what useful ordering any paths we create will have.
1922 : */
1923 6442 : merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
2224 rhaas 1924 ECB : outerpath->pathkeys);
1925 :
2224 rhaas 1926 GIC 6442 : generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
2224 rhaas 1927 ECB : extra, false, inner_cheapest_total,
1928 : merge_pathkeys, true);
1929 : }
2636 rhaas 1930 CBC 4753 : }
1931 :
1932 : /*
2636 rhaas 1933 ECB : * consider_parallel_nestloop
1934 : * Try to build partial paths for a joinrel by joining a partial path for the
1935 : * outer relation to a complete path for the inner relation.
1936 : *
1937 : * 'joinrel' is the join relation
1938 : * 'outerrel' is the outer join relation
1939 : * 'innerrel' is the inner join relation
1940 : * 'jointype' is the type of join to do
1941 : * 'extra' contains additional input values
1942 : */
1943 : static void
2636 rhaas 1944 GIC 4828 : consider_parallel_nestloop(PlannerInfo *root,
1945 : RelOptInfo *joinrel,
1946 : RelOptInfo *outerrel,
1947 : RelOptInfo *innerrel,
1948 : JoinType jointype,
1949 : JoinPathExtraData *extra)
1950 : {
2322 tgl 1951 4828 : JoinType save_jointype = jointype;
1952 : ListCell *lc1;
2636 rhaas 1953 ECB :
2322 tgl 1954 GIC 4828 : if (jointype == JOIN_UNIQUE_INNER)
1955 282 : jointype = JOIN_INNER;
1956 :
2636 rhaas 1957 11369 : foreach(lc1, outerrel->partial_pathlist)
1958 : {
1959 6541 : Path *outerpath = (Path *) lfirst(lc1);
1960 : List *pathkeys;
1961 : ListCell *lc2;
1962 :
1963 : /* Figure out what useful ordering any paths we create will have. */
2636 rhaas 1964 CBC 6541 : pathkeys = build_join_pathkeys(root, joinrel, jointype,
1965 : outerpath->pathkeys);
2636 rhaas 1966 ECB :
1967 : /*
1968 : * Try the cheapest parameterized paths; only those which will produce
1969 : * an unparameterized path when joined to this outerrel will survive
1970 : * try_partial_nestloop_path. The cheapest unparameterized path is
1971 : * also in this list.
1972 : */
2636 rhaas 1973 GIC 17103 : foreach(lc2, innerrel->cheapest_parameterized_paths)
1974 : {
2636 rhaas 1975 CBC 10562 : Path *innerpath = (Path *) lfirst(lc2);
1976 : Path *mpath;
1977 :
1978 : /* Can't join to an inner path that is not parallel-safe */
1979 10562 : if (!innerpath->parallel_safe)
2636 rhaas 1980 GIC 198 : continue;
1981 :
1982 : /*
1983 : * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
1984 : * cheapest_total_path, and we have to unique-ify it. (We might
1985 : * be able to relax this to allow other safe, unparameterized
1986 : * inner paths, but right now create_unique_path is not on board
1987 : * with that.)
1988 : */
2322 tgl 1989 10364 : if (save_jointype == JOIN_UNIQUE_INNER)
1990 : {
1991 861 : if (innerpath != innerrel->cheapest_total_path)
2636 rhaas 1992 438 : continue;
2636 rhaas 1993 CBC 423 : innerpath = (Path *) create_unique_path(root, innerrel,
1994 : innerpath,
1995 : extra->sjinfo);
2629 rhaas 1996 GIC 423 : Assert(innerpath);
1997 : }
1998 :
2636 1999 9926 : try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
2636 rhaas 2000 ECB : pathkeys, jointype, extra);
2001 :
2002 : /*
634 drowley 2003 : * Try generating a memoize path and see if that makes the nested
2004 : * loop any cheaper.
2005 : */
634 drowley 2006 CBC 9926 : mpath = get_memoize_path(root, innerrel, outerrel,
2007 : innerpath, outerpath, jointype,
634 drowley 2008 ECB : extra);
634 drowley 2009 GIC 9926 : if (mpath != NULL)
2010 2370 : try_partial_nestloop_path(root, joinrel, outerpath, mpath,
2011 : pathkeys, jointype, extra);
2012 : }
2636 rhaas 2013 ECB : }
9770 scrappy 2014 GIC 4828 : }
2015 :
2016 : /*
2017 : * hash_inner_and_outer
2018 : * Create hashjoin join paths by explicitly hashing both the outer and
2019 : * inner keys of each available hash clause.
2020 : *
2021 : * 'joinrel' is the join relation
9770 scrappy 2022 ECB : * 'outerrel' is the outer join relation
2023 : * 'innerrel' is the inner join relation
8244 tgl 2024 : * 'jointype' is the type of join to do
2025 : * 'extra' contains additional input values
2026 : */
2027 : static void
6517 tgl 2028 CBC 232482 : hash_inner_and_outer(PlannerInfo *root,
8647 tgl 2029 ECB : RelOptInfo *joinrel,
2030 : RelOptInfo *outerrel,
2031 : RelOptInfo *innerrel,
2032 : JoinType jointype,
2033 : JoinPathExtraData *extra)
2034 : {
2322 tgl 2035 GIC 232482 : JoinType save_jointype = jointype;
4483 2036 232482 : bool isouterjoin = IS_OUTER_JOIN(jointype);
2037 : List *hashclauses;
6892 neilc 2038 ECB : ListCell *l;
2039 :
8462 tgl 2040 : /*
4090 2041 : * We need to build only one hashclauses list for any given pair of outer
2042 : * and inner relations; all of the hashable clauses will be used as keys.
2043 : *
2044 : * Scan the join's restrictinfo list to find hashjoinable clauses that are
6385 bruce 2045 : * usable with this pair of sub-relations.
2046 : */
7435 tgl 2047 GIC 232482 : hashclauses = NIL;
2891 tgl 2048 CBC 475742 : foreach(l, extra->restrictlist)
2049 : {
6892 neilc 2050 GIC 243260 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2051 :
2052 : /*
2053 : * If processing an outer join, only use its own join clauses for
2054 : * hashing. For inner joins we need not be so picky.
8244 tgl 2055 ECB : */
1815 tgl 2056 GIC 243260 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
8244 2057 7258 : continue;
8244 tgl 2058 ECB :
4951 tgl 2059 CBC 236002 : if (!restrictinfo->can_join ||
4951 tgl 2060 GIC 214203 : restrictinfo->hashjoinoperator == InvalidOid)
2061 28460 : continue; /* not hashjoinable */
2062 :
8151 tgl 2063 ECB : /*
2064 : * Check if clause has the form "outer op inner" or "inner op outer".
2065 : */
4950 tgl 2066 GIC 207542 : if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
8462 2067 66 : continue; /* no good for these input relations */
2068 :
7435 2069 207476 : hashclauses = lappend(hashclauses, restrictinfo);
2070 : }
2071 :
2072 : /* If we found any usable hashclauses, make paths */
2073 232482 : if (hashclauses)
2074 : {
2075 : /*
2076 : * We consider both the cheapest-total-cost and cheapest-startup-cost
6385 bruce 2077 ECB : * outer paths. There's no need to consider any but the
2078 : * cheapest-total-cost inner path, however.
2079 : */
7188 bruce 2080 GIC 188608 : Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
2081 188608 : Path *cheapest_total_outer = outerrel->cheapest_total_path;
2082 188608 : Path *cheapest_total_inner = innerrel->cheapest_total_path;
2083 :
3875 tgl 2084 ECB : /*
2085 : * If either cheapest-total path is parameterized by the other rel, we
2086 : * can't use a hashjoin. (There's no use looking for alternative
2087 : * input paths, since these should already be the least-parameterized
2088 : * available paths.)
2089 : */
3875 tgl 2090 GIC 188608 : if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
2091 188417 : PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
3897 2092 382 : return;
2093 :
2094 : /* Unique-ify if need be; we ignore parameterized possibilities */
7384 2095 188226 : if (jointype == JOIN_UNIQUE_OUTER)
7384 tgl 2096 ECB : {
2097 : cheapest_total_outer = (Path *)
5351 tgl 2098 GIC 809 : create_unique_path(root, outerrel,
2891 tgl 2099 ECB : cheapest_total_outer, extra->sjinfo);
5351 tgl 2100 GIC 809 : Assert(cheapest_total_outer);
7384 2101 809 : jointype = JOIN_INNER;
4090 2102 809 : try_hashjoin_path(root,
2103 : joinrel,
2104 : cheapest_total_outer,
4090 tgl 2105 ECB : cheapest_total_inner,
2891 2106 : hashclauses,
2107 : jointype,
2108 : extra);
4090 2109 : /* no possibility of cheap startup here */
7384 2110 : }
7384 tgl 2111 GIC 187417 : else if (jointype == JOIN_UNIQUE_INNER)
2112 : {
2113 : cheapest_total_inner = (Path *)
5351 2114 809 : create_unique_path(root, innerrel,
2891 tgl 2115 ECB : cheapest_total_inner, extra->sjinfo);
5351 tgl 2116 CBC 809 : Assert(cheapest_total_inner);
7384 tgl 2117 GIC 809 : jointype = JOIN_INNER;
4090 tgl 2118 CBC 809 : try_hashjoin_path(root,
2119 : joinrel,
2120 : cheapest_total_outer,
2121 : cheapest_total_inner,
2891 tgl 2122 ECB : hashclauses,
2123 : jointype,
2124 : extra);
3875 tgl 2125 GIC 809 : if (cheapest_startup_outer != NULL &&
2126 : cheapest_startup_outer != cheapest_total_outer)
4090 2127 28 : try_hashjoin_path(root,
2128 : joinrel,
4090 tgl 2129 ECB : cheapest_startup_outer,
2130 : cheapest_total_inner,
2891 2131 : hashclauses,
2132 : jointype,
2133 : extra);
2134 : }
2135 : else
2136 : {
2137 : /*
2138 : * For other jointypes, we consider the cheapest startup outer
4090 2139 : * together with the cheapest total inner, and then consider
2140 : * pairings of cheapest-total paths including parameterized ones.
2141 : * There is no use in generating parameterized paths on the basis
2142 : * of possibly cheap startup cost, so this is sufficient.
2143 : */
2144 : ListCell *lc1;
2145 : ListCell *lc2;
2146 :
3875 tgl 2147 CBC 186608 : if (cheapest_startup_outer != NULL)
3875 tgl 2148 GIC 186311 : try_hashjoin_path(root,
3875 tgl 2149 ECB : joinrel,
2150 : cheapest_startup_outer,
2151 : cheapest_total_inner,
2152 : hashclauses,
2153 : jointype,
2154 : extra);
2155 :
4090 tgl 2156 GIC 476659 : foreach(lc1, outerrel->cheapest_parameterized_paths)
2157 : {
2158 290051 : Path *outerpath = (Path *) lfirst(lc1);
2159 :
4090 tgl 2160 ECB : /*
2161 : * We cannot use an outer path that is parameterized by the
2162 : * inner rel.
2163 : */
3875 tgl 2164 GIC 290051 : if (PATH_PARAM_BY_REL(outerpath, innerrel))
4090 tgl 2165 CBC 86336 : continue;
6277 tgl 2166 ECB :
4090 tgl 2167 CBC 525188 : foreach(lc2, innerrel->cheapest_parameterized_paths)
2168 : {
4090 tgl 2169 GIC 321473 : Path *innerpath = (Path *) lfirst(lc2);
2170 :
2171 : /*
2172 : * We cannot use an inner path that is parameterized by
2173 : * the outer rel, either.
4090 tgl 2174 ECB : */
3875 tgl 2175 GIC 321473 : if (PATH_PARAM_BY_REL(innerpath, outerrel))
4090 tgl 2176 CBC 96156 : continue;
2177 :
4090 tgl 2178 GIC 225317 : if (outerpath == cheapest_startup_outer &&
2179 : innerpath == cheapest_total_inner)
2118 2180 161913 : continue; /* already tried it */
2181 :
4090 2182 63404 : try_hashjoin_path(root,
2183 : joinrel,
2184 : outerpath,
2185 : innerpath,
2186 : hashclauses,
2187 : jointype,
2188 : extra);
2189 : }
2190 : }
2191 : }
2192 :
2193 : /*
2194 : * If the joinrel is parallel-safe, we may be able to consider a
2195 : * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
2636 rhaas 2196 ECB : * because the outer path will be partial, and therefore we won't be
2197 : * able to properly guarantee uniqueness. Also, the resulting path
2198 : * must not be parameterized.
2199 : */
2545 rhaas 2200 CBC 188226 : if (joinrel->consider_parallel &&
2322 tgl 2201 GIC 161814 : save_jointype != JOIN_UNIQUE_OUTER &&
2636 rhaas 2202 161814 : outerrel->partial_pathlist != NIL &&
2203 6126 : bms_is_empty(joinrel->lateral_relids))
2204 : {
2205 : Path *cheapest_partial_outer;
1936 andres 2206 CBC 6126 : Path *cheapest_partial_inner = NULL;
2559 tgl 2207 6126 : Path *cheapest_safe_inner = NULL;
2208 :
2636 rhaas 2209 6126 : cheapest_partial_outer =
2636 rhaas 2210 GIC 6126 : (Path *) linitial(outerrel->partial_pathlist);
2636 rhaas 2211 ECB :
2212 : /*
2213 : * Can we use a partial inner plan too, so that we can build a
2214 : * shared hash table in parallel? We can't handle
2215 : * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
2216 : */
1391 tmunro 2217 CBC 6126 : if (innerrel->partial_pathlist != NIL &&
2218 5721 : save_jointype != JOIN_UNIQUE_INNER &&
2219 : enable_parallel_hash)
1936 andres 2220 ECB : {
1936 andres 2221 GIC 5589 : cheapest_partial_inner =
1936 andres 2222 CBC 5589 : (Path *) linitial(innerrel->partial_pathlist);
1936 andres 2223 GIC 5589 : try_partial_hashjoin_path(root, joinrel,
1936 andres 2224 ECB : cheapest_partial_outer,
2225 : cheapest_partial_inner,
2226 : hashclauses, jointype, extra,
2227 : true /* parallel_hash */ );
2228 : }
2229 :
2230 : /*
2231 : * Normally, given that the joinrel is parallel-safe, the cheapest
2232 : * total inner path will also be parallel-safe, but if not, we'll
2233 : * have to search for the cheapest safe, unparameterized inner
2234 : * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
2235 : * inner path. If full, right, or right-anti join, we can't use
2236 : * parallelism (building the hash table in each backend) because
2237 : * no one process has all the match bits.
2238 : */
4 tgl 2239 GNC 6126 : if (save_jointype == JOIN_FULL ||
2240 4326 : save_jointype == JOIN_RIGHT ||
2241 : save_jointype == JOIN_RIGHT_ANTI)
9 tmunro 2242 1965 : cheapest_safe_inner = NULL;
2243 4161 : else if (cheapest_total_inner->parallel_safe)
2636 rhaas 2244 GIC 4047 : cheapest_safe_inner = cheapest_total_inner;
2322 tgl 2245 114 : else if (save_jointype != JOIN_UNIQUE_INNER)
2246 : cheapest_safe_inner =
2224 rhaas 2247 111 : get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
2636 rhaas 2248 ECB :
2636 rhaas 2249 CBC 6126 : if (cheapest_safe_inner != NULL)
2250 4155 : try_partial_hashjoin_path(root, joinrel,
2636 rhaas 2251 ECB : cheapest_partial_outer,
2252 : cheapest_safe_inner,
2253 : hashclauses, jointype, extra,
1936 andres 2254 : false /* parallel_hash */ );
2636 rhaas 2255 : }
2256 : }
6277 tgl 2257 : }
2258 :
2259 : /*
2260 : * select_mergejoin_clauses
2261 : * Select mergejoin clauses that are usable for a particular join.
2262 : * Returns a list of RestrictInfo nodes for those clauses.
2263 : *
2264 : * *mergejoin_allowed is normally set to true, but it is set to false if
2265 : * this is a right/right-anti/full join and there are nonmergejoinable join
2266 : * clauses. The executor's mergejoin machinery cannot handle such cases, so
2267 : * we have to avoid generating a mergejoin plan. (Note that this flag does
2268 : * NOT consider whether there are actually any mergejoinable clauses. This is
4482 2269 : * correct because in some cases we need to build a clauseless mergejoin.
2270 : * Simply returning NIL is therefore not enough to distinguish safe from
2271 : * unsafe cases.)
2272 : *
2273 : * We also mark each selected RestrictInfo to show which side is currently
2274 : * being considered as outer. These are transient markings that are only
2275 : * good for the duration of the current add_paths_to_joinrel() call!
2276 : *
2277 : * We examine each restrictinfo clause known for the join to see
2278 : * if it is mergejoinable and involves vars from the two sub-relations
2279 : * currently of interest.
2280 : */
2281 : static List *
5569 tgl 2282 GIC 233164 : select_mergejoin_clauses(PlannerInfo *root,
2283 : RelOptInfo *joinrel,
2284 : RelOptInfo *outerrel,
2285 : RelOptInfo *innerrel,
2286 : List *restrictlist,
4483 tgl 2287 ECB : JoinType jointype,
4482 2288 : bool *mergejoin_allowed)
2289 : {
8637 tgl 2290 CBC 233164 : List *result_list = NIL;
8244 2291 233164 : bool isouterjoin = IS_OUTER_JOIN(jointype);
4482 2292 233164 : bool have_nonmergeable_joinclause = false;
6892 neilc 2293 ECB : ListCell *l;
2294 :
6892 neilc 2295 CBC 477098 : foreach(l, restrictlist)
2296 : {
2297 243934 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
8462 tgl 2298 ECB :
2299 : /*
2300 : * If processing an outer join, only use its own join clauses in the
2301 : * merge. For inner joins we can use pushed-down clauses too. (Note:
2302 : * we don't set have_nonmergeable_joinclause here because pushed-down
2303 : * clauses will become otherquals not joinquals.)
2304 : */
1815 tgl 2305 GIC 243934 : if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
6375 2306 7270 : continue;
2307 :
2308 : /* Check that clause is a mergeable operator clause */
7034 2309 236664 : if (!restrictinfo->can_join ||
5923 2310 214865 : restrictinfo->mergeopfamilies == NIL)
2311 : {
2312 : /*
2313 : * The executor can handle extra joinquals that are constants, but
2314 : * not anything else, when doing right/right-anti/full merge join.
2315 : * (The reason to support constants is so we can do FULL JOIN ON
2316 : * FALSE.)
2317 : */
4842 2318 27907 : if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
4482 2319 27715 : have_nonmergeable_joinclause = true;
8462 2320 27907 : continue; /* not mergejoinable */
2321 : }
2322 :
2323 : /*
2324 : * Check if clause has the form "outer op inner" or "inner op outer".
2325 : */
4950 2326 208757 : if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
2327 : {
4482 2328 378 : have_nonmergeable_joinclause = true;
7389 2329 378 : continue; /* no good for these input relations */
6375 tgl 2330 ECB : }
2331 :
2332 : /*
2333 : * Insist that each side have a non-redundant eclass. This
2334 : * restriction is needed because various bits of the planner expect
2335 : * that each clause in a merge be associable with some pathkey in a
2336 : * canonical pathkey list, but redundant eclasses can't appear in
2337 : * canonical sort orderings. (XXX it might be worth relaxing this,
5569 2338 : * but not enough time to address it for 8.3.)
2339 : */
4545 tgl 2340 GIC 208379 : update_mergeclause_eclasses(root, restrictinfo);
5569 tgl 2341 ECB :
5569 tgl 2342 CBC 208379 : if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
5569 tgl 2343 GIC 208349 : EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2344 : {
4482 tgl 2345 CBC 70 : have_nonmergeable_joinclause = true;
5569 2346 70 : continue; /* can't handle redundant eclasses */
2347 : }
2348 :
5923 tgl 2349 GIC 208309 : result_list = lappend(result_list, restrictinfo);
2350 : }
2351 :
2352 : /*
2353 : * Report whether mergejoin is allowed (see comment at top of function).
6375 tgl 2354 ECB : */
4483 tgl 2355 CBC 233164 : switch (jointype)
6375 tgl 2356 ECB : {
4483 tgl 2357 GIC 41266 : case JOIN_RIGHT:
2358 : case JOIN_RIGHT_ANTI:
2359 : case JOIN_FULL:
4482 2360 41266 : *mergejoin_allowed = !have_nonmergeable_joinclause;
4483 2361 41266 : break;
2362 191898 : default:
4482 tgl 2363 CBC 191898 : *mergejoin_allowed = true;
4483 tgl 2364 GIC 191898 : break;
6375 tgl 2365 ECB : }
2366 :
8637 tgl 2367 GIC 233164 : return result_list;
2368 : }
|