Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * prepunion.c
4 : * Routines to plan set-operation queries. The filename is a leftover
5 : * from a time when only UNIONs were implemented.
6 : *
7 : * There are two code paths in the planner for set-operation queries.
8 : * If a subquery consists entirely of simple UNION ALL operations, it
9 : * is converted into an "append relation". Otherwise, it is handled
10 : * by the general code in this module (plan_set_operations and its
11 : * subroutines). There is some support code here for the append-relation
12 : * case, but most of the heavy lifting for that is done elsewhere,
13 : * notably in prepjointree.c and allpaths.c.
14 : *
15 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/optimizer/prep/prepunion.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include "access/htup_details.h"
27 : #include "access/sysattr.h"
28 : #include "catalog/partition.h"
29 : #include "catalog/pg_inherits.h"
30 : #include "catalog/pg_type.h"
31 : #include "miscadmin.h"
32 : #include "nodes/makefuncs.h"
33 : #include "nodes/nodeFuncs.h"
34 : #include "optimizer/cost.h"
35 : #include "optimizer/pathnode.h"
36 : #include "optimizer/paths.h"
37 : #include "optimizer/planmain.h"
38 : #include "optimizer/planner.h"
39 : #include "optimizer/prep.h"
40 : #include "optimizer/tlist.h"
41 : #include "parser/parse_coerce.h"
42 : #include "parser/parsetree.h"
43 : #include "utils/lsyscache.h"
44 : #include "utils/rel.h"
45 : #include "utils/selfuncs.h"
46 : #include "utils/syscache.h"
47 :
48 :
49 : static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
50 : List *colTypes, List *colCollations,
51 : bool junkOK,
52 : int flag, List *refnames_tlist,
53 : List **pTargetList,
54 : double *pNumGroups);
55 : static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
56 : PlannerInfo *root,
57 : List *refnames_tlist,
58 : List **pTargetList);
59 : static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
60 : List *refnames_tlist,
61 : List **pTargetList);
62 : static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
63 : List *refnames_tlist,
64 : List **pTargetList);
65 : static List *plan_union_children(PlannerInfo *root,
66 : SetOperationStmt *top_union,
67 : List *refnames_tlist,
68 : List **tlist_list);
69 : static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
70 : PlannerInfo *root);
71 : static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
72 : static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
73 : Path *input_path,
74 : double dNumGroups, double dNumOutputRows,
75 : const char *construct);
76 : static List *generate_setop_tlist(List *colTypes, List *colCollations,
77 : int flag,
78 : Index varno,
79 : bool hack_constants,
80 : List *input_tlist,
81 : List *refnames_tlist,
82 : bool *trivial_tlist);
83 : static List *generate_append_tlist(List *colTypes, List *colCollations,
84 : bool flag,
85 : List *input_tlists,
86 : List *refnames_tlist);
87 : static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
88 :
89 :
90 : /*
91 : * plan_set_operations
92 : *
93 : * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
94 : *
95 : * This routine only deals with the setOperations tree of the given query.
96 : * Any top-level ORDER BY requested in root->parse->sortClause will be handled
97 : * when we return to grouping_planner; likewise for LIMIT.
98 : *
99 : * What we return is an "upperrel" RelOptInfo containing at least one Path
100 : * that implements the set-operation tree. In addition, root->processed_tlist
101 : * receives a targetlist representing the output of the topmost setop node.
102 : */
103 : RelOptInfo *
2589 tgl 104 GIC 2591 : plan_set_operations(PlannerInfo *root)
9770 scrappy 105 ECB : {
6517 tgl 106 GIC 2591 : Query *parse = root->parse;
2238 peter_e 107 CBC 2591 : SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
8221 tgl 108 ECB : Node *node;
109 : RangeTblEntry *leftmostRTE;
110 : Query *leftmostQuery;
111 : RelOptInfo *setop_rel;
112 : List *top_tlist;
113 :
2238 peter_e 114 GIC 2591 : Assert(topop);
8221 tgl 115 ECB :
116 : /* check for unsupported stuff */
7207 tgl 117 GIC 2591 : Assert(parse->jointree->fromlist == NIL);
7207 tgl 118 CBC 2591 : Assert(parse->jointree->quals == NULL);
119 2591 : Assert(parse->groupClause == NIL);
120 2591 : Assert(parse->havingQual == NULL);
5215 121 2591 : Assert(parse->windowClause == NIL);
7207 122 2591 : Assert(parse->distinctClause == NIL);
7207 tgl 123 ECB :
124 : /*
125 : * In the outer query level, we won't have any true equivalences to deal
126 : * with; but we do want to be able to make pathkeys, which will require
127 : * single-member EquivalenceClasses. Indicate that EC merging is complete
128 : * so that pathkeys.c won't complain.
129 : */
1358 drowley 130 GIC 2591 : Assert(root->eq_classes == NIL);
1358 drowley 131 CBC 2591 : root->ec_merging_done = true;
1358 drowley 132 ECB :
133 : /*
134 : * We'll need to build RelOptInfos for each of the leaf subqueries, which
135 : * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
136 : * arrays for those, and for AppendRelInfos in case they're needed.
137 : */
4236 tgl 138 GIC 2591 : setup_simple_rel_arrays(root);
4236 tgl 139 ECB :
140 : /*
141 : * Find the leftmost component Query. We need to use its column names for
142 : * all generated tlists (else SELECT INTO won't work right).
143 : */
8221 tgl 144 GIC 2591 : node = topop->larg;
8221 tgl 145 CBC 4015 : while (node && IsA(node, SetOperationStmt))
146 1424 : node = ((SetOperationStmt *) node)->larg;
147 2591 : Assert(node && IsA(node, RangeTblRef));
4236 148 2591 : leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
149 2591 : leftmostQuery = leftmostRTE->subquery;
8221 150 2591 : Assert(leftmostQuery != NULL);
9345 bruce 151 ECB :
152 : /*
153 : * If the topmost node is a recursive union, it needs special processing.
154 : */
5300 tgl 155 GIC 2591 : if (root->hasRecursion)
2589 tgl 156 ECB : {
1847 rhaas 157 GIC 357 : setop_rel = generate_recursion_path(topop, root,
1847 rhaas 158 ECB : leftmostQuery->targetList,
159 : &top_tlist);
160 : }
161 : else
162 : {
163 : /*
164 : * Recurse on setOperations tree to generate paths for set ops. The
165 : * final output paths should have just the column types shown as the
166 : * output from the top-level node, plus possibly resjunk working
167 : * columns (we can rely on upper-level nodes to deal with that).
168 : */
1847 rhaas 169 GIC 2234 : setop_rel = recurse_set_operations((Node *) topop, root,
1847 rhaas 170 ECB : topop->colTypes, topop->colCollations,
171 : true, -1,
172 : leftmostQuery->targetList,
173 : &top_tlist,
174 : NULL);
175 : }
176 :
177 : /* Must return the built tlist into root->processed_tlist. */
2589 tgl 178 GIC 2588 : root->processed_tlist = top_tlist;
2589 tgl 179 ECB :
2589 tgl 180 GIC 2588 : return setop_rel;
8221 tgl 181 ECB : }
182 :
183 : /*
184 : * recurse_set_operations
185 : * Recursively handle one step in a tree of set operations
186 : *
187 : * colTypes: OID list of set-op's result column datatypes
188 : * colCollations: OID list of set-op's result column collations
189 : * junkOK: if true, child resjunk columns may be left in the result
190 : * flag: if >= 0, add a resjunk output column indicating value of flag
191 : * refnames_tlist: targetlist to take column names from
192 : *
193 : * Returns a RelOptInfo for the subtree, as well as these output parameters:
194 : * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
195 : * *pNumGroups: if not NULL, we estimate the number of distinct groups
196 : * in the result, and store it there
197 : *
198 : * The pTargetList output parameter is mostly redundant with the pathtarget
199 : * of the returned RelOptInfo, but for the moment we need it because much of
200 : * the logic in this file depends on flag columns being marked resjunk.
201 : * Pending a redesign of how that works, this is the easy way out.
202 : *
203 : * We don't have to care about typmods here: the only allowed difference
204 : * between set-op input and output typmods is input is a specific typmod
205 : * and output is -1, and that does not require a coercion.
206 : */
207 : static RelOptInfo *
6517 tgl 208 GIC 8945 : recurse_set_operations(Node *setOp, PlannerInfo *root,
4376 tgl 209 ECB : List *colTypes, List *colCollations,
210 : bool junkOK,
211 : int flag, List *refnames_tlist,
212 : List **pTargetList,
213 : double *pNumGroups)
214 : {
1847 rhaas 215 GIC 8945 : RelOptInfo *rel = NULL; /* keep compiler quiet */
1847 rhaas 216 ECB :
217 : /* Guard against stack overflow due to overly complex setop nests */
1897 tgl 218 GIC 8945 : check_stack_depth();
1897 tgl 219 ECB :
8221 tgl 220 GIC 8945 : if (IsA(setOp, RangeTblRef))
9345 bruce 221 ECB : {
8221 tgl 222 GIC 6636 : RangeTblRef *rtr = (RangeTblRef *) setOp;
4236 tgl 223 CBC 6636 : RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
8053 bruce 224 6636 : Query *subquery = rte->subquery;
5890 tgl 225 ECB : PlannerInfo *subroot;
226 : RelOptInfo *final_rel;
227 : Path *subpath;
228 : Path *path;
229 : List *tlist;
230 : bool trivial_tlist;
231 :
8221 tgl 232 GIC 6636 : Assert(subquery != NULL);
233 :
1847 rhaas 234 ECB : /* Build a RelOptInfo for this leaf subquery. */
2197 rhaas 235 GIC 6636 : rel = build_simple_rel(root, rtr->rtindex, NULL);
236 :
3868 tgl 237 ECB : /* plan_params should not be in use in current query level */
3868 tgl 238 GIC 6636 : Assert(root->plan_params == NIL);
239 :
2589 tgl 240 ECB : /* Generate a subroot and Paths for the subquery */
2589 tgl 241 GIC 6636 : subroot = rel->subroot = subquery_planner(root->glob, subquery,
242 : root,
2589 tgl 243 ECB : false,
244 : root->tuple_fraction);
245 :
246 : /*
247 : * It should not be possible for the primitive query to contain any
248 : * cross-references to other primitive queries in the setop tree.
249 : */
3868 tgl 250 GIC 6636 : if (root->plan_params)
3868 tgl 251 UIC 0 : elog(ERROR, "unexpected outer reference in set operation subquery");
3868 tgl 252 ECB :
1847 rhaas 253 EUB : /* Figure out the appropriate target list for this subquery. */
1847 rhaas 254 GIC 6636 : tlist = generate_setop_tlist(colTypes, colCollations,
255 : flag,
1847 rhaas 256 CBC 6636 : rtr->rtindex,
257 : true,
1847 rhaas 258 ECB : subroot->processed_tlist,
259 : refnames_tlist,
260 : &trivial_tlist);
1847 rhaas 261 GIC 6636 : rel->reltarget = create_pathtarget(root, tlist);
262 :
263 : /* Return the fully-fledged tlist to caller, too */
1847 rhaas 264 CBC 6636 : *pTargetList = tlist;
265 :
266 : /*
2589 tgl 267 ECB : * Mark rel with estimated output rows, width, etc. Note that we have
268 : * to do this before generating outer-query paths, else
269 : * cost_subqueryscan is not happy.
270 : */
2589 tgl 271 GIC 6636 : set_subquery_size_estimates(root, rel);
272 :
273 : /*
1844 rhaas 274 ECB : * Since we may want to add a partial path to this relation, we must
275 : * set its consider_parallel flag correctly.
276 : */
1844 rhaas 277 GIC 6636 : final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
278 6636 : rel->consider_parallel = final_rel->consider_parallel;
279 :
2589 tgl 280 ECB : /*
281 : * For the moment, we consider only a single Path for the subquery.
282 : * This should change soon (make it look more like
283 : * set_subquery_pathlist).
284 : */
2589 tgl 285 GIC 6636 : subpath = get_cheapest_fractional_path(final_rel,
286 : root->tuple_fraction);
287 :
2589 tgl 288 ECB : /*
289 : * Stick a SubqueryScanPath atop that.
290 : *
291 : * We don't bother to determine the subquery's output ordering since
292 : * it won't be reflected in the set-op result anyhow; so just label
293 : * the SubqueryScanPath with nil pathkeys. (XXX that should change
294 : * soon too, likely.)
295 : */
2589 tgl 296 GIC 6636 : path = (Path *) create_subqueryscan_path(root, rel, subpath,
297 : trivial_tlist,
298 : NIL, NULL);
299 :
1847 rhaas 300 CBC 6636 : add_path(rel, path);
301 :
302 : /*
303 : * If we have a partial path for the child relation, we can use that
1844 rhaas 304 ECB : * to build a partial path for this relation. But there's no point in
305 : * considering any path but the cheapest.
306 : */
1810 rhaas 307 GIC 6636 : if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
308 3725 : final_rel->partial_pathlist != NIL)
309 : {
310 : Path *partial_subpath;
1844 rhaas 311 ECB : Path *partial_path;
312 :
1844 rhaas 313 GIC 6 : partial_subpath = linitial(final_rel->partial_pathlist);
314 : partial_path = (Path *)
315 6 : create_subqueryscan_path(root, rel, partial_subpath,
316 : trivial_tlist,
317 : NIL, NULL);
1844 rhaas 318 CBC 6 : add_partial_path(rel, partial_path);
319 : }
1844 rhaas 320 ECB :
321 : /*
322 : * Estimate number of groups if caller wants it. If the subquery used
5050 bruce 323 : * grouping or aggregation, its output is probably mostly unique
324 : * anyway; otherwise do statistical estimation.
325 : *
326 : * XXX you don't really want to know about this: we do the estimation
327 : * using the subquery's original targetlist expressions, not the
328 : * subroot->processed_tlist which might seem more appropriate. The
329 : * reason is that if the subquery is itself a setop, it may return a
330 : * processed_tlist containing "varno 0" Vars generated by
331 : * generate_append_tlist, and those would confuse estimate_num_groups
332 : * mightily. We ought to get rid of the "varno 0" hack, but that
333 : * requires a redesign of the parsetree representation of setops, so
334 : * that there can be an RTE corresponding to each setop's output.
335 : */
5358 tgl 336 GIC 6636 : if (pNumGroups)
337 : {
2885 andres 338 591 : if (subquery->groupClause || subquery->groupingSets ||
339 591 : subquery->distinctClause ||
5358 tgl 340 585 : subroot->hasHavingQual || subquery->hasAggs)
2589 tgl 341 CBC 6 : *pNumGroups = subpath->rows;
342 : else
5358 343 585 : *pNumGroups = estimate_num_groups(subroot,
2118 tgl 344 ECB : get_tlist_exprs(subquery->targetList, false),
2589 345 : subpath->rows,
740 drowley 346 : NULL,
347 : NULL);
5358 tgl 348 : }
349 : }
8221 tgl 350 GIC 2309 : else if (IsA(setOp, SetOperationStmt))
351 : {
352 2309 : SetOperationStmt *op = (SetOperationStmt *) setOp;
353 :
354 : /* UNIONs are much different from INTERSECT/EXCEPT */
8221 tgl 355 CBC 2309 : if (op->op == SETOP_UNION)
1847 rhaas 356 GIC 2006 : rel = generate_union_paths(op, root,
6512 tgl 357 ECB : refnames_tlist,
358 : pTargetList);
359 : else
1847 rhaas 360 CBC 303 : rel = generate_nonunion_paths(op, root,
6512 tgl 361 ECB : refnames_tlist,
362 : pTargetList);
1847 rhaas 363 GIC 2309 : if (pNumGroups)
364 15 : *pNumGroups = rel->rows;
8053 bruce 365 ECB :
366 : /*
367 : * If necessary, add a Result node to project the caller-requested
8221 tgl 368 : * output columns.
369 : *
370 : * XXX you don't really want to know about this: setrefs.c will apply
371 : * fix_upper_expr() to the Result node's tlist. This would fail if the
372 : * Vars generated by generate_setop_tlist() were not exactly equal()
373 : * to the corresponding tlist entries of the subplan. However, since
374 : * the subplan was generated by generate_union_paths() or
375 : * generate_nonunion_paths(), and hence its tlist was generated by
376 : * generate_append_tlist(), this will work. We just tell
377 : * generate_setop_tlist() to use varno 0.
378 : */
8186 tgl 379 GIC 2309 : if (flag >= 0 ||
2589 380 2294 : !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
381 2243 : !tlist_same_collations(*pTargetList, colCollations, junkOK))
382 : {
383 : PathTarget *target;
384 : bool trivial_tlist;
1847 rhaas 385 ECB : ListCell *lc;
386 :
2589 tgl 387 CBC 66 : *pTargetList = generate_setop_tlist(colTypes, colCollations,
388 : flag,
389 : 0,
390 : false,
391 : *pTargetList,
392 : refnames_tlist,
393 : &trivial_tlist);
1847 rhaas 394 66 : target = create_pathtarget(root, *pTargetList);
395 :
396 : /* Apply projection to each path */
1847 rhaas 397 GIC 132 : foreach(lc, rel->pathlist)
398 : {
399 66 : Path *subpath = (Path *) lfirst(lc);
400 : Path *path;
1847 rhaas 401 ECB :
1847 rhaas 402 GIC 66 : Assert(subpath->param_info == NULL);
403 66 : path = apply_projection_to_path(root, subpath->parent,
1847 rhaas 404 ECB : subpath, target);
405 : /* If we had to add a Result, path is different from subpath */
1847 rhaas 406 CBC 66 : if (path != subpath)
1847 rhaas 407 GIC 66 : lfirst(lc) = path;
408 : }
1847 rhaas 409 ECB :
410 : /* Apply projection to each partial path */
1847 rhaas 411 GIC 66 : foreach(lc, rel->partial_pathlist)
412 : {
1847 rhaas 413 LBC 0 : Path *subpath = (Path *) lfirst(lc);
1847 rhaas 414 ECB : Path *path;
415 :
1847 rhaas 416 UIC 0 : Assert(subpath->param_info == NULL);
417 :
1847 rhaas 418 ECB : /* avoid apply_projection_to_path, in case of multiple refs */
1847 rhaas 419 UIC 0 : path = (Path *) create_projection_path(root, subpath->parent,
1847 rhaas 420 EUB : subpath, target);
1847 rhaas 421 UIC 0 : lfirst(lc) = path;
422 : }
9232 bruce 423 EUB : }
424 : }
425 : else
9345 426 : {
7198 tgl 427 UIC 0 : elog(ERROR, "unrecognized node type: %d",
8221 tgl 428 EUB : (int) nodeTag(setOp));
429 : *pTargetList = NIL;
430 : }
431 :
1847 rhaas 432 GIC 8945 : postprocess_setop_rel(root, rel);
433 :
1847 rhaas 434 GBC 8945 : return rel;
435 : }
436 :
437 : /*
438 : * Generate paths for a recursive UNION node
5300 tgl 439 ECB : */
440 : static RelOptInfo *
2589 tgl 441 CBC 357 : generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
442 : List *refnames_tlist,
443 : List **pTargetList)
444 : {
445 : RelOptInfo *result_rel;
446 : Path *path;
447 : RelOptInfo *lrel,
1847 rhaas 448 ECB : *rrel;
449 : Path *lpath;
450 : Path *rpath;
451 : List *lpath_tlist;
452 : List *rpath_tlist;
453 : List *tlist;
454 : List *groupList;
455 : double dNumGroups;
456 :
457 : /* Parser should have rejected other cases */
5297 tgl 458 GIC 357 : if (setOp->op != SETOP_UNION)
5297 tgl 459 UIC 0 : elog(ERROR, "only UNION queries can be recursive");
460 : /* Worktable ID should be assigned */
5300 tgl 461 GIC 357 : Assert(root->wt_param_id >= 0);
462 :
463 : /*
464 : * Unlike a regular UNION node, process the left and right inputs
5300 tgl 465 ECB : * separately without any intention of combining them into one Append.
5300 tgl 466 EUB : */
1847 rhaas 467 GIC 357 : lrel = recurse_set_operations(setOp->larg, root,
1847 rhaas 468 ECB : setOp->colTypes, setOp->colCollations,
469 : false, -1,
470 : refnames_tlist,
471 : &lpath_tlist,
472 : NULL);
1847 rhaas 473 GIC 357 : lpath = lrel->cheapest_total_path;
2589 tgl 474 ECB : /* The right path will want to look at the left one ... */
2589 tgl 475 GIC 357 : root->non_recursive_path = lpath;
1847 rhaas 476 357 : rrel = recurse_set_operations(setOp->rarg, root,
477 : setOp->colTypes, setOp->colCollations,
478 : false, -1,
479 : refnames_tlist,
1847 rhaas 480 ECB : &rpath_tlist,
481 : NULL);
1847 rhaas 482 CBC 357 : rpath = rrel->cheapest_total_path;
2589 tgl 483 357 : root->non_recursive_path = NULL;
484 :
485 : /*
486 : * Generate tlist for RecursiveUnion path node --- same as in Append cases
487 : */
4443 peter_e 488 GIC 357 : tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
2589 tgl 489 CBC 357 : list_make2(lpath_tlist, rpath_tlist),
5300 tgl 490 ECB : refnames_tlist);
491 :
2589 tgl 492 GIC 357 : *pTargetList = tlist;
493 :
494 : /* Build result relation. */
1847 rhaas 495 CBC 357 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
496 357 : bms_union(lrel->relids, rrel->relids));
1847 rhaas 497 GIC 357 : result_rel->reltarget = create_pathtarget(root, tlist);
498 :
5297 tgl 499 ECB : /*
500 : * If UNION, identify the grouping operators
501 : */
5297 tgl 502 CBC 357 : if (setOp->all)
5297 tgl 503 ECB : {
5297 tgl 504 CBC 210 : groupList = NIL;
2589 tgl 505 GIC 210 : dNumGroups = 0;
506 : }
507 : else
508 : {
5297 tgl 509 ECB : /* Identify the grouping semantics */
5297 tgl 510 GIC 147 : groupList = generate_setop_grouplist(setOp, tlist);
5297 tgl 511 ECB :
512 : /* We only support hashing here */
5297 tgl 513 GIC 147 : if (!grouping_is_hashable(groupList))
514 3 : ereport(ERROR,
515 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
516 : errmsg("could not implement recursive UNION"),
5297 tgl 517 ECB : errdetail("All column datatypes must be hashable.")));
518 :
519 : /*
5050 bruce 520 : * For the moment, take the number of distinct groups as equal to the
521 : * total input size, ie, the worst case.
522 : */
2589 tgl 523 GIC 144 : dNumGroups = lpath->rows + rpath->rows * 10;
524 : }
525 :
526 : /*
527 : * And make the path node.
528 : */
529 354 : path = (Path *) create_recursiveunion_path(root,
2589 tgl 530 ECB : result_rel,
531 : lpath,
532 : rpath,
1847 rhaas 533 GIC 354 : result_rel->reltarget,
534 : groupList,
535 : root->wt_param_id,
2589 tgl 536 ECB : dNumGroups);
537 :
1847 rhaas 538 GIC 354 : add_path(result_rel, path);
539 354 : postprocess_setop_rel(root, result_rel);
1847 rhaas 540 CBC 354 : return result_rel;
541 : }
542 :
543 : /*
544 : * Generate paths for a UNION or UNION ALL node
8221 tgl 545 ECB : */
1847 rhaas 546 : static RelOptInfo *
1847 rhaas 547 CBC 2006 : generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
548 : List *refnames_tlist,
549 : List **pTargetList)
550 : {
1847 rhaas 551 GIC 2006 : Relids relids = NULL;
552 : RelOptInfo *result_rel;
2589 tgl 553 2006 : double save_fraction = root->tuple_fraction;
1847 rhaas 554 ECB : ListCell *lc;
1847 rhaas 555 GIC 2006 : List *pathlist = NIL;
1844 556 2006 : List *partial_pathlist = NIL;
557 2006 : bool partial_paths_valid = true;
1844 rhaas 558 CBC 2006 : bool consider_parallel = true;
559 : List *rellist;
2589 tgl 560 ECB : List *tlist_list;
561 : List *tlist;
562 : Path *path;
8221 563 :
6512 564 : /*
565 : * If plain UNION, tell children to fetch all tuples.
566 : *
567 : * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
568 : * each arm of the UNION ALL. One could make a case for reducing the
569 : * tuple fraction for later arms (discounting by the expected size of the
570 : * earlier arms' results) but it seems not worth the trouble. The normal
571 : * case where tuple_fraction isn't already zero is a LIMIT at top level,
572 : * and passing it down as-is is usually enough to get the desired result
573 : * of preferring fast-start plans.
574 : */
6512 tgl 575 GIC 2006 : if (!op->all)
2589 576 1763 : root->tuple_fraction = 0.0;
577 :
578 : /*
579 : * If any of my children are identical UNION nodes (same op, all-flag, and
580 : * colTypes) then they can be merged into this node so that we generate
581 : * only one Append and unique-ification for the lot. Recurse to find such
2589 tgl 582 ECB : * nodes and compute their children's paths.
8221 583 : */
1847 rhaas 584 GIC 2006 : rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
585 :
586 : /*
587 : * Generate tlist for Append plan node.
588 : *
589 : * The tlist for an Append plan isn't important as far as the Append is
590 : * concerned, but we must make it look real anyway for the benefit of the
6385 bruce 591 ECB : * next plan level up.
592 : */
4443 peter_e 593 GIC 2006 : tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
594 : tlist_list, refnames_tlist);
595 :
2589 tgl 596 2006 : *pTargetList = tlist;
597 :
598 : /* Build path lists and relid set. */
1847 rhaas 599 7397 : foreach(lc, rellist)
1847 rhaas 600 ECB : {
1847 rhaas 601 GIC 5391 : RelOptInfo *rel = lfirst(lc);
602 :
1847 rhaas 603 CBC 5391 : pathlist = lappend(pathlist, rel->cheapest_total_path);
604 :
1844 rhaas 605 GIC 5391 : if (consider_parallel)
1844 rhaas 606 ECB : {
1844 rhaas 607 GIC 3577 : if (!rel->consider_parallel)
1844 rhaas 608 ECB : {
1844 rhaas 609 GIC 1700 : consider_parallel = false;
1844 rhaas 610 CBC 1700 : partial_paths_valid = false;
611 : }
612 1877 : else if (rel->partial_pathlist == NIL)
1844 rhaas 613 GIC 1871 : partial_paths_valid = false;
1844 rhaas 614 ECB : else
1844 rhaas 615 GIC 6 : partial_pathlist = lappend(partial_pathlist,
1844 rhaas 616 CBC 6 : linitial(rel->partial_pathlist));
1844 rhaas 617 ECB : }
618 :
1847 rhaas 619 CBC 5391 : relids = bms_union(relids, rel->relids);
1847 rhaas 620 ECB : }
621 :
622 : /* Build result relation. */
1847 rhaas 623 CBC 2006 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
1847 rhaas 624 GIC 2006 : result_rel->reltarget = create_pathtarget(root, tlist);
1844 625 2006 : result_rel->consider_parallel = consider_parallel;
1847 rhaas 626 ECB :
627 : /*
628 : * Append the child results together.
629 : */
1828 alvherre 630 CBC 2006 : path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
797 tgl 631 ECB : NIL, NULL, 0, false, -1);
8053 bruce 632 :
633 : /*
634 : * For UNION ALL, we just need the Append path. For UNION, need to add
635 : * node(s) to remove duplicates.
636 : */
2589 tgl 637 CBC 2006 : if (!op->all)
2589 tgl 638 GIC 1763 : path = make_union_unique(op, path, tlist, root);
639 :
1847 rhaas 640 2006 : add_path(result_rel, path);
641 :
642 : /*
643 : * Estimate number of groups. For now we just assume the output is unique
1847 rhaas 644 ECB : * --- this is certainly true for the UNION case, and we want worst-case
645 : * estimates anyway.
646 : */
1847 rhaas 647 CBC 2006 : result_rel->rows = path->rows;
648 :
649 : /*
650 : * Now consider doing the same thing using the partial paths plus Append
651 : * plus Gather.
652 : */
1844 rhaas 653 GIC 2006 : if (partial_paths_valid)
1844 rhaas 654 ECB : {
655 : Path *ppath;
1844 rhaas 656 GIC 3 : int parallel_workers = 0;
657 :
658 : /* Find the highest number of workers requested for any subpath. */
1844 rhaas 659 CBC 9 : foreach(lc, partial_pathlist)
660 : {
186 drowley 661 GNC 6 : Path *subpath = lfirst(lc);
1844 rhaas 662 ECB :
186 drowley 663 GNC 6 : parallel_workers = Max(parallel_workers,
664 : subpath->parallel_workers);
665 : }
1844 rhaas 666 CBC 3 : Assert(parallel_workers > 0);
667 :
1844 rhaas 668 ECB : /*
669 : * If the use of parallel append is permitted, always request at least
670 : * log2(# of children) paths. We assume it can be useful to have
671 : * extra workers in this case because they will be spread out across
672 : * the children. The precise formula is just a guess; see
673 : * add_paths_to_append_rel.
674 : */
1844 rhaas 675 GIC 3 : if (enable_parallel_append)
676 : {
677 3 : parallel_workers = Max(parallel_workers,
678 : pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
679 3 : parallel_workers = Min(parallel_workers,
680 : max_parallel_workers_per_gather);
681 : }
1844 rhaas 682 CBC 3 : Assert(parallel_workers > 0);
683 :
1844 rhaas 684 ECB : ppath = (Path *)
1828 alvherre 685 GIC 3 : create_append_path(root, result_rel, NIL, partial_pathlist,
1465 tgl 686 ECB : NIL, NULL,
687 : parallel_workers, enable_parallel_append,
688 : -1);
1844 rhaas 689 : ppath = (Path *)
1844 rhaas 690 GIC 3 : create_gather_path(root, result_rel, ppath,
691 3 : result_rel->reltarget, NULL, NULL);
1844 rhaas 692 CBC 3 : if (!op->all)
1844 rhaas 693 GIC 3 : ppath = make_union_unique(op, ppath, tlist, root);
694 3 : add_path(result_rel, ppath);
695 : }
696 :
2589 tgl 697 ECB : /* Undo effects of possibly forcing tuple_fraction to 0 */
2589 tgl 698 CBC 2006 : root->tuple_fraction = save_fraction;
2589 tgl 699 ECB :
1847 rhaas 700 CBC 2006 : return result_rel;
8221 tgl 701 ECB : }
702 :
703 : /*
704 : * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
705 : */
706 : static RelOptInfo *
1847 rhaas 707 CBC 303 : generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
708 : List *refnames_tlist,
709 : List **pTargetList)
710 : {
711 : RelOptInfo *result_rel;
712 : RelOptInfo *lrel,
713 : *rrel;
2589 tgl 714 303 : double save_fraction = root->tuple_fraction;
715 : Path *lpath,
716 : *rpath,
717 : *path;
718 : List *lpath_tlist,
719 : *rpath_tlist,
720 : *tlist_list,
2589 tgl 721 ECB : *tlist,
722 : *groupList,
723 : *pathlist;
724 : double dLeftGroups,
725 : dRightGroups,
726 : dNumGroups,
727 : dNumOutputRows;
728 : bool use_hash;
729 : SetOpCmd cmd;
730 : int firstFlag;
731 :
732 : /*
733 : * Tell children to fetch all tuples.
734 : */
2589 tgl 735 GIC 303 : root->tuple_fraction = 0.0;
736 :
737 : /* Recurse on children, ensuring their outputs are marked */
1847 rhaas 738 303 : lrel = recurse_set_operations(op->larg, root,
739 : op->colTypes, op->colCollations,
740 : false, 0,
741 : refnames_tlist,
1847 rhaas 742 ECB : &lpath_tlist,
743 : &dLeftGroups);
1847 rhaas 744 GIC 303 : lpath = lrel->cheapest_total_path;
1847 rhaas 745 CBC 303 : rrel = recurse_set_operations(op->rarg, root,
746 : op->colTypes, op->colCollations,
747 : false, 1,
748 : refnames_tlist,
749 : &rpath_tlist,
750 : &dRightGroups);
751 303 : rpath = rrel->cheapest_total_path;
2589 tgl 752 ECB :
753 : /* Undo effects of forcing tuple_fraction to 0 */
2589 tgl 754 GIC 303 : root->tuple_fraction = save_fraction;
755 :
756 : /*
757 : * For EXCEPT, we must put the left input first. For INTERSECT, either
5358 tgl 758 ECB : * order should give the same results, and we prefer to put the smaller
759 : * input first in order to minimize the size of the hash table in the
760 : * hashing case. "Smaller" means the one with the fewer groups.
761 : */
5358 tgl 762 GIC 303 : if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
763 288 : {
2589 764 288 : pathlist = list_make2(lpath, rpath);
765 288 : tlist_list = list_make2(lpath_tlist, rpath_tlist);
5358 766 288 : firstFlag = 0;
767 : }
768 : else
5358 tgl 769 ECB : {
2589 tgl 770 CBC 15 : pathlist = list_make2(rpath, lpath);
771 15 : tlist_list = list_make2(rpath_tlist, lpath_tlist);
5358 772 15 : firstFlag = 1;
5358 tgl 773 ECB : }
774 :
775 : /*
776 : * Generate tlist for Append plan node.
8221 777 : *
8053 bruce 778 : * The tlist for an Append plan isn't important as far as the Append is
6385 779 : * concerned, but we must make it look real anyway for the benefit of the
780 : * next plan level up. In fact, it has to be real enough that the flag
781 : * column is shown as a variable not a constant, else setrefs.c will get
782 : * confused.
783 : */
4443 peter_e 784 GIC 303 : tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
785 : tlist_list, refnames_tlist);
786 :
2589 tgl 787 303 : *pTargetList = tlist;
788 :
789 : /* Build result relation. */
1847 rhaas 790 303 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
1847 rhaas 791 CBC 303 : bms_union(lrel->relids, rrel->relids));
1574 akapila 792 GIC 303 : result_rel->reltarget = create_pathtarget(root, tlist);
793 :
7818 tgl 794 ECB : /*
795 : * Append the child results together.
796 : */
1828 alvherre 797 CBC 303 : path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
797 tgl 798 ECB : NIL, NULL, 0, false, -1);
2589 799 :
800 : /* Identify the grouping semantics */
5358 tgl 801 GIC 303 : groupList = generate_setop_grouplist(op, tlist);
802 :
803 : /*
5358 tgl 804 ECB : * Estimate number of distinct groups that we'll need hashtable entries
805 : * for; this is the size of the left-hand input for EXCEPT, or the smaller
806 : * input for INTERSECT. Also estimate the number of eventual output rows.
807 : * In non-ALL cases, we estimate each group produces one output row; in
5050 bruce 808 : * ALL cases use the relevant relation size. These are worst-case
809 : * estimates, of course, but we need to be conservative.
810 : */
5358 tgl 811 GIC 303 : if (op->op == SETOP_EXCEPT)
812 : {
813 195 : dNumGroups = dLeftGroups;
2589 814 195 : dNumOutputRows = op->all ? lpath->rows : dNumGroups;
815 : }
816 : else
817 : {
5358 tgl 818 CBC 108 : dNumGroups = Min(dLeftGroups, dRightGroups);
2589 tgl 819 GIC 108 : dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
5358 tgl 820 ECB : }
821 :
822 : /*
823 : * Decide whether to hash or sort, and add a sort node if needed.
824 : */
2589 tgl 825 CBC 303 : use_hash = choose_hashed_setop(root, groupList, path,
2589 tgl 826 ECB : dNumGroups, dNumOutputRows,
2118 tgl 827 GIC 303 : (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
828 :
1934 829 303 : if (groupList && !use_hash)
2589 830 54 : path = (Path *) create_sort_path(root,
831 : result_rel,
2589 tgl 832 ECB : path,
833 : make_pathkeys_for_sortclauses(root,
2118 834 : groupList,
835 : tlist),
2589 836 : -1.0);
5358 837 :
838 : /*
839 : * Finally, add a SetOp path node to generate the correct output.
840 : */
8221 tgl 841 GIC 303 : switch (op->op)
842 : {
843 108 : case SETOP_INTERSECT:
844 108 : cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
845 108 : break;
846 195 : case SETOP_EXCEPT:
847 195 : cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
8221 tgl 848 CBC 195 : break;
8221 tgl 849 UIC 0 : default:
5358 tgl 850 LBC 0 : elog(ERROR, "unrecognized set op: %d", (int) op->op);
8053 bruce 851 ECB : cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
8221 tgl 852 : break;
853 : }
2589 tgl 854 CBC 303 : path = (Path *) create_setop_path(root,
2589 tgl 855 ECB : result_rel,
2589 tgl 856 EUB : path,
857 : cmd,
858 : use_hash ? SETOP_HASHED : SETOP_SORTED,
859 : groupList,
2589 tgl 860 GIC 303 : list_length(op->colTypes) + 1,
2589 tgl 861 ECB : use_hash ? firstFlag : -1,
862 : dNumGroups,
863 : dNumOutputRows);
864 :
1847 rhaas 865 GIC 303 : result_rel->rows = path->rows;
866 303 : add_path(result_rel, path);
1847 rhaas 867 CBC 303 : return result_rel;
868 : }
869 :
870 : /*
871 : * Pull up children of a UNION node that are identically-propertied UNIONs.
8221 tgl 872 ECB : *
873 : * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
874 : * output rows will be lost anyway.
875 : *
876 : * NOTE: currently, we ignore collations while determining if a child has
877 : * the same properties. This is semantically sound only so long as all
878 : * collations have the same notion of equality. It is valid from an
879 : * implementation standpoint because we don't care about the ordering of
880 : * a UNION child's result: UNION ALL results are always unordered, and
881 : * generate_union_paths will force a fresh sort if the top level is a UNION.
882 : */
883 : static List *
1847 rhaas 884 GIC 2006 : plan_union_children(PlannerInfo *root,
885 : SetOperationStmt *top_union,
886 : List *refnames_tlist,
887 : List **tlist_list)
888 : {
889 2006 : List *pending_rels = list_make1(top_union);
890 2006 : List *result = NIL;
2589 tgl 891 ECB : List *child_tlist;
892 :
1847 rhaas 893 GIC 2006 : *tlist_list = NIL;
894 :
895 10782 : while (pending_rels != NIL)
8221 tgl 896 ECB : {
1847 rhaas 897 CBC 8776 : Node *setOp = linitial(pending_rels);
898 :
1847 rhaas 899 GIC 8776 : pending_rels = list_delete_first(pending_rels);
1847 rhaas 900 ECB :
1847 rhaas 901 GIC 8776 : if (IsA(setOp, SetOperationStmt))
9345 bruce 902 ECB : {
1847 rhaas 903 GIC 3442 : SetOperationStmt *op = (SetOperationStmt *) setOp;
1847 rhaas 904 ECB :
1847 rhaas 905 GIC 3442 : if (op->op == top_union->op &&
1847 rhaas 906 CBC 6788 : (op->all == top_union->all || op->all) &&
1847 rhaas 907 GIC 3391 : equal(op->colTypes, top_union->colTypes))
1847 rhaas 908 ECB : {
909 : /* Same UNION, so fold children into parent */
1847 rhaas 910 CBC 3385 : pending_rels = lcons(op->rarg, pending_rels);
1847 rhaas 911 GIC 3385 : pending_rels = lcons(op->larg, pending_rels);
1847 rhaas 912 CBC 3385 : continue;
1847 rhaas 913 ECB : }
914 : }
915 :
916 : /*
917 : * Not same, so plan this child separately.
918 : *
919 : * Note we disallow any resjunk columns in child results. This is
920 : * necessary since the Append node that implements the union won't do
921 : * any projection, and upper levels will get confused if some of our
922 : * output tuples have junk and some don't. This case only arises when
923 : * we have an EXCEPT or INTERSECT as child, else there won't be
924 : * resjunk anyway.
925 : */
1847 rhaas 926 GIC 5391 : result = lappend(result, recurse_set_operations(setOp, root,
927 : top_union->colTypes,
928 : top_union->colCollations,
929 : false, -1,
930 : refnames_tlist,
931 : &child_tlist,
932 : NULL));
1847 rhaas 933 CBC 5391 : *tlist_list = lappend(*tlist_list, child_tlist);
934 : }
935 :
2589 tgl 936 GIC 2006 : return result;
937 : }
938 :
939 : /*
2589 tgl 940 ECB : * Add nodes to the given path tree to unique-ify the result of a UNION.
941 : */
942 : static Path *
2589 tgl 943 CBC 1766 : make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
944 : PlannerInfo *root)
945 : {
2589 tgl 946 GIC 1766 : RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
947 : List *groupList;
948 : double dNumGroups;
949 :
5358 tgl 950 ECB : /* Identify the grouping semantics */
2589 tgl 951 GIC 1766 : groupList = generate_setop_grouplist(op, tlist);
952 :
5358 tgl 953 ECB : /*
954 : * XXX for the moment, take the number of distinct groups as equal to the
955 : * total input size, ie, the worst case. This is too conservative, but
956 : * it's not clear how to get a decent estimate of the true size. One
957 : * should note as well the propensity of novices to write UNION rather
985 pg 958 : * than UNION ALL even when they don't expect any duplicates...
959 : */
2589 tgl 960 GIC 1766 : dNumGroups = path->rows;
961 :
962 : /* Decide whether to hash or sort */
963 1766 : if (choose_hashed_setop(root, groupList, path,
964 : dNumGroups, dNumGroups,
965 : "UNION"))
966 : {
5358 tgl 967 ECB : /* Hashed aggregate plan --- no sort needed */
2589 tgl 968 GIC 1614 : path = (Path *) create_agg_path(root,
969 : result_rel,
2589 tgl 970 ECB : path,
971 : create_pathtarget(root, tlist),
972 : AGG_HASHED,
973 : AGGSPLIT_SIMPLE,
974 : groupList,
975 : NIL,
976 : NULL,
977 : dNumGroups);
978 : }
979 : else
980 : {
981 : /* Sort and Unique */
1934 tgl 982 GIC 152 : if (groupList)
983 : path = (Path *)
984 143 : create_sort_path(root,
985 : result_rel,
986 : path,
987 : make_pathkeys_for_sortclauses(root,
988 : groupList,
1934 tgl 989 ECB : tlist),
990 : -1.0);
2589 tgl 991 CBC 152 : path = (Path *) create_upper_unique_path(root,
992 : result_rel,
993 : path,
2589 tgl 994 GIC 152 : list_length(path->pathkeys),
995 : dNumGroups);
996 : }
997 :
2589 tgl 998 CBC 1766 : return path;
999 : }
1000 :
1847 rhaas 1001 ECB : /*
1002 : * postprocess_setop_rel - perform steps required after adding paths
1003 : */
1004 : static void
1847 rhaas 1005 CBC 9299 : postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1006 : {
1007 : /*
1008 : * We don't currently worry about allowing FDWs to contribute paths to
1009 : * this relation, but give extensions a chance.
1010 : */
1847 rhaas 1011 GIC 9299 : if (create_upper_paths_hook)
1847 rhaas 1012 LBC 0 : (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1013 : NULL, rel, NULL);
1014 :
1015 : /* Select cheapest path */
1847 rhaas 1016 GIC 9299 : set_cheapest(rel);
1017 9299 : }
1847 rhaas 1018 ECB :
5358 tgl 1019 EUB : /*
1020 : * choose_hashed_setop - should we use hashing for a set operation?
1021 : */
1022 : static bool
5358 tgl 1023 CBC 2069 : choose_hashed_setop(PlannerInfo *root, List *groupClauses,
2589 tgl 1024 ECB : Path *input_path,
1025 : double dNumGroups, double dNumOutputRows,
1026 : const char *construct)
1027 : {
5358 tgl 1028 GIC 2069 : int numGroupCols = list_length(groupClauses);
623 1029 2069 : Size hash_mem_limit = get_hash_memory_limit();
5358 tgl 1030 ECB : bool can_sort;
1031 : bool can_hash;
1032 : Size hashentrysize;
1033 : Path hashed_p;
1034 : Path sorted_p;
2589 1035 : double tuple_fraction;
5358 1036 :
1037 : /* Check whether the operators support sorting or hashing */
5358 tgl 1038 GIC 2069 : can_sort = grouping_is_sortable(groupClauses);
1039 2069 : can_hash = grouping_is_hashable(groupClauses);
1040 2069 : if (can_hash && can_sort)
1041 : {
1042 : /* we have a meaningful choice to make, continue ... */
1043 : }
1044 369 : else if (can_hash)
5358 tgl 1045 CBC 303 : return true;
1046 66 : else if (can_sort)
1047 66 : return false;
1048 : else
5358 tgl 1049 UIC 0 : ereport(ERROR,
1050 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5050 bruce 1051 ECB : /* translator: %s is UNION, INTERSECT, or EXCEPT */
5358 tgl 1052 : errmsg("could not implement %s", construct),
1053 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1054 :
1055 : /* Prefer sorting when enable_hashagg is off */
5358 tgl 1056 GBC 1700 : if (!enable_hashagg)
5358 tgl 1057 GIC 63 : return false;
1058 :
1059 : /*
1060 : * Don't do it if it doesn't look like the hashtable will fit into
1061 : * hash_mem.
1062 : */
2589 tgl 1063 CBC 1637 : hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
5358 tgl 1064 ECB :
623 tgl 1065 GIC 1637 : if (hashentrysize * dNumGroups > hash_mem_limit)
5358 tgl 1066 UIC 0 : return false;
1067 :
1068 : /*
1069 : * See if the estimated cost is no more than doing it the other way.
5358 tgl 1070 ECB : *
1071 : * We need to consider input_plan + hashagg versus input_plan + sort +
1072 : * group. Note that the actual result plan might involve a SetOp or
5358 tgl 1073 EUB : * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1074 : * should be close enough for our purposes here.
1075 : *
1076 : * These path variables are dummies that just hold cost fields; we don't
1077 : * make actual Paths for these steps.
1078 : */
4368 tgl 1079 GIC 1637 : cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1080 : numGroupCols, dNumGroups,
1081 : NIL,
1082 : input_path->startup_cost, input_path->total_cost,
1117 jdavis 1083 1637 : input_path->rows, input_path->pathtarget->width);
1084 :
1085 : /*
5358 tgl 1086 ECB : * Now for the sorted case. Note that the input is *always* unsorted,
1087 : * since it was made by appending unrelated sub-relations together.
1088 : */
2589 tgl 1089 GIC 1637 : sorted_p.startup_cost = input_path->startup_cost;
2589 tgl 1090 CBC 1637 : sorted_p.total_cost = input_path->total_cost;
1091 : /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
5358 tgl 1092 GIC 1637 : cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
2589 1093 1637 : input_path->rows, input_path->pathtarget->width,
1094 : 0.0, work_mem, -1.0);
5358 1095 1637 : cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1984 tgl 1096 ECB : NIL,
5358 1097 : sorted_p.startup_cost, sorted_p.total_cost,
1098 : input_path->rows);
1099 :
1100 : /*
1101 : * Now make the decision using the top-level tuple fraction. First we
1102 : * have to convert an absolute count (LIMIT) into fractional form.
1103 : */
2589 tgl 1104 GIC 1637 : tuple_fraction = root->tuple_fraction;
5358 1105 1637 : if (tuple_fraction >= 1.0)
5358 tgl 1106 UIC 0 : tuple_fraction /= dNumOutputRows;
1107 :
5358 tgl 1108 GIC 1637 : if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1109 : tuple_fraction) < 0)
1110 : {
5358 tgl 1111 ECB : /* Hashed is cheaper, so use it */
5358 tgl 1112 CBC 1545 : return true;
5358 tgl 1113 EUB : }
5358 tgl 1114 GIC 92 : return false;
5358 tgl 1115 ECB : }
1116 :
1117 : /*
1118 : * Generate targetlist for a set-operation plan node
7908 1119 : *
1120 : * colTypes: OID list of set-op's result column datatypes
4376 1121 : * colCollations: OID list of set-op's result column collations
1122 : * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1123 : * varno: varno to use in generated Vars
1124 : * hack_constants: true to copy up constants (see comments in code)
1125 : * input_tlist: targetlist of this node's input node
1126 : * refnames_tlist: targetlist to take column names from
1127 : * trivial_tlist: output parameter, set to true if targetlist is trivial
1128 : */
1129 : static List *
4376 tgl 1130 GIC 6702 : generate_setop_tlist(List *colTypes, List *colCollations,
1131 : int flag,
1132 : Index varno,
1133 : bool hack_constants,
1134 : List *input_tlist,
1135 : List *refnames_tlist,
1136 : bool *trivial_tlist)
1137 : {
8221 1138 6702 : List *tlist = NIL;
8221 tgl 1139 CBC 6702 : int resno = 1;
1140 : ListCell *ctlc,
1141 : *cclc,
1142 : *itlc,
1143 : *rtlc;
1144 : TargetEntry *tle;
1145 : Node *expr;
1146 :
264 tgl 1147 GNC 6702 : *trivial_tlist = true; /* until proven differently */
1148 :
1501 tgl 1149 CBC 24820 : forfour(ctlc, colTypes, cclc, colCollations,
1501 tgl 1150 ECB : itlc, input_tlist, rtlc, refnames_tlist)
1151 : {
4376 tgl 1152 GIC 18118 : Oid colType = lfirst_oid(ctlc);
1153 18118 : Oid colColl = lfirst_oid(cclc);
1154 18118 : TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1155 18118 : TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1156 :
6577 1157 18118 : Assert(inputtle->resno == resno);
6577 tgl 1158 CBC 18118 : Assert(reftle->resno == resno);
6577 tgl 1159 GIC 18118 : Assert(!inputtle->resjunk);
6577 tgl 1160 CBC 18118 : Assert(!reftle->resjunk);
1161 :
1162 : /*
6385 bruce 1163 ECB : * Generate columns referencing input columns and having appropriate
1164 : * data types and column names. Insert datatype coercions where
1165 : * necessary.
8221 tgl 1166 : *
1167 : * HACK: constants in the input's targetlist are copied up as-is
6347 bruce 1168 : * rather than being referenced as subquery outputs. This is mainly
1169 : * to ensure that when we try to coerce them to the output column's
6385 1170 : * datatype, the right things happen for UNKNOWN constants. But do
1171 : * this only at the first level of subquery-scan plans; we don't want
1172 : * phony constants appearing in the output tlists of upper-level
1173 : * nodes!
1174 : *
1175 : * Note that copying a constant doesn't in itself require us to mark
1176 : * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1177 : */
8186 tgl 1178 GIC 18118 : if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
7423 1179 6334 : expr = (Node *) inputtle->expr;
1180 : else
6531 1181 47136 : expr = (Node *) makeVar(varno,
6577 1182 11784 : inputtle->resno,
1183 11784 : exprType((Node *) inputtle->expr),
1184 11784 : exprTypmod((Node *) inputtle->expr),
4443 peter_e 1185 11784 : exprCollation((Node *) inputtle->expr),
1186 : 0);
1187 :
6577 tgl 1188 18118 : if (exprType(expr) != colType)
1189 : {
1190 : /*
1191 : * Note: it's not really cool to be applying coerce_to_common_type
4376 tgl 1192 ECB : * here; one notable point is that assign_expr_collations never
1193 : * gets run on any generated nodes. For the moment that's not a
1194 : * problem because we force the correct exposed collation below.
1195 : * It would likely be best to make the parser generate the correct
1196 : * output tlist for every set-op to begin with, though.
1197 : */
7188 bruce 1198 CBC 614 : expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
7285 tgl 1199 ECB : expr,
1200 : colType,
1201 : "UNION/INTERSECT/EXCEPT");
264 tgl 1202 GNC 614 : *trivial_tlist = false; /* the coercion makes it not trivial */
7818 tgl 1203 ECB : }
1204 :
1205 : /*
1206 : * Ensure the tlist entry's exposed collation matches the set-op. This
1207 : * is necessary because plan_set_operations() reports the result
1208 : * ordering as a list of SortGroupClauses, which don't carry collation
1209 : * themselves but just refer to tlist entries. If we don't show the
1210 : * right collation then planner.c might do the wrong thing in
1211 : * higher-level queries.
1212 : *
4376 1213 : * Note we use RelabelType, not CollateExpr, since this expression
1214 : * will reach the executor without any further processing.
1215 : */
4376 tgl 1216 GIC 18118 : if (exprCollation(expr) != colColl)
1217 : {
963 tgl 1218 CBC 4989 : expr = applyRelabelType(expr,
1219 : exprType(expr), exprTypmod(expr), colColl,
1220 : COERCE_IMPLICIT_CAST, -1, false);
264 tgl 1221 GNC 4989 : *trivial_tlist = false; /* the relabel makes it not trivial */
1222 : }
1223 :
6577 tgl 1224 GIC 36236 : tle = makeTargetEntry((Expr *) expr,
1225 18118 : (AttrNumber) resno++,
1226 18118 : pstrdup(reftle->resname),
1227 : false);
1228 :
1229 : /*
1230 : * By convention, all non-resjunk columns in a setop tree have
1231 : * ressortgroupref equal to their resno. In some cases the ref isn't
1232 : * needed, but this is a cleaner way than modifying the tlist later.
1233 : */
2589 tgl 1234 CBC 18118 : tle->ressortgroupref = tle->resno;
1235 :
6577 1236 18118 : tlist = lappend(tlist, tle);
1237 : }
1238 :
8221 1239 6702 : if (flag >= 0)
1240 : {
1241 : /* Add a resjunk flag column */
7705 tgl 1242 ECB : /* flag value is the given constant */
7705 tgl 1243 CBC 606 : expr = (Node *) makeConst(INT4OID,
5867 tgl 1244 ECB : -1,
1245 : InvalidOid,
1246 : sizeof(int32),
1247 : Int32GetDatum(flag),
1248 : false,
1249 : true);
6577 tgl 1250 GIC 606 : tle = makeTargetEntry((Expr *) expr,
1251 606 : (AttrNumber) resno++,
6577 tgl 1252 ECB : pstrdup("flag"),
1253 : true);
6577 tgl 1254 CBC 606 : tlist = lappend(tlist, tle);
264 tgl 1255 GNC 606 : *trivial_tlist = false; /* the extra entry makes it not trivial */
1256 : }
1257 :
8221 tgl 1258 CBC 6702 : return tlist;
1259 : }
1260 :
1261 : /*
7705 tgl 1262 ECB : * Generate targetlist for a set-operation Append node
1263 : *
1264 : * colTypes: OID list of set-op's result column datatypes
1265 : * colCollations: OID list of set-op's result column collations
1266 : * flag: true to create a flag column copied up from subplans
1267 : * input_tlists: list of tlists for sub-plans of the Append
1268 : * refnames_tlist: targetlist to take column names from
7818 1269 : *
7705 1270 : * The entries in the Append's targetlist should always be simple Vars;
1271 : * we just have to make sure they have the right datatypes/typmods/collations.
1272 : * The Vars are always generated with varno 0.
2589 1273 : *
1274 : * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1275 : * cannot figure out a realistic width for the tlist we make here. But we
1276 : * ought to refactor this code to produce a PathTarget directly, anyway.
7818 1277 : */
1278 : static List *
4376 tgl 1279 GIC 2666 : generate_append_tlist(List *colTypes, List *colCollations,
1280 : bool flag,
1281 : List *input_tlists,
1282 : List *refnames_tlist)
1283 : {
7705 1284 2666 : List *tlist = NIL;
1285 2666 : int resno = 1;
1286 : ListCell *curColType;
1287 : ListCell *curColCollation;
1288 : ListCell *ref_tl_item;
1289 : int colindex;
1290 : TargetEntry *tle;
1291 : Node *expr;
1292 : ListCell *tlistl;
1293 : int32 *colTypmods;
1294 :
1295 : /*
1296 : * First extract typmods to use.
1297 : *
7522 bruce 1298 ECB : * If the inputs all agree on type and typmod of a particular column, use
1299 : * that typmod; else use -1.
1300 : */
6882 tgl 1301 GIC 2666 : colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1302 :
2589 tgl 1303 CBC 9377 : foreach(tlistl, input_tlists)
7818 tgl 1304 ECB : {
2589 tgl 1305 GIC 6711 : List *subtlist = (List *) lfirst(tlistl);
1306 : ListCell *subtlistl;
1307 :
6892 neilc 1308 6711 : curColType = list_head(colTypes);
7705 tgl 1309 6711 : colindex = 0;
2589 1310 25450 : foreach(subtlistl, subtlist)
1311 : {
1312 18739 : TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1313 :
6577 1314 18739 : if (subtle->resjunk)
7818 1315 606 : continue;
6892 neilc 1316 18133 : Assert(curColType != NULL);
6577 tgl 1317 18133 : if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1318 : {
1319 : /* If first subplan, copy the typmod; else compare */
6577 tgl 1320 CBC 18133 : int32 subtypmod = exprTypmod((Node *) subtle->expr);
1321 :
2589 1322 18133 : if (tlistl == list_head(input_tlists))
6577 tgl 1323 GIC 6696 : colTypmods[colindex] = subtypmod;
6577 tgl 1324 CBC 11437 : else if (subtypmod != colTypmods[colindex])
7705 tgl 1325 GIC 6 : colTypmods[colindex] = -1;
1326 : }
7705 tgl 1327 ECB : else
1328 : {
1329 : /* types disagree, so force typmod to -1 */
7705 tgl 1330 UIC 0 : colTypmods[colindex] = -1;
7705 tgl 1331 ECB : }
1364 tgl 1332 GIC 18133 : curColType = lnext(colTypes, curColType);
7705 tgl 1333 CBC 18133 : colindex++;
7818 tgl 1334 ECB : }
6892 neilc 1335 CBC 6711 : Assert(curColType == NULL);
7818 tgl 1336 ECB : }
1337 :
1338 : /*
7705 1339 : * Now we can build the tlist for the Append.
1340 : */
7705 tgl 1341 CBC 2666 : colindex = 0;
4376 1342 9362 : forthree(curColType, colTypes, curColCollation, colCollations,
4376 tgl 1343 ECB : ref_tl_item, refnames_tlist)
7705 1344 : {
6888 neilc 1345 GIC 6696 : Oid colType = lfirst_oid(curColType);
7705 tgl 1346 6696 : int32 colTypmod = colTypmods[colindex++];
4443 peter_e 1347 6696 : Oid colColl = lfirst_oid(curColCollation);
6892 neilc 1348 6696 : TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
7705 tgl 1349 EUB :
6577 tgl 1350 GIC 6696 : Assert(reftle->resno == resno);
6577 tgl 1351 CBC 6696 : Assert(!reftle->resjunk);
7705 1352 6696 : expr = (Node *) makeVar(0,
1353 : resno,
7705 tgl 1354 ECB : colType,
1355 : colTypmod,
1356 : colColl,
1357 : 0);
6577 tgl 1358 GIC 13392 : tle = makeTargetEntry((Expr *) expr,
1359 6696 : (AttrNumber) resno++,
6577 tgl 1360 CBC 6696 : pstrdup(reftle->resname),
6577 tgl 1361 ECB : false);
1362 :
1363 : /*
2589 1364 : * By convention, all non-resjunk columns in a setop tree have
1365 : * ressortgroupref equal to their resno. In some cases the ref isn't
1366 : * needed, but this is a cleaner way than modifying the tlist later.
1367 : */
2589 tgl 1368 GIC 6696 : tle->ressortgroupref = tle->resno;
2589 tgl 1369 ECB :
6577 tgl 1370 CBC 6696 : tlist = lappend(tlist, tle);
7705 tgl 1371 ECB : }
1372 :
7705 tgl 1373 GIC 2666 : if (flag)
1374 : {
1375 : /* Add a resjunk flag column */
1376 : /* flag value is shown as copied up from subplan */
7705 tgl 1377 CBC 303 : expr = (Node *) makeVar(0,
6577 tgl 1378 ECB : resno,
7705 1379 : INT4OID,
1380 : -1,
1381 : InvalidOid,
1382 : 0);
6577 tgl 1383 GIC 303 : tle = makeTargetEntry((Expr *) expr,
1384 303 : (AttrNumber) resno++,
1385 : pstrdup("flag"),
1386 : true);
6577 tgl 1387 CBC 303 : tlist = lappend(tlist, tle);
1388 : }
7705 tgl 1389 ECB :
7705 tgl 1390 GIC 2666 : pfree(colTypmods);
1391 :
7705 tgl 1392 CBC 2666 : return tlist;
1393 : }
1394 :
1395 : /*
5358 tgl 1396 ECB : * generate_setop_grouplist
1397 : * Build a SortGroupClause list defining the sort/grouping properties
1398 : * of the setop's output columns.
1399 : *
1400 : * Parse analysis already determined the properties and built a suitable
1401 : * list, except that the entries do not have sortgrouprefs set because
1402 : * the parser output representation doesn't include a tlist for each
1403 : * setop. So what we need to do here is copy that list and install
1404 : * proper sortgrouprefs into it (copying those from the targetlist).
1405 : */
5365 1406 : static List *
5358 tgl 1407 GIC 2216 : generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1408 : {
2222 peter_e 1409 CBC 2216 : List *grouplist = copyObject(op->groupClauses);
1410 : ListCell *lg;
5358 tgl 1411 ECB : ListCell *lt;
1412 :
5358 tgl 1413 GIC 2216 : lg = list_head(grouplist);
1414 7889 : foreach(lt, targetlist)
1415 : {
1416 5673 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
1417 : SortGroupClause *sgc;
1418 :
1419 5673 : if (tle->resjunk)
1420 : {
1421 : /* resjunk columns should not have sortgrouprefs */
2589 1422 303 : Assert(tle->ressortgroupref == 0);
5358 1423 303 : continue; /* ignore resjunk columns */
1424 : }
1425 :
2589 tgl 1426 ECB : /* non-resjunk columns should have sortgroupref = resno */
2589 tgl 1427 GIC 5370 : Assert(tle->ressortgroupref == tle->resno);
5358 tgl 1428 ECB :
1429 : /* non-resjunk columns should have grouping clauses */
5358 tgl 1430 GIC 5370 : Assert(lg != NULL);
1431 5370 : sgc = (SortGroupClause *) lfirst(lg);
1364 tgl 1432 CBC 5370 : lg = lnext(grouplist, lg);
5358 1433 5370 : Assert(sgc->tleSortGroupRef == 0);
1434 :
2589 1435 5370 : sgc->tleSortGroupRef = tle->ressortgroupref;
1436 : }
5358 tgl 1437 GIC 2216 : Assert(lg == NULL);
5358 tgl 1438 CBC 2216 : return grouplist;
1439 : }
|