TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * prepunion.c
4 : * Routines to plan set-operation queries. The filename is a leftover
5 : * from a time when only UNIONs were implemented.
6 : *
7 : * There are two code paths in the planner for set-operation queries.
8 : * If a subquery consists entirely of simple UNION ALL operations, it
9 : * is converted into an "append relation". Otherwise, it is handled
10 : * by the general code in this module (plan_set_operations and its
11 : * subroutines). There is some support code here for the append-relation
12 : * case, but most of the heavy lifting for that is done elsewhere,
13 : * notably in prepjointree.c and allpaths.c.
14 : *
15 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/optimizer/prep/prepunion.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include "access/htup_details.h"
27 : #include "access/sysattr.h"
28 : #include "catalog/partition.h"
29 : #include "catalog/pg_inherits.h"
30 : #include "catalog/pg_type.h"
31 : #include "miscadmin.h"
32 : #include "nodes/makefuncs.h"
33 : #include "nodes/nodeFuncs.h"
34 : #include "optimizer/cost.h"
35 : #include "optimizer/pathnode.h"
36 : #include "optimizer/paths.h"
37 : #include "optimizer/planmain.h"
38 : #include "optimizer/planner.h"
39 : #include "optimizer/prep.h"
40 : #include "optimizer/tlist.h"
41 : #include "parser/parse_coerce.h"
42 : #include "parser/parsetree.h"
43 : #include "utils/lsyscache.h"
44 : #include "utils/rel.h"
45 : #include "utils/selfuncs.h"
46 : #include "utils/syscache.h"
47 :
48 :
49 : static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
50 : List *colTypes, List *colCollations,
51 : bool junkOK,
52 : int flag, List *refnames_tlist,
53 : List **pTargetList,
54 : double *pNumGroups);
55 : static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
56 : PlannerInfo *root,
57 : List *refnames_tlist,
58 : List **pTargetList);
59 : static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
60 : List *refnames_tlist,
61 : List **pTargetList);
62 : static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
63 : List *refnames_tlist,
64 : List **pTargetList);
65 : static List *plan_union_children(PlannerInfo *root,
66 : SetOperationStmt *top_union,
67 : List *refnames_tlist,
68 : List **tlist_list);
69 : static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
70 : PlannerInfo *root);
71 : static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
72 : static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
73 : Path *input_path,
74 : double dNumGroups, double dNumOutputRows,
75 : const char *construct);
76 : static List *generate_setop_tlist(List *colTypes, List *colCollations,
77 : int flag,
78 : Index varno,
79 : bool hack_constants,
80 : List *input_tlist,
81 : List *refnames_tlist,
82 : bool *trivial_tlist);
83 : static List *generate_append_tlist(List *colTypes, List *colCollations,
84 : bool flag,
85 : List *input_tlists,
86 : List *refnames_tlist);
87 : static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
88 :
89 :
90 : /*
91 : * plan_set_operations
92 : *
93 : * Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
94 : *
95 : * This routine only deals with the setOperations tree of the given query.
96 : * Any top-level ORDER BY requested in root->parse->sortClause will be handled
97 : * when we return to grouping_planner; likewise for LIMIT.
98 : *
99 : * What we return is an "upperrel" RelOptInfo containing at least one Path
100 : * that implements the set-operation tree. In addition, root->processed_tlist
101 : * receives a targetlist representing the output of the topmost setop node.
102 : */
103 : RelOptInfo *
104 GIC 2591 : plan_set_operations(PlannerInfo *root)
105 ECB : {
106 GIC 2591 : Query *parse = root->parse;
107 CBC 2591 : SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
108 ECB : Node *node;
109 : RangeTblEntry *leftmostRTE;
110 : Query *leftmostQuery;
111 : RelOptInfo *setop_rel;
112 : List *top_tlist;
113 :
114 GIC 2591 : Assert(topop);
115 ECB :
116 : /* check for unsupported stuff */
117 GIC 2591 : Assert(parse->jointree->fromlist == NIL);
118 CBC 2591 : Assert(parse->jointree->quals == NULL);
119 2591 : Assert(parse->groupClause == NIL);
120 2591 : Assert(parse->havingQual == NULL);
121 2591 : Assert(parse->windowClause == NIL);
122 2591 : Assert(parse->distinctClause == NIL);
123 ECB :
124 : /*
125 : * In the outer query level, we won't have any true equivalences to deal
126 : * with; but we do want to be able to make pathkeys, which will require
127 : * single-member EquivalenceClasses. Indicate that EC merging is complete
128 : * so that pathkeys.c won't complain.
129 : */
130 GIC 2591 : Assert(root->eq_classes == NIL);
131 CBC 2591 : root->ec_merging_done = true;
132 ECB :
133 : /*
134 : * We'll need to build RelOptInfos for each of the leaf subqueries, which
135 : * are RTE_SUBQUERY rangetable entries in this Query. Prepare the index
136 : * arrays for those, and for AppendRelInfos in case they're needed.
137 : */
138 GIC 2591 : setup_simple_rel_arrays(root);
139 ECB :
140 : /*
141 : * Find the leftmost component Query. We need to use its column names for
142 : * all generated tlists (else SELECT INTO won't work right).
143 : */
144 GIC 2591 : node = topop->larg;
145 CBC 4015 : while (node && IsA(node, SetOperationStmt))
146 1424 : node = ((SetOperationStmt *) node)->larg;
147 2591 : Assert(node && IsA(node, RangeTblRef));
148 2591 : leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
149 2591 : leftmostQuery = leftmostRTE->subquery;
150 2591 : Assert(leftmostQuery != NULL);
151 ECB :
152 : /*
153 : * If the topmost node is a recursive union, it needs special processing.
154 : */
155 GIC 2591 : if (root->hasRecursion)
156 ECB : {
157 GIC 357 : setop_rel = generate_recursion_path(topop, root,
158 ECB : leftmostQuery->targetList,
159 : &top_tlist);
160 : }
161 : else
162 : {
163 : /*
164 : * Recurse on setOperations tree to generate paths for set ops. The
165 : * final output paths should have just the column types shown as the
166 : * output from the top-level node, plus possibly resjunk working
167 : * columns (we can rely on upper-level nodes to deal with that).
168 : */
169 GIC 2234 : setop_rel = recurse_set_operations((Node *) topop, root,
170 ECB : topop->colTypes, topop->colCollations,
171 : true, -1,
172 : leftmostQuery->targetList,
173 : &top_tlist,
174 : NULL);
175 : }
176 :
177 : /* Must return the built tlist into root->processed_tlist. */
178 GIC 2588 : root->processed_tlist = top_tlist;
179 ECB :
180 GIC 2588 : return setop_rel;
181 ECB : }
182 :
183 : /*
184 : * recurse_set_operations
185 : * Recursively handle one step in a tree of set operations
186 : *
187 : * colTypes: OID list of set-op's result column datatypes
188 : * colCollations: OID list of set-op's result column collations
189 : * junkOK: if true, child resjunk columns may be left in the result
190 : * flag: if >= 0, add a resjunk output column indicating value of flag
191 : * refnames_tlist: targetlist to take column names from
192 : *
193 : * Returns a RelOptInfo for the subtree, as well as these output parameters:
194 : * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
195 : * *pNumGroups: if not NULL, we estimate the number of distinct groups
196 : * in the result, and store it there
197 : *
198 : * The pTargetList output parameter is mostly redundant with the pathtarget
199 : * of the returned RelOptInfo, but for the moment we need it because much of
200 : * the logic in this file depends on flag columns being marked resjunk.
201 : * Pending a redesign of how that works, this is the easy way out.
202 : *
203 : * We don't have to care about typmods here: the only allowed difference
204 : * between set-op input and output typmods is input is a specific typmod
205 : * and output is -1, and that does not require a coercion.
206 : */
207 : static RelOptInfo *
208 GIC 8945 : recurse_set_operations(Node *setOp, PlannerInfo *root,
209 ECB : List *colTypes, List *colCollations,
210 : bool junkOK,
211 : int flag, List *refnames_tlist,
212 : List **pTargetList,
213 : double *pNumGroups)
214 : {
215 GIC 8945 : RelOptInfo *rel = NULL; /* keep compiler quiet */
216 ECB :
217 : /* Guard against stack overflow due to overly complex setop nests */
218 GIC 8945 : check_stack_depth();
219 ECB :
220 GIC 8945 : if (IsA(setOp, RangeTblRef))
221 ECB : {
222 GIC 6636 : RangeTblRef *rtr = (RangeTblRef *) setOp;
223 CBC 6636 : RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
224 6636 : Query *subquery = rte->subquery;
225 ECB : PlannerInfo *subroot;
226 : RelOptInfo *final_rel;
227 : Path *subpath;
228 : Path *path;
229 : List *tlist;
230 : bool trivial_tlist;
231 :
232 GIC 6636 : Assert(subquery != NULL);
233 :
234 ECB : /* Build a RelOptInfo for this leaf subquery. */
235 GIC 6636 : rel = build_simple_rel(root, rtr->rtindex, NULL);
236 :
237 ECB : /* plan_params should not be in use in current query level */
238 GIC 6636 : Assert(root->plan_params == NIL);
239 :
240 ECB : /* Generate a subroot and Paths for the subquery */
241 GIC 6636 : subroot = rel->subroot = subquery_planner(root->glob, subquery,
242 : root,
243 ECB : false,
244 : root->tuple_fraction);
245 :
246 : /*
247 : * It should not be possible for the primitive query to contain any
248 : * cross-references to other primitive queries in the setop tree.
249 : */
250 GIC 6636 : if (root->plan_params)
251 UIC 0 : elog(ERROR, "unexpected outer reference in set operation subquery");
252 ECB :
253 EUB : /* Figure out the appropriate target list for this subquery. */
254 GIC 6636 : tlist = generate_setop_tlist(colTypes, colCollations,
255 : flag,
256 CBC 6636 : rtr->rtindex,
257 : true,
258 ECB : subroot->processed_tlist,
259 : refnames_tlist,
260 : &trivial_tlist);
261 GIC 6636 : rel->reltarget = create_pathtarget(root, tlist);
262 :
263 : /* Return the fully-fledged tlist to caller, too */
264 CBC 6636 : *pTargetList = tlist;
265 :
266 : /*
267 ECB : * Mark rel with estimated output rows, width, etc. Note that we have
268 : * to do this before generating outer-query paths, else
269 : * cost_subqueryscan is not happy.
270 : */
271 GIC 6636 : set_subquery_size_estimates(root, rel);
272 :
273 : /*
274 ECB : * Since we may want to add a partial path to this relation, we must
275 : * set its consider_parallel flag correctly.
276 : */
277 GIC 6636 : final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
278 6636 : rel->consider_parallel = final_rel->consider_parallel;
279 :
280 ECB : /*
281 : * For the moment, we consider only a single Path for the subquery.
282 : * This should change soon (make it look more like
283 : * set_subquery_pathlist).
284 : */
285 GIC 6636 : subpath = get_cheapest_fractional_path(final_rel,
286 : root->tuple_fraction);
287 :
288 ECB : /*
289 : * Stick a SubqueryScanPath atop that.
290 : *
291 : * We don't bother to determine the subquery's output ordering since
292 : * it won't be reflected in the set-op result anyhow; so just label
293 : * the SubqueryScanPath with nil pathkeys. (XXX that should change
294 : * soon too, likely.)
295 : */
296 GIC 6636 : path = (Path *) create_subqueryscan_path(root, rel, subpath,
297 : trivial_tlist,
298 : NIL, NULL);
299 :
300 CBC 6636 : add_path(rel, path);
301 :
302 : /*
303 : * If we have a partial path for the child relation, we can use that
304 ECB : * to build a partial path for this relation. But there's no point in
305 : * considering any path but the cheapest.
306 : */
307 GIC 6636 : if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
308 3725 : final_rel->partial_pathlist != NIL)
309 : {
310 : Path *partial_subpath;
311 ECB : Path *partial_path;
312 :
313 GIC 6 : partial_subpath = linitial(final_rel->partial_pathlist);
314 : partial_path = (Path *)
315 6 : create_subqueryscan_path(root, rel, partial_subpath,
316 : trivial_tlist,
317 : NIL, NULL);
318 CBC 6 : add_partial_path(rel, partial_path);
319 : }
320 ECB :
321 : /*
322 : * Estimate number of groups if caller wants it. If the subquery used
323 : * grouping or aggregation, its output is probably mostly unique
324 : * anyway; otherwise do statistical estimation.
325 : *
326 : * XXX you don't really want to know about this: we do the estimation
327 : * using the subquery's original targetlist expressions, not the
328 : * subroot->processed_tlist which might seem more appropriate. The
329 : * reason is that if the subquery is itself a setop, it may return a
330 : * processed_tlist containing "varno 0" Vars generated by
331 : * generate_append_tlist, and those would confuse estimate_num_groups
332 : * mightily. We ought to get rid of the "varno 0" hack, but that
333 : * requires a redesign of the parsetree representation of setops, so
334 : * that there can be an RTE corresponding to each setop's output.
335 : */
336 GIC 6636 : if (pNumGroups)
337 : {
338 591 : if (subquery->groupClause || subquery->groupingSets ||
339 591 : subquery->distinctClause ||
340 585 : subroot->hasHavingQual || subquery->hasAggs)
341 CBC 6 : *pNumGroups = subpath->rows;
342 : else
343 585 : *pNumGroups = estimate_num_groups(subroot,
344 ECB : get_tlist_exprs(subquery->targetList, false),
345 : subpath->rows,
346 : NULL,
347 : NULL);
348 : }
349 : }
350 GIC 2309 : else if (IsA(setOp, SetOperationStmt))
351 : {
352 2309 : SetOperationStmt *op = (SetOperationStmt *) setOp;
353 :
354 : /* UNIONs are much different from INTERSECT/EXCEPT */
355 CBC 2309 : if (op->op == SETOP_UNION)
356 GIC 2006 : rel = generate_union_paths(op, root,
357 ECB : refnames_tlist,
358 : pTargetList);
359 : else
360 CBC 303 : rel = generate_nonunion_paths(op, root,
361 ECB : refnames_tlist,
362 : pTargetList);
363 GIC 2309 : if (pNumGroups)
364 15 : *pNumGroups = rel->rows;
365 ECB :
366 : /*
367 : * If necessary, add a Result node to project the caller-requested
368 : * output columns.
369 : *
370 : * XXX you don't really want to know about this: setrefs.c will apply
371 : * fix_upper_expr() to the Result node's tlist. This would fail if the
372 : * Vars generated by generate_setop_tlist() were not exactly equal()
373 : * to the corresponding tlist entries of the subplan. However, since
374 : * the subplan was generated by generate_union_paths() or
375 : * generate_nonunion_paths(), and hence its tlist was generated by
376 : * generate_append_tlist(), this will work. We just tell
377 : * generate_setop_tlist() to use varno 0.
378 : */
379 GIC 2309 : if (flag >= 0 ||
380 2294 : !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
381 2243 : !tlist_same_collations(*pTargetList, colCollations, junkOK))
382 : {
383 : PathTarget *target;
384 : bool trivial_tlist;
385 ECB : ListCell *lc;
386 :
387 CBC 66 : *pTargetList = generate_setop_tlist(colTypes, colCollations,
388 : flag,
389 : 0,
390 : false,
391 : *pTargetList,
392 : refnames_tlist,
393 : &trivial_tlist);
394 66 : target = create_pathtarget(root, *pTargetList);
395 :
396 : /* Apply projection to each path */
397 GIC 132 : foreach(lc, rel->pathlist)
398 : {
399 66 : Path *subpath = (Path *) lfirst(lc);
400 : Path *path;
401 ECB :
402 GIC 66 : Assert(subpath->param_info == NULL);
403 66 : path = apply_projection_to_path(root, subpath->parent,
404 ECB : subpath, target);
405 : /* If we had to add a Result, path is different from subpath */
406 CBC 66 : if (path != subpath)
407 GIC 66 : lfirst(lc) = path;
408 : }
409 ECB :
410 : /* Apply projection to each partial path */
411 GIC 66 : foreach(lc, rel->partial_pathlist)
412 : {
413 LBC 0 : Path *subpath = (Path *) lfirst(lc);
414 ECB : Path *path;
415 :
416 UIC 0 : Assert(subpath->param_info == NULL);
417 :
418 ECB : /* avoid apply_projection_to_path, in case of multiple refs */
419 UIC 0 : path = (Path *) create_projection_path(root, subpath->parent,
420 EUB : subpath, target);
421 UIC 0 : lfirst(lc) = path;
422 : }
423 EUB : }
424 : }
425 : else
426 : {
427 UIC 0 : elog(ERROR, "unrecognized node type: %d",
428 EUB : (int) nodeTag(setOp));
429 : *pTargetList = NIL;
430 : }
431 :
432 GIC 8945 : postprocess_setop_rel(root, rel);
433 :
434 GBC 8945 : return rel;
435 : }
436 :
437 : /*
438 : * Generate paths for a recursive UNION node
439 ECB : */
440 : static RelOptInfo *
441 CBC 357 : generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
442 : List *refnames_tlist,
443 : List **pTargetList)
444 : {
445 : RelOptInfo *result_rel;
446 : Path *path;
447 : RelOptInfo *lrel,
448 ECB : *rrel;
449 : Path *lpath;
450 : Path *rpath;
451 : List *lpath_tlist;
452 : List *rpath_tlist;
453 : List *tlist;
454 : List *groupList;
455 : double dNumGroups;
456 :
457 : /* Parser should have rejected other cases */
458 GIC 357 : if (setOp->op != SETOP_UNION)
459 UIC 0 : elog(ERROR, "only UNION queries can be recursive");
460 : /* Worktable ID should be assigned */
461 GIC 357 : Assert(root->wt_param_id >= 0);
462 :
463 : /*
464 : * Unlike a regular UNION node, process the left and right inputs
465 ECB : * separately without any intention of combining them into one Append.
466 EUB : */
467 GIC 357 : lrel = recurse_set_operations(setOp->larg, root,
468 ECB : setOp->colTypes, setOp->colCollations,
469 : false, -1,
470 : refnames_tlist,
471 : &lpath_tlist,
472 : NULL);
473 GIC 357 : lpath = lrel->cheapest_total_path;
474 ECB : /* The right path will want to look at the left one ... */
475 GIC 357 : root->non_recursive_path = lpath;
476 357 : rrel = recurse_set_operations(setOp->rarg, root,
477 : setOp->colTypes, setOp->colCollations,
478 : false, -1,
479 : refnames_tlist,
480 ECB : &rpath_tlist,
481 : NULL);
482 CBC 357 : rpath = rrel->cheapest_total_path;
483 357 : root->non_recursive_path = NULL;
484 :
485 : /*
486 : * Generate tlist for RecursiveUnion path node --- same as in Append cases
487 : */
488 GIC 357 : tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
489 CBC 357 : list_make2(lpath_tlist, rpath_tlist),
490 ECB : refnames_tlist);
491 :
492 GIC 357 : *pTargetList = tlist;
493 :
494 : /* Build result relation. */
495 CBC 357 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
496 357 : bms_union(lrel->relids, rrel->relids));
497 GIC 357 : result_rel->reltarget = create_pathtarget(root, tlist);
498 :
499 ECB : /*
500 : * If UNION, identify the grouping operators
501 : */
502 CBC 357 : if (setOp->all)
503 ECB : {
504 CBC 210 : groupList = NIL;
505 GIC 210 : dNumGroups = 0;
506 : }
507 : else
508 : {
509 ECB : /* Identify the grouping semantics */
510 GIC 147 : groupList = generate_setop_grouplist(setOp, tlist);
511 ECB :
512 : /* We only support hashing here */
513 GIC 147 : if (!grouping_is_hashable(groupList))
514 3 : ereport(ERROR,
515 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
516 : errmsg("could not implement recursive UNION"),
517 ECB : errdetail("All column datatypes must be hashable.")));
518 :
519 : /*
520 : * For the moment, take the number of distinct groups as equal to the
521 : * total input size, ie, the worst case.
522 : */
523 GIC 144 : dNumGroups = lpath->rows + rpath->rows * 10;
524 : }
525 :
526 : /*
527 : * And make the path node.
528 : */
529 354 : path = (Path *) create_recursiveunion_path(root,
530 ECB : result_rel,
531 : lpath,
532 : rpath,
533 GIC 354 : result_rel->reltarget,
534 : groupList,
535 : root->wt_param_id,
536 ECB : dNumGroups);
537 :
538 GIC 354 : add_path(result_rel, path);
539 354 : postprocess_setop_rel(root, result_rel);
540 CBC 354 : return result_rel;
541 : }
542 :
543 : /*
544 : * Generate paths for a UNION or UNION ALL node
545 ECB : */
546 : static RelOptInfo *
547 CBC 2006 : generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
548 : List *refnames_tlist,
549 : List **pTargetList)
550 : {
551 GIC 2006 : Relids relids = NULL;
552 : RelOptInfo *result_rel;
553 2006 : double save_fraction = root->tuple_fraction;
554 ECB : ListCell *lc;
555 GIC 2006 : List *pathlist = NIL;
556 2006 : List *partial_pathlist = NIL;
557 2006 : bool partial_paths_valid = true;
558 CBC 2006 : bool consider_parallel = true;
559 : List *rellist;
560 ECB : List *tlist_list;
561 : List *tlist;
562 : Path *path;
563 :
564 : /*
565 : * If plain UNION, tell children to fetch all tuples.
566 : *
567 : * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
568 : * each arm of the UNION ALL. One could make a case for reducing the
569 : * tuple fraction for later arms (discounting by the expected size of the
570 : * earlier arms' results) but it seems not worth the trouble. The normal
571 : * case where tuple_fraction isn't already zero is a LIMIT at top level,
572 : * and passing it down as-is is usually enough to get the desired result
573 : * of preferring fast-start plans.
574 : */
575 GIC 2006 : if (!op->all)
576 1763 : root->tuple_fraction = 0.0;
577 :
578 : /*
579 : * If any of my children are identical UNION nodes (same op, all-flag, and
580 : * colTypes) then they can be merged into this node so that we generate
581 : * only one Append and unique-ification for the lot. Recurse to find such
582 ECB : * nodes and compute their children's paths.
583 : */
584 GIC 2006 : rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
585 :
586 : /*
587 : * Generate tlist for Append plan node.
588 : *
589 : * The tlist for an Append plan isn't important as far as the Append is
590 : * concerned, but we must make it look real anyway for the benefit of the
591 ECB : * next plan level up.
592 : */
593 GIC 2006 : tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
594 : tlist_list, refnames_tlist);
595 :
596 2006 : *pTargetList = tlist;
597 :
598 : /* Build path lists and relid set. */
599 7397 : foreach(lc, rellist)
600 ECB : {
601 GIC 5391 : RelOptInfo *rel = lfirst(lc);
602 :
603 CBC 5391 : pathlist = lappend(pathlist, rel->cheapest_total_path);
604 :
605 GIC 5391 : if (consider_parallel)
606 ECB : {
607 GIC 3577 : if (!rel->consider_parallel)
608 ECB : {
609 GIC 1700 : consider_parallel = false;
610 CBC 1700 : partial_paths_valid = false;
611 : }
612 1877 : else if (rel->partial_pathlist == NIL)
613 GIC 1871 : partial_paths_valid = false;
614 ECB : else
615 GIC 6 : partial_pathlist = lappend(partial_pathlist,
616 CBC 6 : linitial(rel->partial_pathlist));
617 ECB : }
618 :
619 CBC 5391 : relids = bms_union(relids, rel->relids);
620 ECB : }
621 :
622 : /* Build result relation. */
623 CBC 2006 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
624 GIC 2006 : result_rel->reltarget = create_pathtarget(root, tlist);
625 2006 : result_rel->consider_parallel = consider_parallel;
626 ECB :
627 : /*
628 : * Append the child results together.
629 : */
630 CBC 2006 : path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
631 ECB : NIL, NULL, 0, false, -1);
632 :
633 : /*
634 : * For UNION ALL, we just need the Append path. For UNION, need to add
635 : * node(s) to remove duplicates.
636 : */
637 CBC 2006 : if (!op->all)
638 GIC 1763 : path = make_union_unique(op, path, tlist, root);
639 :
640 2006 : add_path(result_rel, path);
641 :
642 : /*
643 : * Estimate number of groups. For now we just assume the output is unique
644 ECB : * --- this is certainly true for the UNION case, and we want worst-case
645 : * estimates anyway.
646 : */
647 CBC 2006 : result_rel->rows = path->rows;
648 :
649 : /*
650 : * Now consider doing the same thing using the partial paths plus Append
651 : * plus Gather.
652 : */
653 GIC 2006 : if (partial_paths_valid)
654 ECB : {
655 : Path *ppath;
656 GIC 3 : int parallel_workers = 0;
657 :
658 : /* Find the highest number of workers requested for any subpath. */
659 CBC 9 : foreach(lc, partial_pathlist)
660 : {
661 GNC 6 : Path *subpath = lfirst(lc);
662 ECB :
663 GNC 6 : parallel_workers = Max(parallel_workers,
664 : subpath->parallel_workers);
665 : }
666 CBC 3 : Assert(parallel_workers > 0);
667 :
668 ECB : /*
669 : * If the use of parallel append is permitted, always request at least
670 : * log2(# of children) paths. We assume it can be useful to have
671 : * extra workers in this case because they will be spread out across
672 : * the children. The precise formula is just a guess; see
673 : * add_paths_to_append_rel.
674 : */
675 GIC 3 : if (enable_parallel_append)
676 : {
677 3 : parallel_workers = Max(parallel_workers,
678 : pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
679 3 : parallel_workers = Min(parallel_workers,
680 : max_parallel_workers_per_gather);
681 : }
682 CBC 3 : Assert(parallel_workers > 0);
683 :
684 ECB : ppath = (Path *)
685 GIC 3 : create_append_path(root, result_rel, NIL, partial_pathlist,
686 ECB : NIL, NULL,
687 : parallel_workers, enable_parallel_append,
688 : -1);
689 : ppath = (Path *)
690 GIC 3 : create_gather_path(root, result_rel, ppath,
691 3 : result_rel->reltarget, NULL, NULL);
692 CBC 3 : if (!op->all)
693 GIC 3 : ppath = make_union_unique(op, ppath, tlist, root);
694 3 : add_path(result_rel, ppath);
695 : }
696 :
697 ECB : /* Undo effects of possibly forcing tuple_fraction to 0 */
698 CBC 2006 : root->tuple_fraction = save_fraction;
699 ECB :
700 CBC 2006 : return result_rel;
701 ECB : }
702 :
703 : /*
704 : * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
705 : */
706 : static RelOptInfo *
707 CBC 303 : generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
708 : List *refnames_tlist,
709 : List **pTargetList)
710 : {
711 : RelOptInfo *result_rel;
712 : RelOptInfo *lrel,
713 : *rrel;
714 303 : double save_fraction = root->tuple_fraction;
715 : Path *lpath,
716 : *rpath,
717 : *path;
718 : List *lpath_tlist,
719 : *rpath_tlist,
720 : *tlist_list,
721 ECB : *tlist,
722 : *groupList,
723 : *pathlist;
724 : double dLeftGroups,
725 : dRightGroups,
726 : dNumGroups,
727 : dNumOutputRows;
728 : bool use_hash;
729 : SetOpCmd cmd;
730 : int firstFlag;
731 :
732 : /*
733 : * Tell children to fetch all tuples.
734 : */
735 GIC 303 : root->tuple_fraction = 0.0;
736 :
737 : /* Recurse on children, ensuring their outputs are marked */
738 303 : lrel = recurse_set_operations(op->larg, root,
739 : op->colTypes, op->colCollations,
740 : false, 0,
741 : refnames_tlist,
742 ECB : &lpath_tlist,
743 : &dLeftGroups);
744 GIC 303 : lpath = lrel->cheapest_total_path;
745 CBC 303 : rrel = recurse_set_operations(op->rarg, root,
746 : op->colTypes, op->colCollations,
747 : false, 1,
748 : refnames_tlist,
749 : &rpath_tlist,
750 : &dRightGroups);
751 303 : rpath = rrel->cheapest_total_path;
752 ECB :
753 : /* Undo effects of forcing tuple_fraction to 0 */
754 GIC 303 : root->tuple_fraction = save_fraction;
755 :
756 : /*
757 : * For EXCEPT, we must put the left input first. For INTERSECT, either
758 ECB : * order should give the same results, and we prefer to put the smaller
759 : * input first in order to minimize the size of the hash table in the
760 : * hashing case. "Smaller" means the one with the fewer groups.
761 : */
762 GIC 303 : if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
763 288 : {
764 288 : pathlist = list_make2(lpath, rpath);
765 288 : tlist_list = list_make2(lpath_tlist, rpath_tlist);
766 288 : firstFlag = 0;
767 : }
768 : else
769 ECB : {
770 CBC 15 : pathlist = list_make2(rpath, lpath);
771 15 : tlist_list = list_make2(rpath_tlist, lpath_tlist);
772 15 : firstFlag = 1;
773 ECB : }
774 :
775 : /*
776 : * Generate tlist for Append plan node.
777 : *
778 : * The tlist for an Append plan isn't important as far as the Append is
779 : * concerned, but we must make it look real anyway for the benefit of the
780 : * next plan level up. In fact, it has to be real enough that the flag
781 : * column is shown as a variable not a constant, else setrefs.c will get
782 : * confused.
783 : */
784 GIC 303 : tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
785 : tlist_list, refnames_tlist);
786 :
787 303 : *pTargetList = tlist;
788 :
789 : /* Build result relation. */
790 303 : result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
791 CBC 303 : bms_union(lrel->relids, rrel->relids));
792 GIC 303 : result_rel->reltarget = create_pathtarget(root, tlist);
793 :
794 ECB : /*
795 : * Append the child results together.
796 : */
797 CBC 303 : path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
798 ECB : NIL, NULL, 0, false, -1);
799 :
800 : /* Identify the grouping semantics */
801 GIC 303 : groupList = generate_setop_grouplist(op, tlist);
802 :
803 : /*
804 ECB : * Estimate number of distinct groups that we'll need hashtable entries
805 : * for; this is the size of the left-hand input for EXCEPT, or the smaller
806 : * input for INTERSECT. Also estimate the number of eventual output rows.
807 : * In non-ALL cases, we estimate each group produces one output row; in
808 : * ALL cases use the relevant relation size. These are worst-case
809 : * estimates, of course, but we need to be conservative.
810 : */
811 GIC 303 : if (op->op == SETOP_EXCEPT)
812 : {
813 195 : dNumGroups = dLeftGroups;
814 195 : dNumOutputRows = op->all ? lpath->rows : dNumGroups;
815 : }
816 : else
817 : {
818 CBC 108 : dNumGroups = Min(dLeftGroups, dRightGroups);
819 GIC 108 : dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
820 ECB : }
821 :
822 : /*
823 : * Decide whether to hash or sort, and add a sort node if needed.
824 : */
825 CBC 303 : use_hash = choose_hashed_setop(root, groupList, path,
826 ECB : dNumGroups, dNumOutputRows,
827 GIC 303 : (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
828 :
829 303 : if (groupList && !use_hash)
830 54 : path = (Path *) create_sort_path(root,
831 : result_rel,
832 ECB : path,
833 : make_pathkeys_for_sortclauses(root,
834 : groupList,
835 : tlist),
836 : -1.0);
837 :
838 : /*
839 : * Finally, add a SetOp path node to generate the correct output.
840 : */
841 GIC 303 : switch (op->op)
842 : {
843 108 : case SETOP_INTERSECT:
844 108 : cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
845 108 : break;
846 195 : case SETOP_EXCEPT:
847 195 : cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
848 CBC 195 : break;
849 UIC 0 : default:
850 LBC 0 : elog(ERROR, "unrecognized set op: %d", (int) op->op);
851 ECB : cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
852 : break;
853 : }
854 CBC 303 : path = (Path *) create_setop_path(root,
855 ECB : result_rel,
856 EUB : path,
857 : cmd,
858 : use_hash ? SETOP_HASHED : SETOP_SORTED,
859 : groupList,
860 GIC 303 : list_length(op->colTypes) + 1,
861 ECB : use_hash ? firstFlag : -1,
862 : dNumGroups,
863 : dNumOutputRows);
864 :
865 GIC 303 : result_rel->rows = path->rows;
866 303 : add_path(result_rel, path);
867 CBC 303 : return result_rel;
868 : }
869 :
870 : /*
871 : * Pull up children of a UNION node that are identically-propertied UNIONs.
872 ECB : *
873 : * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
874 : * output rows will be lost anyway.
875 : *
876 : * NOTE: currently, we ignore collations while determining if a child has
877 : * the same properties. This is semantically sound only so long as all
878 : * collations have the same notion of equality. It is valid from an
879 : * implementation standpoint because we don't care about the ordering of
880 : * a UNION child's result: UNION ALL results are always unordered, and
881 : * generate_union_paths will force a fresh sort if the top level is a UNION.
882 : */
883 : static List *
884 GIC 2006 : plan_union_children(PlannerInfo *root,
885 : SetOperationStmt *top_union,
886 : List *refnames_tlist,
887 : List **tlist_list)
888 : {
889 2006 : List *pending_rels = list_make1(top_union);
890 2006 : List *result = NIL;
891 ECB : List *child_tlist;
892 :
893 GIC 2006 : *tlist_list = NIL;
894 :
895 10782 : while (pending_rels != NIL)
896 ECB : {
897 CBC 8776 : Node *setOp = linitial(pending_rels);
898 :
899 GIC 8776 : pending_rels = list_delete_first(pending_rels);
900 ECB :
901 GIC 8776 : if (IsA(setOp, SetOperationStmt))
902 ECB : {
903 GIC 3442 : SetOperationStmt *op = (SetOperationStmt *) setOp;
904 ECB :
905 GIC 3442 : if (op->op == top_union->op &&
906 CBC 6788 : (op->all == top_union->all || op->all) &&
907 GIC 3391 : equal(op->colTypes, top_union->colTypes))
908 ECB : {
909 : /* Same UNION, so fold children into parent */
910 CBC 3385 : pending_rels = lcons(op->rarg, pending_rels);
911 GIC 3385 : pending_rels = lcons(op->larg, pending_rels);
912 CBC 3385 : continue;
913 ECB : }
914 : }
915 :
916 : /*
917 : * Not same, so plan this child separately.
918 : *
919 : * Note we disallow any resjunk columns in child results. This is
920 : * necessary since the Append node that implements the union won't do
921 : * any projection, and upper levels will get confused if some of our
922 : * output tuples have junk and some don't. This case only arises when
923 : * we have an EXCEPT or INTERSECT as child, else there won't be
924 : * resjunk anyway.
925 : */
926 GIC 5391 : result = lappend(result, recurse_set_operations(setOp, root,
927 : top_union->colTypes,
928 : top_union->colCollations,
929 : false, -1,
930 : refnames_tlist,
931 : &child_tlist,
932 : NULL));
933 CBC 5391 : *tlist_list = lappend(*tlist_list, child_tlist);
934 : }
935 :
936 GIC 2006 : return result;
937 : }
938 :
939 : /*
940 ECB : * Add nodes to the given path tree to unique-ify the result of a UNION.
941 : */
942 : static Path *
943 CBC 1766 : make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
944 : PlannerInfo *root)
945 : {
946 GIC 1766 : RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
947 : List *groupList;
948 : double dNumGroups;
949 :
950 ECB : /* Identify the grouping semantics */
951 GIC 1766 : groupList = generate_setop_grouplist(op, tlist);
952 :
953 ECB : /*
954 : * XXX for the moment, take the number of distinct groups as equal to the
955 : * total input size, ie, the worst case. This is too conservative, but
956 : * it's not clear how to get a decent estimate of the true size. One
957 : * should note as well the propensity of novices to write UNION rather
958 : * than UNION ALL even when they don't expect any duplicates...
959 : */
960 GIC 1766 : dNumGroups = path->rows;
961 :
962 : /* Decide whether to hash or sort */
963 1766 : if (choose_hashed_setop(root, groupList, path,
964 : dNumGroups, dNumGroups,
965 : "UNION"))
966 : {
967 ECB : /* Hashed aggregate plan --- no sort needed */
968 GIC 1614 : path = (Path *) create_agg_path(root,
969 : result_rel,
970 ECB : path,
971 : create_pathtarget(root, tlist),
972 : AGG_HASHED,
973 : AGGSPLIT_SIMPLE,
974 : groupList,
975 : NIL,
976 : NULL,
977 : dNumGroups);
978 : }
979 : else
980 : {
981 : /* Sort and Unique */
982 GIC 152 : if (groupList)
983 : path = (Path *)
984 143 : create_sort_path(root,
985 : result_rel,
986 : path,
987 : make_pathkeys_for_sortclauses(root,
988 : groupList,
989 ECB : tlist),
990 : -1.0);
991 CBC 152 : path = (Path *) create_upper_unique_path(root,
992 : result_rel,
993 : path,
994 GIC 152 : list_length(path->pathkeys),
995 : dNumGroups);
996 : }
997 :
998 CBC 1766 : return path;
999 : }
1000 :
1001 ECB : /*
1002 : * postprocess_setop_rel - perform steps required after adding paths
1003 : */
1004 : static void
1005 CBC 9299 : postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
1006 : {
1007 : /*
1008 : * We don't currently worry about allowing FDWs to contribute paths to
1009 : * this relation, but give extensions a chance.
1010 : */
1011 GIC 9299 : if (create_upper_paths_hook)
1012 LBC 0 : (*create_upper_paths_hook) (root, UPPERREL_SETOP,
1013 : NULL, rel, NULL);
1014 :
1015 : /* Select cheapest path */
1016 GIC 9299 : set_cheapest(rel);
1017 9299 : }
1018 ECB :
1019 EUB : /*
1020 : * choose_hashed_setop - should we use hashing for a set operation?
1021 : */
1022 : static bool
1023 CBC 2069 : choose_hashed_setop(PlannerInfo *root, List *groupClauses,
1024 ECB : Path *input_path,
1025 : double dNumGroups, double dNumOutputRows,
1026 : const char *construct)
1027 : {
1028 GIC 2069 : int numGroupCols = list_length(groupClauses);
1029 2069 : Size hash_mem_limit = get_hash_memory_limit();
1030 ECB : bool can_sort;
1031 : bool can_hash;
1032 : Size hashentrysize;
1033 : Path hashed_p;
1034 : Path sorted_p;
1035 : double tuple_fraction;
1036 :
1037 : /* Check whether the operators support sorting or hashing */
1038 GIC 2069 : can_sort = grouping_is_sortable(groupClauses);
1039 2069 : can_hash = grouping_is_hashable(groupClauses);
1040 2069 : if (can_hash && can_sort)
1041 : {
1042 : /* we have a meaningful choice to make, continue ... */
1043 : }
1044 369 : else if (can_hash)
1045 CBC 303 : return true;
1046 66 : else if (can_sort)
1047 66 : return false;
1048 : else
1049 UIC 0 : ereport(ERROR,
1050 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1051 ECB : /* translator: %s is UNION, INTERSECT, or EXCEPT */
1052 : errmsg("could not implement %s", construct),
1053 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
1054 :
1055 : /* Prefer sorting when enable_hashagg is off */
1056 GBC 1700 : if (!enable_hashagg)
1057 GIC 63 : return false;
1058 :
1059 : /*
1060 : * Don't do it if it doesn't look like the hashtable will fit into
1061 : * hash_mem.
1062 : */
1063 CBC 1637 : hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
1064 ECB :
1065 GIC 1637 : if (hashentrysize * dNumGroups > hash_mem_limit)
1066 UIC 0 : return false;
1067 :
1068 : /*
1069 : * See if the estimated cost is no more than doing it the other way.
1070 ECB : *
1071 : * We need to consider input_plan + hashagg versus input_plan + sort +
1072 : * group. Note that the actual result plan might involve a SetOp or
1073 EUB : * Unique node, not Agg or Group, but the cost estimates for Agg and Group
1074 : * should be close enough for our purposes here.
1075 : *
1076 : * These path variables are dummies that just hold cost fields; we don't
1077 : * make actual Paths for these steps.
1078 : */
1079 GIC 1637 : cost_agg(&hashed_p, root, AGG_HASHED, NULL,
1080 : numGroupCols, dNumGroups,
1081 : NIL,
1082 : input_path->startup_cost, input_path->total_cost,
1083 1637 : input_path->rows, input_path->pathtarget->width);
1084 :
1085 : /*
1086 ECB : * Now for the sorted case. Note that the input is *always* unsorted,
1087 : * since it was made by appending unrelated sub-relations together.
1088 : */
1089 GIC 1637 : sorted_p.startup_cost = input_path->startup_cost;
1090 CBC 1637 : sorted_p.total_cost = input_path->total_cost;
1091 : /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
1092 GIC 1637 : cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
1093 1637 : input_path->rows, input_path->pathtarget->width,
1094 : 0.0, work_mem, -1.0);
1095 1637 : cost_group(&sorted_p, root, numGroupCols, dNumGroups,
1096 ECB : NIL,
1097 : sorted_p.startup_cost, sorted_p.total_cost,
1098 : input_path->rows);
1099 :
1100 : /*
1101 : * Now make the decision using the top-level tuple fraction. First we
1102 : * have to convert an absolute count (LIMIT) into fractional form.
1103 : */
1104 GIC 1637 : tuple_fraction = root->tuple_fraction;
1105 1637 : if (tuple_fraction >= 1.0)
1106 UIC 0 : tuple_fraction /= dNumOutputRows;
1107 :
1108 GIC 1637 : if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1109 : tuple_fraction) < 0)
1110 : {
1111 ECB : /* Hashed is cheaper, so use it */
1112 CBC 1545 : return true;
1113 EUB : }
1114 GIC 92 : return false;
1115 ECB : }
1116 :
1117 : /*
1118 : * Generate targetlist for a set-operation plan node
1119 : *
1120 : * colTypes: OID list of set-op's result column datatypes
1121 : * colCollations: OID list of set-op's result column collations
1122 : * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
1123 : * varno: varno to use in generated Vars
1124 : * hack_constants: true to copy up constants (see comments in code)
1125 : * input_tlist: targetlist of this node's input node
1126 : * refnames_tlist: targetlist to take column names from
1127 : * trivial_tlist: output parameter, set to true if targetlist is trivial
1128 : */
1129 : static List *
1130 GIC 6702 : generate_setop_tlist(List *colTypes, List *colCollations,
1131 : int flag,
1132 : Index varno,
1133 : bool hack_constants,
1134 : List *input_tlist,
1135 : List *refnames_tlist,
1136 : bool *trivial_tlist)
1137 : {
1138 6702 : List *tlist = NIL;
1139 CBC 6702 : int resno = 1;
1140 : ListCell *ctlc,
1141 : *cclc,
1142 : *itlc,
1143 : *rtlc;
1144 : TargetEntry *tle;
1145 : Node *expr;
1146 :
1147 GNC 6702 : *trivial_tlist = true; /* until proven differently */
1148 :
1149 CBC 24820 : forfour(ctlc, colTypes, cclc, colCollations,
1150 ECB : itlc, input_tlist, rtlc, refnames_tlist)
1151 : {
1152 GIC 18118 : Oid colType = lfirst_oid(ctlc);
1153 18118 : Oid colColl = lfirst_oid(cclc);
1154 18118 : TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
1155 18118 : TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
1156 :
1157 18118 : Assert(inputtle->resno == resno);
1158 CBC 18118 : Assert(reftle->resno == resno);
1159 GIC 18118 : Assert(!inputtle->resjunk);
1160 CBC 18118 : Assert(!reftle->resjunk);
1161 :
1162 : /*
1163 ECB : * Generate columns referencing input columns and having appropriate
1164 : * data types and column names. Insert datatype coercions where
1165 : * necessary.
1166 : *
1167 : * HACK: constants in the input's targetlist are copied up as-is
1168 : * rather than being referenced as subquery outputs. This is mainly
1169 : * to ensure that when we try to coerce them to the output column's
1170 : * datatype, the right things happen for UNKNOWN constants. But do
1171 : * this only at the first level of subquery-scan plans; we don't want
1172 : * phony constants appearing in the output tlists of upper-level
1173 : * nodes!
1174 : *
1175 : * Note that copying a constant doesn't in itself require us to mark
1176 : * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
1177 : */
1178 GIC 18118 : if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
1179 6334 : expr = (Node *) inputtle->expr;
1180 : else
1181 47136 : expr = (Node *) makeVar(varno,
1182 11784 : inputtle->resno,
1183 11784 : exprType((Node *) inputtle->expr),
1184 11784 : exprTypmod((Node *) inputtle->expr),
1185 11784 : exprCollation((Node *) inputtle->expr),
1186 : 0);
1187 :
1188 18118 : if (exprType(expr) != colType)
1189 : {
1190 : /*
1191 : * Note: it's not really cool to be applying coerce_to_common_type
1192 ECB : * here; one notable point is that assign_expr_collations never
1193 : * gets run on any generated nodes. For the moment that's not a
1194 : * problem because we force the correct exposed collation below.
1195 : * It would likely be best to make the parser generate the correct
1196 : * output tlist for every set-op to begin with, though.
1197 : */
1198 CBC 614 : expr = coerce_to_common_type(NULL, /* no UNKNOWNs here */
1199 ECB : expr,
1200 : colType,
1201 : "UNION/INTERSECT/EXCEPT");
1202 GNC 614 : *trivial_tlist = false; /* the coercion makes it not trivial */
1203 ECB : }
1204 :
1205 : /*
1206 : * Ensure the tlist entry's exposed collation matches the set-op. This
1207 : * is necessary because plan_set_operations() reports the result
1208 : * ordering as a list of SortGroupClauses, which don't carry collation
1209 : * themselves but just refer to tlist entries. If we don't show the
1210 : * right collation then planner.c might do the wrong thing in
1211 : * higher-level queries.
1212 : *
1213 : * Note we use RelabelType, not CollateExpr, since this expression
1214 : * will reach the executor without any further processing.
1215 : */
1216 GIC 18118 : if (exprCollation(expr) != colColl)
1217 : {
1218 CBC 4989 : expr = applyRelabelType(expr,
1219 : exprType(expr), exprTypmod(expr), colColl,
1220 : COERCE_IMPLICIT_CAST, -1, false);
1221 GNC 4989 : *trivial_tlist = false; /* the relabel makes it not trivial */
1222 : }
1223 :
1224 GIC 36236 : tle = makeTargetEntry((Expr *) expr,
1225 18118 : (AttrNumber) resno++,
1226 18118 : pstrdup(reftle->resname),
1227 : false);
1228 :
1229 : /*
1230 : * By convention, all non-resjunk columns in a setop tree have
1231 : * ressortgroupref equal to their resno. In some cases the ref isn't
1232 : * needed, but this is a cleaner way than modifying the tlist later.
1233 : */
1234 CBC 18118 : tle->ressortgroupref = tle->resno;
1235 :
1236 18118 : tlist = lappend(tlist, tle);
1237 : }
1238 :
1239 6702 : if (flag >= 0)
1240 : {
1241 : /* Add a resjunk flag column */
1242 ECB : /* flag value is the given constant */
1243 CBC 606 : expr = (Node *) makeConst(INT4OID,
1244 ECB : -1,
1245 : InvalidOid,
1246 : sizeof(int32),
1247 : Int32GetDatum(flag),
1248 : false,
1249 : true);
1250 GIC 606 : tle = makeTargetEntry((Expr *) expr,
1251 606 : (AttrNumber) resno++,
1252 ECB : pstrdup("flag"),
1253 : true);
1254 CBC 606 : tlist = lappend(tlist, tle);
1255 GNC 606 : *trivial_tlist = false; /* the extra entry makes it not trivial */
1256 : }
1257 :
1258 CBC 6702 : return tlist;
1259 : }
1260 :
1261 : /*
1262 ECB : * Generate targetlist for a set-operation Append node
1263 : *
1264 : * colTypes: OID list of set-op's result column datatypes
1265 : * colCollations: OID list of set-op's result column collations
1266 : * flag: true to create a flag column copied up from subplans
1267 : * input_tlists: list of tlists for sub-plans of the Append
1268 : * refnames_tlist: targetlist to take column names from
1269 : *
1270 : * The entries in the Append's targetlist should always be simple Vars;
1271 : * we just have to make sure they have the right datatypes/typmods/collations.
1272 : * The Vars are always generated with varno 0.
1273 : *
1274 : * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
1275 : * cannot figure out a realistic width for the tlist we make here. But we
1276 : * ought to refactor this code to produce a PathTarget directly, anyway.
1277 : */
1278 : static List *
1279 GIC 2666 : generate_append_tlist(List *colTypes, List *colCollations,
1280 : bool flag,
1281 : List *input_tlists,
1282 : List *refnames_tlist)
1283 : {
1284 2666 : List *tlist = NIL;
1285 2666 : int resno = 1;
1286 : ListCell *curColType;
1287 : ListCell *curColCollation;
1288 : ListCell *ref_tl_item;
1289 : int colindex;
1290 : TargetEntry *tle;
1291 : Node *expr;
1292 : ListCell *tlistl;
1293 : int32 *colTypmods;
1294 :
1295 : /*
1296 : * First extract typmods to use.
1297 : *
1298 ECB : * If the inputs all agree on type and typmod of a particular column, use
1299 : * that typmod; else use -1.
1300 : */
1301 GIC 2666 : colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
1302 :
1303 CBC 9377 : foreach(tlistl, input_tlists)
1304 ECB : {
1305 GIC 6711 : List *subtlist = (List *) lfirst(tlistl);
1306 : ListCell *subtlistl;
1307 :
1308 6711 : curColType = list_head(colTypes);
1309 6711 : colindex = 0;
1310 25450 : foreach(subtlistl, subtlist)
1311 : {
1312 18739 : TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
1313 :
1314 18739 : if (subtle->resjunk)
1315 606 : continue;
1316 18133 : Assert(curColType != NULL);
1317 18133 : if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
1318 : {
1319 : /* If first subplan, copy the typmod; else compare */
1320 CBC 18133 : int32 subtypmod = exprTypmod((Node *) subtle->expr);
1321 :
1322 18133 : if (tlistl == list_head(input_tlists))
1323 GIC 6696 : colTypmods[colindex] = subtypmod;
1324 CBC 11437 : else if (subtypmod != colTypmods[colindex])
1325 GIC 6 : colTypmods[colindex] = -1;
1326 : }
1327 ECB : else
1328 : {
1329 : /* types disagree, so force typmod to -1 */
1330 UIC 0 : colTypmods[colindex] = -1;
1331 ECB : }
1332 GIC 18133 : curColType = lnext(colTypes, curColType);
1333 CBC 18133 : colindex++;
1334 ECB : }
1335 CBC 6711 : Assert(curColType == NULL);
1336 ECB : }
1337 :
1338 : /*
1339 : * Now we can build the tlist for the Append.
1340 : */
1341 CBC 2666 : colindex = 0;
1342 9362 : forthree(curColType, colTypes, curColCollation, colCollations,
1343 ECB : ref_tl_item, refnames_tlist)
1344 : {
1345 GIC 6696 : Oid colType = lfirst_oid(curColType);
1346 6696 : int32 colTypmod = colTypmods[colindex++];
1347 6696 : Oid colColl = lfirst_oid(curColCollation);
1348 6696 : TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
1349 EUB :
1350 GIC 6696 : Assert(reftle->resno == resno);
1351 CBC 6696 : Assert(!reftle->resjunk);
1352 6696 : expr = (Node *) makeVar(0,
1353 : resno,
1354 ECB : colType,
1355 : colTypmod,
1356 : colColl,
1357 : 0);
1358 GIC 13392 : tle = makeTargetEntry((Expr *) expr,
1359 6696 : (AttrNumber) resno++,
1360 CBC 6696 : pstrdup(reftle->resname),
1361 ECB : false);
1362 :
1363 : /*
1364 : * By convention, all non-resjunk columns in a setop tree have
1365 : * ressortgroupref equal to their resno. In some cases the ref isn't
1366 : * needed, but this is a cleaner way than modifying the tlist later.
1367 : */
1368 GIC 6696 : tle->ressortgroupref = tle->resno;
1369 ECB :
1370 CBC 6696 : tlist = lappend(tlist, tle);
1371 ECB : }
1372 :
1373 GIC 2666 : if (flag)
1374 : {
1375 : /* Add a resjunk flag column */
1376 : /* flag value is shown as copied up from subplan */
1377 CBC 303 : expr = (Node *) makeVar(0,
1378 ECB : resno,
1379 : INT4OID,
1380 : -1,
1381 : InvalidOid,
1382 : 0);
1383 GIC 303 : tle = makeTargetEntry((Expr *) expr,
1384 303 : (AttrNumber) resno++,
1385 : pstrdup("flag"),
1386 : true);
1387 CBC 303 : tlist = lappend(tlist, tle);
1388 : }
1389 ECB :
1390 GIC 2666 : pfree(colTypmods);
1391 :
1392 CBC 2666 : return tlist;
1393 : }
1394 :
1395 : /*
1396 ECB : * generate_setop_grouplist
1397 : * Build a SortGroupClause list defining the sort/grouping properties
1398 : * of the setop's output columns.
1399 : *
1400 : * Parse analysis already determined the properties and built a suitable
1401 : * list, except that the entries do not have sortgrouprefs set because
1402 : * the parser output representation doesn't include a tlist for each
1403 : * setop. So what we need to do here is copy that list and install
1404 : * proper sortgrouprefs into it (copying those from the targetlist).
1405 : */
1406 : static List *
1407 GIC 2216 : generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
1408 : {
1409 CBC 2216 : List *grouplist = copyObject(op->groupClauses);
1410 : ListCell *lg;
1411 ECB : ListCell *lt;
1412 :
1413 GIC 2216 : lg = list_head(grouplist);
1414 7889 : foreach(lt, targetlist)
1415 : {
1416 5673 : TargetEntry *tle = (TargetEntry *) lfirst(lt);
1417 : SortGroupClause *sgc;
1418 :
1419 5673 : if (tle->resjunk)
1420 : {
1421 : /* resjunk columns should not have sortgrouprefs */
1422 303 : Assert(tle->ressortgroupref == 0);
1423 303 : continue; /* ignore resjunk columns */
1424 : }
1425 :
1426 ECB : /* non-resjunk columns should have sortgroupref = resno */
1427 GIC 5370 : Assert(tle->ressortgroupref == tle->resno);
1428 ECB :
1429 : /* non-resjunk columns should have grouping clauses */
1430 GIC 5370 : Assert(lg != NULL);
1431 5370 : sgc = (SortGroupClause *) lfirst(lg);
1432 CBC 5370 : lg = lnext(grouplist, lg);
1433 5370 : Assert(sgc->tleSortGroupRef == 0);
1434 :
1435 5370 : sgc->tleSortGroupRef = tle->ressortgroupref;
1436 : }
1437 GIC 2216 : Assert(lg == NULL);
1438 CBC 2216 : return grouplist;
1439 : }
|