LCOV - Differential Code Coverage HEAD vs 15 - src/backend/optimizer/prep/prepunion.c

LCOV - differential code coverage report

Current view:	top level - src/backend/optimizer/prep - prepunion.c (source / functions)		Coverage	Total	Hit	LBC	UIC	GBC	GIC	GNC	CBC	EUB	ECB
Current:	Differential Code Coverage HEAD vs 15	Lines:	96.2 %	370	356	3	11	2	234	6	114	12	239
Current Date:	2023-04-08 15:15:32	Functions:	100.0 %	12	12				12				12
Baseline:	15
Baseline Date:	2023-04-08 15:09:40
Legend:	Lines: hit not hit

           TLA  Line data    Source code

       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * prepunion.c
       4                 :  *    Routines to plan set-operation queries.  The filename is a leftover
       5                 :  *    from a time when only UNIONs were implemented.
       6                 :  *
       7                 :  * There are two code paths in the planner for set-operation queries.
       8                 :  * If a subquery consists entirely of simple UNION ALL operations, it
       9                 :  * is converted into an "append relation".  Otherwise, it is handled
      10                 :  * by the general code in this module (plan_set_operations and its
      11                 :  * subroutines).  There is some support code here for the append-relation
      12                 :  * case, but most of the heavy lifting for that is done elsewhere,
      13                 :  * notably in prepjointree.c and allpaths.c.
      14                 :  *
      15                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      16                 :  * Portions Copyright (c) 1994, Regents of the University of California
      17                 :  *
      18                 :  *
      19                 :  * IDENTIFICATION
      20                 :  *    src/backend/optimizer/prep/prepunion.c
      21                 :  *
      22                 :  *-------------------------------------------------------------------------
      23                 :  */
      24                 : #include "postgres.h"
      25                 : 
      26                 : #include "access/htup_details.h"
      27                 : #include "access/sysattr.h"
      28                 : #include "catalog/partition.h"
      29                 : #include "catalog/pg_inherits.h"
      30                 : #include "catalog/pg_type.h"
      31                 : #include "miscadmin.h"
      32                 : #include "nodes/makefuncs.h"
      33                 : #include "nodes/nodeFuncs.h"
      34                 : #include "optimizer/cost.h"
      35                 : #include "optimizer/pathnode.h"
      36                 : #include "optimizer/paths.h"
      37                 : #include "optimizer/planmain.h"
      38                 : #include "optimizer/planner.h"
      39                 : #include "optimizer/prep.h"
      40                 : #include "optimizer/tlist.h"
      41                 : #include "parser/parse_coerce.h"
      42                 : #include "parser/parsetree.h"
      43                 : #include "utils/lsyscache.h"
      44                 : #include "utils/rel.h"
      45                 : #include "utils/selfuncs.h"
      46                 : #include "utils/syscache.h"
      47                 : 
      48                 : 
      49                 : static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
      50                 :                                           List *colTypes, List *colCollations,
      51                 :                                           bool junkOK,
      52                 :                                           int flag, List *refnames_tlist,
      53                 :                                           List **pTargetList,
      54                 :                                           double *pNumGroups);
      55                 : static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
      56                 :                                            PlannerInfo *root,
      57                 :                                            List *refnames_tlist,
      58                 :                                            List **pTargetList);
      59                 : static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
      60                 :                                         List *refnames_tlist,
      61                 :                                         List **pTargetList);
      62                 : static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
      63                 :                                            List *refnames_tlist,
      64                 :                                            List **pTargetList);
      65                 : static List *plan_union_children(PlannerInfo *root,
      66                 :                                  SetOperationStmt *top_union,
      67                 :                                  List *refnames_tlist,
      68                 :                                  List **tlist_list);
      69                 : static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
      70                 :                                PlannerInfo *root);
      71                 : static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
      72                 : static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
      73                 :                                 Path *input_path,
      74                 :                                 double dNumGroups, double dNumOutputRows,
      75                 :                                 const char *construct);
      76                 : static List *generate_setop_tlist(List *colTypes, List *colCollations,
      77                 :                                   int flag,
      78                 :                                   Index varno,
      79                 :                                   bool hack_constants,
      80                 :                                   List *input_tlist,
      81                 :                                   List *refnames_tlist,
      82                 :                                   bool *trivial_tlist);
      83                 : static List *generate_append_tlist(List *colTypes, List *colCollations,
      84                 :                                    bool flag,
      85                 :                                    List *input_tlists,
      86                 :                                    List *refnames_tlist);
      87                 : static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
      88                 : 
      89                 : 
      90                 : /*
      91                 :  * plan_set_operations
      92                 :  *
      93                 :  *    Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
      94                 :  *
      95                 :  * This routine only deals with the setOperations tree of the given query.
      96                 :  * Any top-level ORDER BY requested in root->parse->sortClause will be handled
      97                 :  * when we return to grouping_planner; likewise for LIMIT.
      98                 :  *
      99                 :  * What we return is an "upperrel" RelOptInfo containing at least one Path
     100                 :  * that implements the set-operation tree.  In addition, root->processed_tlist
     101                 :  * receives a targetlist representing the output of the topmost setop node.
     102                 :  */
     103                 : RelOptInfo *
     104 GIC        2591 : plan_set_operations(PlannerInfo *root)
     105 ECB             : {
     106 GIC        2591 :     Query      *parse = root->parse;
     107 CBC        2591 :     SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
     108 ECB             :     Node       *node;
     109                 :     RangeTblEntry *leftmostRTE;
     110                 :     Query      *leftmostQuery;
     111                 :     RelOptInfo *setop_rel;
     112                 :     List       *top_tlist;
     113                 : 
     114 GIC        2591 :     Assert(topop);
     115 ECB             : 
     116                 :     /* check for unsupported stuff */
     117 GIC        2591 :     Assert(parse->jointree->fromlist == NIL);
     118 CBC        2591 :     Assert(parse->jointree->quals == NULL);
     119            2591 :     Assert(parse->groupClause == NIL);
     120            2591 :     Assert(parse->havingQual == NULL);
     121            2591 :     Assert(parse->windowClause == NIL);
     122            2591 :     Assert(parse->distinctClause == NIL);
     123 ECB             : 
     124                 :     /*
     125                 :      * In the outer query level, we won't have any true equivalences to deal
     126                 :      * with; but we do want to be able to make pathkeys, which will require
     127                 :      * single-member EquivalenceClasses.  Indicate that EC merging is complete
     128                 :      * so that pathkeys.c won't complain.
     129                 :      */
     130 GIC        2591 :     Assert(root->eq_classes == NIL);
     131 CBC        2591 :     root->ec_merging_done = true;
     132 ECB             : 
     133                 :     /*
     134                 :      * We'll need to build RelOptInfos for each of the leaf subqueries, which
     135                 :      * are RTE_SUBQUERY rangetable entries in this Query.  Prepare the index
     136                 :      * arrays for those, and for AppendRelInfos in case they're needed.
     137                 :      */
     138 GIC        2591 :     setup_simple_rel_arrays(root);
     139 ECB             : 
     140                 :     /*
     141                 :      * Find the leftmost component Query.  We need to use its column names for
     142                 :      * all generated tlists (else SELECT INTO won't work right).
     143                 :      */
     144 GIC        2591 :     node = topop->larg;
     145 CBC        4015 :     while (node && IsA(node, SetOperationStmt))
     146            1424 :         node = ((SetOperationStmt *) node)->larg;
     147            2591 :     Assert(node && IsA(node, RangeTblRef));
     148            2591 :     leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
     149            2591 :     leftmostQuery = leftmostRTE->subquery;
     150            2591 :     Assert(leftmostQuery != NULL);
     151 ECB             : 
     152                 :     /*
     153                 :      * If the topmost node is a recursive union, it needs special processing.
     154                 :      */
     155 GIC        2591 :     if (root->hasRecursion)
     156 ECB             :     {
     157 GIC         357 :         setop_rel = generate_recursion_path(topop, root,
     158 ECB             :                                             leftmostQuery->targetList,
     159                 :                                             &top_tlist);
     160                 :     }
     161                 :     else
     162                 :     {
     163                 :         /*
     164                 :          * Recurse on setOperations tree to generate paths for set ops. The
     165                 :          * final output paths should have just the column types shown as the
     166                 :          * output from the top-level node, plus possibly resjunk working
     167                 :          * columns (we can rely on upper-level nodes to deal with that).
     168                 :          */
     169 GIC        2234 :         setop_rel = recurse_set_operations((Node *) topop, root,
     170 ECB             :                                            topop->colTypes, topop->colCollations,
     171                 :                                            true, -1,
     172                 :                                            leftmostQuery->targetList,
     173                 :                                            &top_tlist,
     174                 :                                            NULL);
     175                 :     }
     176                 : 
     177                 :     /* Must return the built tlist into root->processed_tlist. */
     178 GIC        2588 :     root->processed_tlist = top_tlist;
     179 ECB             : 
     180 GIC        2588 :     return setop_rel;
     181 ECB             : }
     182                 : 
     183                 : /*
     184                 :  * recurse_set_operations
     185                 :  *    Recursively handle one step in a tree of set operations
     186                 :  *
     187                 :  * colTypes: OID list of set-op's result column datatypes
     188                 :  * colCollations: OID list of set-op's result column collations
     189                 :  * junkOK: if true, child resjunk columns may be left in the result
     190                 :  * flag: if >= 0, add a resjunk output column indicating value of flag
     191                 :  * refnames_tlist: targetlist to take column names from
     192                 :  *
     193                 :  * Returns a RelOptInfo for the subtree, as well as these output parameters:
     194                 :  * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
     195                 :  * *pNumGroups: if not NULL, we estimate the number of distinct groups
     196                 :  *      in the result, and store it there
     197                 :  *
     198                 :  * The pTargetList output parameter is mostly redundant with the pathtarget
     199                 :  * of the returned RelOptInfo, but for the moment we need it because much of
     200                 :  * the logic in this file depends on flag columns being marked resjunk.
     201                 :  * Pending a redesign of how that works, this is the easy way out.
     202                 :  *
     203                 :  * We don't have to care about typmods here: the only allowed difference
     204                 :  * between set-op input and output typmods is input is a specific typmod
     205                 :  * and output is -1, and that does not require a coercion.
     206                 :  */
     207                 : static RelOptInfo *
     208 GIC        8945 : recurse_set_operations(Node *setOp, PlannerInfo *root,
     209 ECB             :                        List *colTypes, List *colCollations,
     210                 :                        bool junkOK,
     211                 :                        int flag, List *refnames_tlist,
     212                 :                        List **pTargetList,
     213                 :                        double *pNumGroups)
     214                 : {
     215 GIC        8945 :     RelOptInfo *rel = NULL;     /* keep compiler quiet */
     216 ECB             : 
     217                 :     /* Guard against stack overflow due to overly complex setop nests */
     218 GIC        8945 :     check_stack_depth();
     219 ECB             : 
     220 GIC        8945 :     if (IsA(setOp, RangeTblRef))
     221 ECB             :     {
     222 GIC        6636 :         RangeTblRef *rtr = (RangeTblRef *) setOp;
     223 CBC        6636 :         RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
     224            6636 :         Query      *subquery = rte->subquery;
     225 ECB             :         PlannerInfo *subroot;
     226                 :         RelOptInfo *final_rel;
     227                 :         Path       *subpath;
     228                 :         Path       *path;
     229                 :         List       *tlist;
     230                 :         bool        trivial_tlist;
     231                 : 
     232 GIC        6636 :         Assert(subquery != NULL);
     233                 : 
     234 ECB             :         /* Build a RelOptInfo for this leaf subquery. */
     235 GIC        6636 :         rel = build_simple_rel(root, rtr->rtindex, NULL);
     236                 : 
     237 ECB             :         /* plan_params should not be in use in current query level */
     238 GIC        6636 :         Assert(root->plan_params == NIL);
     239                 : 
     240 ECB             :         /* Generate a subroot and Paths for the subquery */
     241 GIC        6636 :         subroot = rel->subroot = subquery_planner(root->glob, subquery,
     242                 :                                                   root,
     243 ECB             :                                                   false,
     244                 :                                                   root->tuple_fraction);
     245                 : 
     246                 :         /*
     247                 :          * It should not be possible for the primitive query to contain any
     248                 :          * cross-references to other primitive queries in the setop tree.
     249                 :          */
     250 GIC        6636 :         if (root->plan_params)
     251 UIC           0 :             elog(ERROR, "unexpected outer reference in set operation subquery");
     252 ECB             : 
     253 EUB             :         /* Figure out the appropriate target list for this subquery. */
     254 GIC        6636 :         tlist = generate_setop_tlist(colTypes, colCollations,
     255                 :                                      flag,
     256 CBC        6636 :                                      rtr->rtindex,
     257                 :                                      true,
     258 ECB             :                                      subroot->processed_tlist,
     259                 :                                      refnames_tlist,
     260                 :                                      &trivial_tlist);
     261 GIC        6636 :         rel->reltarget = create_pathtarget(root, tlist);
     262                 : 
     263                 :         /* Return the fully-fledged tlist to caller, too */
     264 CBC        6636 :         *pTargetList = tlist;
     265                 : 
     266                 :         /*
     267 ECB             :          * Mark rel with estimated output rows, width, etc.  Note that we have
     268                 :          * to do this before generating outer-query paths, else
     269                 :          * cost_subqueryscan is not happy.
     270                 :          */
     271 GIC        6636 :         set_subquery_size_estimates(root, rel);
     272                 : 
     273                 :         /*
     274 ECB             :          * Since we may want to add a partial path to this relation, we must
     275                 :          * set its consider_parallel flag correctly.
     276                 :          */
     277 GIC        6636 :         final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
     278            6636 :         rel->consider_parallel = final_rel->consider_parallel;
     279                 : 
     280 ECB             :         /*
     281                 :          * For the moment, we consider only a single Path for the subquery.
     282                 :          * This should change soon (make it look more like
     283                 :          * set_subquery_pathlist).
     284                 :          */
     285 GIC        6636 :         subpath = get_cheapest_fractional_path(final_rel,
     286                 :                                                root->tuple_fraction);
     287                 : 
     288 ECB             :         /*
     289                 :          * Stick a SubqueryScanPath atop that.
     290                 :          *
     291                 :          * We don't bother to determine the subquery's output ordering since
     292                 :          * it won't be reflected in the set-op result anyhow; so just label
     293                 :          * the SubqueryScanPath with nil pathkeys.  (XXX that should change
     294                 :          * soon too, likely.)
     295                 :          */
     296 GIC        6636 :         path = (Path *) create_subqueryscan_path(root, rel, subpath,
     297                 :                                                  trivial_tlist,
     298                 :                                                  NIL, NULL);
     299                 : 
     300 CBC        6636 :         add_path(rel, path);
     301                 : 
     302                 :         /*
     303                 :          * If we have a partial path for the child relation, we can use that
     304 ECB             :          * to build a partial path for this relation.  But there's no point in
     305                 :          * considering any path but the cheapest.
     306                 :          */
     307 GIC        6636 :         if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
     308            3725 :             final_rel->partial_pathlist != NIL)
     309                 :         {
     310                 :             Path       *partial_subpath;
     311 ECB             :             Path       *partial_path;
     312                 : 
     313 GIC           6 :             partial_subpath = linitial(final_rel->partial_pathlist);
     314                 :             partial_path = (Path *)
     315               6 :                 create_subqueryscan_path(root, rel, partial_subpath,
     316                 :                                          trivial_tlist,
     317                 :                                          NIL, NULL);
     318 CBC           6 :             add_partial_path(rel, partial_path);
     319                 :         }
     320 ECB             : 
     321                 :         /*
     322                 :          * Estimate number of groups if caller wants it.  If the subquery used
     323                 :          * grouping or aggregation, its output is probably mostly unique
     324                 :          * anyway; otherwise do statistical estimation.
     325                 :          *
     326                 :          * XXX you don't really want to know about this: we do the estimation
     327                 :          * using the subquery's original targetlist expressions, not the
     328                 :          * subroot->processed_tlist which might seem more appropriate.  The
     329                 :          * reason is that if the subquery is itself a setop, it may return a
     330                 :          * processed_tlist containing "varno 0" Vars generated by
     331                 :          * generate_append_tlist, and those would confuse estimate_num_groups
     332                 :          * mightily.  We ought to get rid of the "varno 0" hack, but that
     333                 :          * requires a redesign of the parsetree representation of setops, so
     334                 :          * that there can be an RTE corresponding to each setop's output.
     335                 :          */
     336 GIC        6636 :         if (pNumGroups)
     337                 :         {
     338             591 :             if (subquery->groupClause || subquery->groupingSets ||
     339             591 :                 subquery->distinctClause ||
     340             585 :                 subroot->hasHavingQual || subquery->hasAggs)
     341 CBC           6 :                 *pNumGroups = subpath->rows;
     342                 :             else
     343             585 :                 *pNumGroups = estimate_num_groups(subroot,
     344 ECB             :                                                   get_tlist_exprs(subquery->targetList, false),
     345                 :                                                   subpath->rows,
     346                 :                                                   NULL,
     347                 :                                                   NULL);
     348                 :         }
     349                 :     }
     350 GIC        2309 :     else if (IsA(setOp, SetOperationStmt))
     351                 :     {
     352            2309 :         SetOperationStmt *op = (SetOperationStmt *) setOp;
     353                 : 
     354                 :         /* UNIONs are much different from INTERSECT/EXCEPT */
     355 CBC        2309 :         if (op->op == SETOP_UNION)
     356 GIC        2006 :             rel = generate_union_paths(op, root,
     357 ECB             :                                        refnames_tlist,
     358                 :                                        pTargetList);
     359                 :         else
     360 CBC         303 :             rel = generate_nonunion_paths(op, root,
     361 ECB             :                                           refnames_tlist,
     362                 :                                           pTargetList);
     363 GIC        2309 :         if (pNumGroups)
     364              15 :             *pNumGroups = rel->rows;
     365 ECB             : 
     366                 :         /*
     367                 :          * If necessary, add a Result node to project the caller-requested
     368                 :          * output columns.
     369                 :          *
     370                 :          * XXX you don't really want to know about this: setrefs.c will apply
     371                 :          * fix_upper_expr() to the Result node's tlist. This would fail if the
     372                 :          * Vars generated by generate_setop_tlist() were not exactly equal()
     373                 :          * to the corresponding tlist entries of the subplan. However, since
     374                 :          * the subplan was generated by generate_union_paths() or
     375                 :          * generate_nonunion_paths(), and hence its tlist was generated by
     376                 :          * generate_append_tlist(), this will work.  We just tell
     377                 :          * generate_setop_tlist() to use varno 0.
     378                 :          */
     379 GIC        2309 :         if (flag >= 0 ||
     380            2294 :             !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
     381            2243 :             !tlist_same_collations(*pTargetList, colCollations, junkOK))
     382                 :         {
     383                 :             PathTarget *target;
     384                 :             bool        trivial_tlist;
     385 ECB             :             ListCell   *lc;
     386                 : 
     387 CBC          66 :             *pTargetList = generate_setop_tlist(colTypes, colCollations,
     388                 :                                                 flag,
     389                 :                                                 0,
     390                 :                                                 false,
     391                 :                                                 *pTargetList,
     392                 :                                                 refnames_tlist,
     393                 :                                                 &trivial_tlist);
     394              66 :             target = create_pathtarget(root, *pTargetList);
     395                 : 
     396                 :             /* Apply projection to each path */
     397 GIC         132 :             foreach(lc, rel->pathlist)
     398                 :             {
     399              66 :                 Path       *subpath = (Path *) lfirst(lc);
     400                 :                 Path       *path;
     401 ECB             : 
     402 GIC          66 :                 Assert(subpath->param_info == NULL);
     403              66 :                 path = apply_projection_to_path(root, subpath->parent,
     404 ECB             :                                                 subpath, target);
     405                 :                 /* If we had to add a Result, path is different from subpath */
     406 CBC          66 :                 if (path != subpath)
     407 GIC          66 :                     lfirst(lc) = path;
     408                 :             }
     409 ECB             : 
     410                 :             /* Apply projection to each partial path */
     411 GIC          66 :             foreach(lc, rel->partial_pathlist)
     412                 :             {
     413 LBC           0 :                 Path       *subpath = (Path *) lfirst(lc);
     414 ECB             :                 Path       *path;
     415                 : 
     416 UIC           0 :                 Assert(subpath->param_info == NULL);
     417                 : 
     418 ECB             :                 /* avoid apply_projection_to_path, in case of multiple refs */
     419 UIC           0 :                 path = (Path *) create_projection_path(root, subpath->parent,
     420 EUB             :                                                        subpath, target);
     421 UIC           0 :                 lfirst(lc) = path;
     422                 :             }
     423 EUB             :         }
     424                 :     }
     425                 :     else
     426                 :     {
     427 UIC           0 :         elog(ERROR, "unrecognized node type: %d",
     428 EUB             :              (int) nodeTag(setOp));
     429                 :         *pTargetList = NIL;
     430                 :     }
     431                 : 
     432 GIC        8945 :     postprocess_setop_rel(root, rel);
     433                 : 
     434 GBC        8945 :     return rel;
     435                 : }
     436                 : 
     437                 : /*
     438                 :  * Generate paths for a recursive UNION node
     439 ECB             :  */
     440                 : static RelOptInfo *
     441 CBC         357 : generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
     442                 :                         List *refnames_tlist,
     443                 :                         List **pTargetList)
     444                 : {
     445                 :     RelOptInfo *result_rel;
     446                 :     Path       *path;
     447                 :     RelOptInfo *lrel,
     448 ECB             :                *rrel;
     449                 :     Path       *lpath;
     450                 :     Path       *rpath;
     451                 :     List       *lpath_tlist;
     452                 :     List       *rpath_tlist;
     453                 :     List       *tlist;
     454                 :     List       *groupList;
     455                 :     double      dNumGroups;
     456                 : 
     457                 :     /* Parser should have rejected other cases */
     458 GIC         357 :     if (setOp->op != SETOP_UNION)
     459 UIC           0 :         elog(ERROR, "only UNION queries can be recursive");
     460                 :     /* Worktable ID should be assigned */
     461 GIC         357 :     Assert(root->wt_param_id >= 0);
     462                 : 
     463                 :     /*
     464                 :      * Unlike a regular UNION node, process the left and right inputs
     465 ECB             :      * separately without any intention of combining them into one Append.
     466 EUB             :      */
     467 GIC         357 :     lrel = recurse_set_operations(setOp->larg, root,
     468 ECB             :                                   setOp->colTypes, setOp->colCollations,
     469                 :                                   false, -1,
     470                 :                                   refnames_tlist,
     471                 :                                   &lpath_tlist,
     472                 :                                   NULL);
     473 GIC         357 :     lpath = lrel->cheapest_total_path;
     474 ECB             :     /* The right path will want to look at the left one ... */
     475 GIC         357 :     root->non_recursive_path = lpath;
     476             357 :     rrel = recurse_set_operations(setOp->rarg, root,
     477                 :                                   setOp->colTypes, setOp->colCollations,
     478                 :                                   false, -1,
     479                 :                                   refnames_tlist,
     480 ECB             :                                   &rpath_tlist,
     481                 :                                   NULL);
     482 CBC         357 :     rpath = rrel->cheapest_total_path;
     483             357 :     root->non_recursive_path = NULL;
     484                 : 
     485                 :     /*
     486                 :      * Generate tlist for RecursiveUnion path node --- same as in Append cases
     487                 :      */
     488 GIC         357 :     tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
     489 CBC         357 :                                   list_make2(lpath_tlist, rpath_tlist),
     490 ECB             :                                   refnames_tlist);
     491                 : 
     492 GIC         357 :     *pTargetList = tlist;
     493                 : 
     494                 :     /* Build result relation. */
     495 CBC         357 :     result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
     496             357 :                                  bms_union(lrel->relids, rrel->relids));
     497 GIC         357 :     result_rel->reltarget = create_pathtarget(root, tlist);
     498                 : 
     499 ECB             :     /*
     500                 :      * If UNION, identify the grouping operators
     501                 :      */
     502 CBC         357 :     if (setOp->all)
     503 ECB             :     {
     504 CBC         210 :         groupList = NIL;
     505 GIC         210 :         dNumGroups = 0;
     506                 :     }
     507                 :     else
     508                 :     {
     509 ECB             :         /* Identify the grouping semantics */
     510 GIC         147 :         groupList = generate_setop_grouplist(setOp, tlist);
     511 ECB             : 
     512                 :         /* We only support hashing here */
     513 GIC         147 :         if (!grouping_is_hashable(groupList))
     514               3 :             ereport(ERROR,
     515                 :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     516                 :                      errmsg("could not implement recursive UNION"),
     517 ECB             :                      errdetail("All column datatypes must be hashable.")));
     518                 : 
     519                 :         /*
     520                 :          * For the moment, take the number of distinct groups as equal to the
     521                 :          * total input size, ie, the worst case.
     522                 :          */
     523 GIC         144 :         dNumGroups = lpath->rows + rpath->rows * 10;
     524                 :     }
     525                 : 
     526                 :     /*
     527                 :      * And make the path node.
     528                 :      */
     529             354 :     path = (Path *) create_recursiveunion_path(root,
     530 ECB             :                                                result_rel,
     531                 :                                                lpath,
     532                 :                                                rpath,
     533 GIC         354 :                                                result_rel->reltarget,
     534                 :                                                groupList,
     535                 :                                                root->wt_param_id,
     536 ECB             :                                                dNumGroups);
     537                 : 
     538 GIC         354 :     add_path(result_rel, path);
     539             354 :     postprocess_setop_rel(root, result_rel);
     540 CBC         354 :     return result_rel;
     541                 : }
     542                 : 
     543                 : /*
     544                 :  * Generate paths for a UNION or UNION ALL node
     545 ECB             :  */
     546                 : static RelOptInfo *
     547 CBC        2006 : generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
     548                 :                      List *refnames_tlist,
     549                 :                      List **pTargetList)
     550                 : {
     551 GIC        2006 :     Relids      relids = NULL;
     552                 :     RelOptInfo *result_rel;
     553            2006 :     double      save_fraction = root->tuple_fraction;
     554 ECB             :     ListCell   *lc;
     555 GIC        2006 :     List       *pathlist = NIL;
     556            2006 :     List       *partial_pathlist = NIL;
     557            2006 :     bool        partial_paths_valid = true;
     558 CBC        2006 :     bool        consider_parallel = true;
     559                 :     List       *rellist;
     560 ECB             :     List       *tlist_list;
     561                 :     List       *tlist;
     562                 :     Path       *path;
     563                 : 
     564                 :     /*
     565                 :      * If plain UNION, tell children to fetch all tuples.
     566                 :      *
     567                 :      * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
     568                 :      * each arm of the UNION ALL.  One could make a case for reducing the
     569                 :      * tuple fraction for later arms (discounting by the expected size of the
     570                 :      * earlier arms' results) but it seems not worth the trouble. The normal
     571                 :      * case where tuple_fraction isn't already zero is a LIMIT at top level,
     572                 :      * and passing it down as-is is usually enough to get the desired result
     573                 :      * of preferring fast-start plans.
     574                 :      */
     575 GIC        2006 :     if (!op->all)
     576            1763 :         root->tuple_fraction = 0.0;
     577                 : 
     578                 :     /*
     579                 :      * If any of my children are identical UNION nodes (same op, all-flag, and
     580                 :      * colTypes) then they can be merged into this node so that we generate
     581                 :      * only one Append and unique-ification for the lot.  Recurse to find such
     582 ECB             :      * nodes and compute their children's paths.
     583                 :      */
     584 GIC        2006 :     rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
     585                 : 
     586                 :     /*
     587                 :      * Generate tlist for Append plan node.
     588                 :      *
     589                 :      * The tlist for an Append plan isn't important as far as the Append is
     590                 :      * concerned, but we must make it look real anyway for the benefit of the
     591 ECB             :      * next plan level up.
     592                 :      */
     593 GIC        2006 :     tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
     594                 :                                   tlist_list, refnames_tlist);
     595                 : 
     596            2006 :     *pTargetList = tlist;
     597                 : 
     598                 :     /* Build path lists and relid set. */
     599            7397 :     foreach(lc, rellist)
     600 ECB             :     {
     601 GIC        5391 :         RelOptInfo *rel = lfirst(lc);
     602                 : 
     603 CBC        5391 :         pathlist = lappend(pathlist, rel->cheapest_total_path);
     604                 : 
     605 GIC        5391 :         if (consider_parallel)
     606 ECB             :         {
     607 GIC        3577 :             if (!rel->consider_parallel)
     608 ECB             :             {
     609 GIC        1700 :                 consider_parallel = false;
     610 CBC        1700 :                 partial_paths_valid = false;
     611                 :             }
     612            1877 :             else if (rel->partial_pathlist == NIL)
     613 GIC        1871 :                 partial_paths_valid = false;
     614 ECB             :             else
     615 GIC           6 :                 partial_pathlist = lappend(partial_pathlist,
     616 CBC           6 :                                            linitial(rel->partial_pathlist));
     617 ECB             :         }
     618                 : 
     619 CBC        5391 :         relids = bms_union(relids, rel->relids);
     620 ECB             :     }
     621                 : 
     622                 :     /* Build result relation. */
     623 CBC        2006 :     result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
     624 GIC        2006 :     result_rel->reltarget = create_pathtarget(root, tlist);
     625            2006 :     result_rel->consider_parallel = consider_parallel;
     626 ECB             : 
     627                 :     /*
     628                 :      * Append the child results together.
     629                 :      */
     630 CBC        2006 :     path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
     631 ECB             :                                        NIL, NULL, 0, false, -1);
     632                 : 
     633                 :     /*
     634                 :      * For UNION ALL, we just need the Append path.  For UNION, need to add
     635                 :      * node(s) to remove duplicates.
     636                 :      */
     637 CBC        2006 :     if (!op->all)
     638 GIC        1763 :         path = make_union_unique(op, path, tlist, root);
     639                 : 
     640            2006 :     add_path(result_rel, path);
     641                 : 
     642                 :     /*
     643                 :      * Estimate number of groups.  For now we just assume the output is unique
     644 ECB             :      * --- this is certainly true for the UNION case, and we want worst-case
     645                 :      * estimates anyway.
     646                 :      */
     647 CBC        2006 :     result_rel->rows = path->rows;
     648                 : 
     649                 :     /*
     650                 :      * Now consider doing the same thing using the partial paths plus Append
     651                 :      * plus Gather.
     652                 :      */
     653 GIC        2006 :     if (partial_paths_valid)
     654 ECB             :     {
     655                 :         Path       *ppath;
     656 GIC           3 :         int         parallel_workers = 0;
     657                 : 
     658                 :         /* Find the highest number of workers requested for any subpath. */
     659 CBC           9 :         foreach(lc, partial_pathlist)
     660                 :         {
     661 GNC           6 :             Path       *subpath = lfirst(lc);
     662 ECB             : 
     663 GNC           6 :             parallel_workers = Max(parallel_workers,
     664                 :                                    subpath->parallel_workers);
     665                 :         }
     666 CBC           3 :         Assert(parallel_workers > 0);
     667                 : 
     668 ECB             :         /*
     669                 :          * If the use of parallel append is permitted, always request at least
     670                 :          * log2(# of children) paths.  We assume it can be useful to have
     671                 :          * extra workers in this case because they will be spread out across
     672                 :          * the children.  The precise formula is just a guess; see
     673                 :          * add_paths_to_append_rel.
     674                 :          */
     675 GIC           3 :         if (enable_parallel_append)
     676                 :         {
     677               3 :             parallel_workers = Max(parallel_workers,
     678                 :                                    pg_leftmost_one_pos32(list_length(partial_pathlist)) + 1);
     679               3 :             parallel_workers = Min(parallel_workers,
     680                 :                                    max_parallel_workers_per_gather);
     681                 :         }
     682 CBC           3 :         Assert(parallel_workers > 0);
     683                 : 
     684 ECB             :         ppath = (Path *)
     685 GIC           3 :             create_append_path(root, result_rel, NIL, partial_pathlist,
     686 ECB             :                                NIL, NULL,
     687                 :                                parallel_workers, enable_parallel_append,
     688                 :                                -1);
     689                 :         ppath = (Path *)
     690 GIC           3 :             create_gather_path(root, result_rel, ppath,
     691               3 :                                result_rel->reltarget, NULL, NULL);
     692 CBC           3 :         if (!op->all)
     693 GIC           3 :             ppath = make_union_unique(op, ppath, tlist, root);
     694               3 :         add_path(result_rel, ppath);
     695                 :     }
     696                 : 
     697 ECB             :     /* Undo effects of possibly forcing tuple_fraction to 0 */
     698 CBC        2006 :     root->tuple_fraction = save_fraction;
     699 ECB             : 
     700 CBC        2006 :     return result_rel;
     701 ECB             : }
     702                 : 
     703                 : /*
     704                 :  * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
     705                 :  */
     706                 : static RelOptInfo *
     707 CBC         303 : generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
     708                 :                         List *refnames_tlist,
     709                 :                         List **pTargetList)
     710                 : {
     711                 :     RelOptInfo *result_rel;
     712                 :     RelOptInfo *lrel,
     713                 :                *rrel;
     714             303 :     double      save_fraction = root->tuple_fraction;
     715                 :     Path       *lpath,
     716                 :                *rpath,
     717                 :                *path;
     718                 :     List       *lpath_tlist,
     719                 :                *rpath_tlist,
     720                 :                *tlist_list,
     721 ECB             :                *tlist,
     722                 :                *groupList,
     723                 :                *pathlist;
     724                 :     double      dLeftGroups,
     725                 :                 dRightGroups,
     726                 :                 dNumGroups,
     727                 :                 dNumOutputRows;
     728                 :     bool        use_hash;
     729                 :     SetOpCmd    cmd;
     730                 :     int         firstFlag;
     731                 : 
     732                 :     /*
     733                 :      * Tell children to fetch all tuples.
     734                 :      */
     735 GIC         303 :     root->tuple_fraction = 0.0;
     736                 : 
     737                 :     /* Recurse on children, ensuring their outputs are marked */
     738             303 :     lrel = recurse_set_operations(op->larg, root,
     739                 :                                   op->colTypes, op->colCollations,
     740                 :                                   false, 0,
     741                 :                                   refnames_tlist,
     742 ECB             :                                   &lpath_tlist,
     743                 :                                   &dLeftGroups);
     744 GIC         303 :     lpath = lrel->cheapest_total_path;
     745 CBC         303 :     rrel = recurse_set_operations(op->rarg, root,
     746                 :                                   op->colTypes, op->colCollations,
     747                 :                                   false, 1,
     748                 :                                   refnames_tlist,
     749                 :                                   &rpath_tlist,
     750                 :                                   &dRightGroups);
     751             303 :     rpath = rrel->cheapest_total_path;
     752 ECB             : 
     753                 :     /* Undo effects of forcing tuple_fraction to 0 */
     754 GIC         303 :     root->tuple_fraction = save_fraction;
     755                 : 
     756                 :     /*
     757                 :      * For EXCEPT, we must put the left input first.  For INTERSECT, either
     758 ECB             :      * order should give the same results, and we prefer to put the smaller
     759                 :      * input first in order to minimize the size of the hash table in the
     760                 :      * hashing case.  "Smaller" means the one with the fewer groups.
     761                 :      */
     762 GIC         303 :     if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
     763             288 :     {
     764             288 :         pathlist = list_make2(lpath, rpath);
     765             288 :         tlist_list = list_make2(lpath_tlist, rpath_tlist);
     766             288 :         firstFlag = 0;
     767                 :     }
     768                 :     else
     769 ECB             :     {
     770 CBC          15 :         pathlist = list_make2(rpath, lpath);
     771              15 :         tlist_list = list_make2(rpath_tlist, lpath_tlist);
     772              15 :         firstFlag = 1;
     773 ECB             :     }
     774                 : 
     775                 :     /*
     776                 :      * Generate tlist for Append plan node.
     777                 :      *
     778                 :      * The tlist for an Append plan isn't important as far as the Append is
     779                 :      * concerned, but we must make it look real anyway for the benefit of the
     780                 :      * next plan level up.  In fact, it has to be real enough that the flag
     781                 :      * column is shown as a variable not a constant, else setrefs.c will get
     782                 :      * confused.
     783                 :      */
     784 GIC         303 :     tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
     785                 :                                   tlist_list, refnames_tlist);
     786                 : 
     787             303 :     *pTargetList = tlist;
     788                 : 
     789                 :     /* Build result relation. */
     790             303 :     result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
     791 CBC         303 :                                  bms_union(lrel->relids, rrel->relids));
     792 GIC         303 :     result_rel->reltarget = create_pathtarget(root, tlist);
     793                 : 
     794 ECB             :     /*
     795                 :      * Append the child results together.
     796                 :      */
     797 CBC         303 :     path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
     798 ECB             :                                        NIL, NULL, 0, false, -1);
     799                 : 
     800                 :     /* Identify the grouping semantics */
     801 GIC         303 :     groupList = generate_setop_grouplist(op, tlist);
     802                 : 
     803                 :     /*
     804 ECB             :      * Estimate number of distinct groups that we'll need hashtable entries
     805                 :      * for; this is the size of the left-hand input for EXCEPT, or the smaller
     806                 :      * input for INTERSECT.  Also estimate the number of eventual output rows.
     807                 :      * In non-ALL cases, we estimate each group produces one output row; in
     808                 :      * ALL cases use the relevant relation size.  These are worst-case
     809                 :      * estimates, of course, but we need to be conservative.
     810                 :      */
     811 GIC         303 :     if (op->op == SETOP_EXCEPT)
     812                 :     {
     813             195 :         dNumGroups = dLeftGroups;
     814             195 :         dNumOutputRows = op->all ? lpath->rows : dNumGroups;
     815                 :     }
     816                 :     else
     817                 :     {
     818 CBC         108 :         dNumGroups = Min(dLeftGroups, dRightGroups);
     819 GIC         108 :         dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
     820 ECB             :     }
     821                 : 
     822                 :     /*
     823                 :      * Decide whether to hash or sort, and add a sort node if needed.
     824                 :      */
     825 CBC         303 :     use_hash = choose_hashed_setop(root, groupList, path,
     826 ECB             :                                    dNumGroups, dNumOutputRows,
     827 GIC         303 :                                    (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
     828                 : 
     829             303 :     if (groupList && !use_hash)
     830              54 :         path = (Path *) create_sort_path(root,
     831                 :                                          result_rel,
     832 ECB             :                                          path,
     833                 :                                          make_pathkeys_for_sortclauses(root,
     834                 :                                                                        groupList,
     835                 :                                                                        tlist),
     836                 :                                          -1.0);
     837                 : 
     838                 :     /*
     839                 :      * Finally, add a SetOp path node to generate the correct output.
     840                 :      */
     841 GIC         303 :     switch (op->op)
     842                 :     {
     843             108 :         case SETOP_INTERSECT:
     844             108 :             cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
     845             108 :             break;
     846             195 :         case SETOP_EXCEPT:
     847             195 :             cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
     848 CBC         195 :             break;
     849 UIC           0 :         default:
     850 LBC           0 :             elog(ERROR, "unrecognized set op: %d", (int) op->op);
     851 ECB             :             cmd = SETOPCMD_INTERSECT;   /* keep compiler quiet */
     852                 :             break;
     853                 :     }
     854 CBC         303 :     path = (Path *) create_setop_path(root,
     855 ECB             :                                       result_rel,
     856 EUB             :                                       path,
     857                 :                                       cmd,
     858                 :                                       use_hash ? SETOP_HASHED : SETOP_SORTED,
     859                 :                                       groupList,
     860 GIC         303 :                                       list_length(op->colTypes) + 1,
     861 ECB             :                                       use_hash ? firstFlag : -1,
     862                 :                                       dNumGroups,
     863                 :                                       dNumOutputRows);
     864                 : 
     865 GIC         303 :     result_rel->rows = path->rows;
     866             303 :     add_path(result_rel, path);
     867 CBC         303 :     return result_rel;
     868                 : }
     869                 : 
     870                 : /*
     871                 :  * Pull up children of a UNION node that are identically-propertied UNIONs.
     872 ECB             :  *
     873                 :  * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
     874                 :  * output rows will be lost anyway.
     875                 :  *
     876                 :  * NOTE: currently, we ignore collations while determining if a child has
     877                 :  * the same properties.  This is semantically sound only so long as all
     878                 :  * collations have the same notion of equality.  It is valid from an
     879                 :  * implementation standpoint because we don't care about the ordering of
     880                 :  * a UNION child's result: UNION ALL results are always unordered, and
     881                 :  * generate_union_paths will force a fresh sort if the top level is a UNION.
     882                 :  */
     883                 : static List *
     884 GIC        2006 : plan_union_children(PlannerInfo *root,
     885                 :                     SetOperationStmt *top_union,
     886                 :                     List *refnames_tlist,
     887                 :                     List **tlist_list)
     888                 : {
     889            2006 :     List       *pending_rels = list_make1(top_union);
     890            2006 :     List       *result = NIL;
     891 ECB             :     List       *child_tlist;
     892                 : 
     893 GIC        2006 :     *tlist_list = NIL;
     894                 : 
     895           10782 :     while (pending_rels != NIL)
     896 ECB             :     {
     897 CBC        8776 :         Node       *setOp = linitial(pending_rels);
     898                 : 
     899 GIC        8776 :         pending_rels = list_delete_first(pending_rels);
     900 ECB             : 
     901 GIC        8776 :         if (IsA(setOp, SetOperationStmt))
     902 ECB             :         {
     903 GIC        3442 :             SetOperationStmt *op = (SetOperationStmt *) setOp;
     904 ECB             : 
     905 GIC        3442 :             if (op->op == top_union->op &&
     906 CBC        6788 :                 (op->all == top_union->all || op->all) &&
     907 GIC        3391 :                 equal(op->colTypes, top_union->colTypes))
     908 ECB             :             {
     909                 :                 /* Same UNION, so fold children into parent */
     910 CBC        3385 :                 pending_rels = lcons(op->rarg, pending_rels);
     911 GIC        3385 :                 pending_rels = lcons(op->larg, pending_rels);
     912 CBC        3385 :                 continue;
     913 ECB             :             }
     914                 :         }
     915                 : 
     916                 :         /*
     917                 :          * Not same, so plan this child separately.
     918                 :          *
     919                 :          * Note we disallow any resjunk columns in child results.  This is
     920                 :          * necessary since the Append node that implements the union won't do
     921                 :          * any projection, and upper levels will get confused if some of our
     922                 :          * output tuples have junk and some don't.  This case only arises when
     923                 :          * we have an EXCEPT or INTERSECT as child, else there won't be
     924                 :          * resjunk anyway.
     925                 :          */
     926 GIC        5391 :         result = lappend(result, recurse_set_operations(setOp, root,
     927                 :                                                         top_union->colTypes,
     928                 :                                                         top_union->colCollations,
     929                 :                                                         false, -1,
     930                 :                                                         refnames_tlist,
     931                 :                                                         &child_tlist,
     932                 :                                                         NULL));
     933 CBC        5391 :         *tlist_list = lappend(*tlist_list, child_tlist);
     934                 :     }
     935                 : 
     936 GIC        2006 :     return result;
     937                 : }
     938                 : 
     939                 : /*
     940 ECB             :  * Add nodes to the given path tree to unique-ify the result of a UNION.
     941                 :  */
     942                 : static Path *
     943 CBC        1766 : make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
     944                 :                   PlannerInfo *root)
     945                 : {
     946 GIC        1766 :     RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
     947                 :     List       *groupList;
     948                 :     double      dNumGroups;
     949                 : 
     950 ECB             :     /* Identify the grouping semantics */
     951 GIC        1766 :     groupList = generate_setop_grouplist(op, tlist);
     952                 : 
     953 ECB             :     /*
     954                 :      * XXX for the moment, take the number of distinct groups as equal to the
     955                 :      * total input size, ie, the worst case.  This is too conservative, but
     956                 :      * it's not clear how to get a decent estimate of the true size.  One
     957                 :      * should note as well the propensity of novices to write UNION rather
     958                 :      * than UNION ALL even when they don't expect any duplicates...
     959                 :      */
     960 GIC        1766 :     dNumGroups = path->rows;
     961                 : 
     962                 :     /* Decide whether to hash or sort */
     963            1766 :     if (choose_hashed_setop(root, groupList, path,
     964                 :                             dNumGroups, dNumGroups,
     965                 :                             "UNION"))
     966                 :     {
     967 ECB             :         /* Hashed aggregate plan --- no sort needed */
     968 GIC        1614 :         path = (Path *) create_agg_path(root,
     969                 :                                         result_rel,
     970 ECB             :                                         path,
     971                 :                                         create_pathtarget(root, tlist),
     972                 :                                         AGG_HASHED,
     973                 :                                         AGGSPLIT_SIMPLE,
     974                 :                                         groupList,
     975                 :                                         NIL,
     976                 :                                         NULL,
     977                 :                                         dNumGroups);
     978                 :     }
     979                 :     else
     980                 :     {
     981                 :         /* Sort and Unique */
     982 GIC         152 :         if (groupList)
     983                 :             path = (Path *)
     984             143 :                 create_sort_path(root,
     985                 :                                  result_rel,
     986                 :                                  path,
     987                 :                                  make_pathkeys_for_sortclauses(root,
     988                 :                                                                groupList,
     989 ECB             :                                                                tlist),
     990                 :                                  -1.0);
     991 CBC         152 :         path = (Path *) create_upper_unique_path(root,
     992                 :                                                  result_rel,
     993                 :                                                  path,
     994 GIC         152 :                                                  list_length(path->pathkeys),
     995                 :                                                  dNumGroups);
     996                 :     }
     997                 : 
     998 CBC        1766 :     return path;
     999                 : }
    1000                 : 
    1001 ECB             : /*
    1002                 :  * postprocess_setop_rel - perform steps required after adding paths
    1003                 :  */
    1004                 : static void
    1005 CBC        9299 : postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
    1006                 : {
    1007                 :     /*
    1008                 :      * We don't currently worry about allowing FDWs to contribute paths to
    1009                 :      * this relation, but give extensions a chance.
    1010                 :      */
    1011 GIC        9299 :     if (create_upper_paths_hook)
    1012 LBC           0 :         (*create_upper_paths_hook) (root, UPPERREL_SETOP,
    1013                 :                                     NULL, rel, NULL);
    1014                 : 
    1015                 :     /* Select cheapest path */
    1016 GIC        9299 :     set_cheapest(rel);
    1017            9299 : }
    1018 ECB             : 
    1019 EUB             : /*
    1020                 :  * choose_hashed_setop - should we use hashing for a set operation?
    1021                 :  */
    1022                 : static bool
    1023 CBC        2069 : choose_hashed_setop(PlannerInfo *root, List *groupClauses,
    1024 ECB             :                     Path *input_path,
    1025                 :                     double dNumGroups, double dNumOutputRows,
    1026                 :                     const char *construct)
    1027                 : {
    1028 GIC        2069 :     int         numGroupCols = list_length(groupClauses);
    1029            2069 :     Size        hash_mem_limit = get_hash_memory_limit();
    1030 ECB             :     bool        can_sort;
    1031                 :     bool        can_hash;
    1032                 :     Size        hashentrysize;
    1033                 :     Path        hashed_p;
    1034                 :     Path        sorted_p;
    1035                 :     double      tuple_fraction;
    1036                 : 
    1037                 :     /* Check whether the operators support sorting or hashing */
    1038 GIC        2069 :     can_sort = grouping_is_sortable(groupClauses);
    1039            2069 :     can_hash = grouping_is_hashable(groupClauses);
    1040            2069 :     if (can_hash && can_sort)
    1041                 :     {
    1042                 :         /* we have a meaningful choice to make, continue ... */
    1043                 :     }
    1044             369 :     else if (can_hash)
    1045 CBC         303 :         return true;
    1046              66 :     else if (can_sort)
    1047              66 :         return false;
    1048                 :     else
    1049 UIC           0 :         ereport(ERROR,
    1050                 :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1051 ECB             :         /* translator: %s is UNION, INTERSECT, or EXCEPT */
    1052                 :                  errmsg("could not implement %s", construct),
    1053                 :                  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
    1054                 : 
    1055                 :     /* Prefer sorting when enable_hashagg is off */
    1056 GBC        1700 :     if (!enable_hashagg)
    1057 GIC          63 :         return false;
    1058                 : 
    1059                 :     /*
    1060                 :      * Don't do it if it doesn't look like the hashtable will fit into
    1061                 :      * hash_mem.
    1062                 :      */
    1063 CBC        1637 :     hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
    1064 ECB             : 
    1065 GIC        1637 :     if (hashentrysize * dNumGroups > hash_mem_limit)
    1066 UIC           0 :         return false;
    1067                 : 
    1068                 :     /*
    1069                 :      * See if the estimated cost is no more than doing it the other way.
    1070 ECB             :      *
    1071                 :      * We need to consider input_plan + hashagg versus input_plan + sort +
    1072                 :      * group.  Note that the actual result plan might involve a SetOp or
    1073 EUB             :      * Unique node, not Agg or Group, but the cost estimates for Agg and Group
    1074                 :      * should be close enough for our purposes here.
    1075                 :      *
    1076                 :      * These path variables are dummies that just hold cost fields; we don't
    1077                 :      * make actual Paths for these steps.
    1078                 :      */
    1079 GIC        1637 :     cost_agg(&hashed_p, root, AGG_HASHED, NULL,
    1080                 :              numGroupCols, dNumGroups,
    1081                 :              NIL,
    1082                 :              input_path->startup_cost, input_path->total_cost,
    1083            1637 :              input_path->rows, input_path->pathtarget->width);
    1084                 : 
    1085                 :     /*
    1086 ECB             :      * Now for the sorted case.  Note that the input is *always* unsorted,
    1087                 :      * since it was made by appending unrelated sub-relations together.
    1088                 :      */
    1089 GIC        1637 :     sorted_p.startup_cost = input_path->startup_cost;
    1090 CBC        1637 :     sorted_p.total_cost = input_path->total_cost;
    1091                 :     /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
    1092 GIC        1637 :     cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
    1093            1637 :               input_path->rows, input_path->pathtarget->width,
    1094                 :               0.0, work_mem, -1.0);
    1095            1637 :     cost_group(&sorted_p, root, numGroupCols, dNumGroups,
    1096 ECB             :                NIL,
    1097                 :                sorted_p.startup_cost, sorted_p.total_cost,
    1098                 :                input_path->rows);
    1099                 : 
    1100                 :     /*
    1101                 :      * Now make the decision using the top-level tuple fraction.  First we
    1102                 :      * have to convert an absolute count (LIMIT) into fractional form.
    1103                 :      */
    1104 GIC        1637 :     tuple_fraction = root->tuple_fraction;
    1105            1637 :     if (tuple_fraction >= 1.0)
    1106 UIC           0 :         tuple_fraction /= dNumOutputRows;
    1107                 : 
    1108 GIC        1637 :     if (compare_fractional_path_costs(&hashed_p, &sorted_p,
    1109                 :                                       tuple_fraction) < 0)
    1110                 :     {
    1111 ECB             :         /* Hashed is cheaper, so use it */
    1112 CBC        1545 :         return true;
    1113 EUB             :     }
    1114 GIC          92 :     return false;
    1115 ECB             : }
    1116                 : 
    1117                 : /*
    1118                 :  * Generate targetlist for a set-operation plan node
    1119                 :  *
    1120                 :  * colTypes: OID list of set-op's result column datatypes
    1121                 :  * colCollations: OID list of set-op's result column collations
    1122                 :  * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
    1123                 :  * varno: varno to use in generated Vars
    1124                 :  * hack_constants: true to copy up constants (see comments in code)
    1125                 :  * input_tlist: targetlist of this node's input node
    1126                 :  * refnames_tlist: targetlist to take column names from
    1127                 :  * trivial_tlist: output parameter, set to true if targetlist is trivial
    1128                 :  */
    1129                 : static List *
    1130 GIC        6702 : generate_setop_tlist(List *colTypes, List *colCollations,
    1131                 :                      int flag,
    1132                 :                      Index varno,
    1133                 :                      bool hack_constants,
    1134                 :                      List *input_tlist,
    1135                 :                      List *refnames_tlist,
    1136                 :                      bool *trivial_tlist)
    1137                 : {
    1138            6702 :     List       *tlist = NIL;
    1139 CBC        6702 :     int         resno = 1;
    1140                 :     ListCell   *ctlc,
    1141                 :                *cclc,
    1142                 :                *itlc,
    1143                 :                *rtlc;
    1144                 :     TargetEntry *tle;
    1145                 :     Node       *expr;
    1146                 : 
    1147 GNC        6702 :     *trivial_tlist = true;      /* until proven differently */
    1148                 : 
    1149 CBC       24820 :     forfour(ctlc, colTypes, cclc, colCollations,
    1150 ECB             :             itlc, input_tlist, rtlc, refnames_tlist)
    1151                 :     {
    1152 GIC       18118 :         Oid         colType = lfirst_oid(ctlc);
    1153           18118 :         Oid         colColl = lfirst_oid(cclc);
    1154           18118 :         TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
    1155           18118 :         TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);
    1156                 : 
    1157           18118 :         Assert(inputtle->resno == resno);
    1158 CBC       18118 :         Assert(reftle->resno == resno);
    1159 GIC       18118 :         Assert(!inputtle->resjunk);
    1160 CBC       18118 :         Assert(!reftle->resjunk);
    1161                 : 
    1162                 :         /*
    1163 ECB             :          * Generate columns referencing input columns and having appropriate
    1164                 :          * data types and column names.  Insert datatype coercions where
    1165                 :          * necessary.
    1166                 :          *
    1167                 :          * HACK: constants in the input's targetlist are copied up as-is
    1168                 :          * rather than being referenced as subquery outputs.  This is mainly
    1169                 :          * to ensure that when we try to coerce them to the output column's
    1170                 :          * datatype, the right things happen for UNKNOWN constants.  But do
    1171                 :          * this only at the first level of subquery-scan plans; we don't want
    1172                 :          * phony constants appearing in the output tlists of upper-level
    1173                 :          * nodes!
    1174                 :          *
    1175                 :          * Note that copying a constant doesn't in itself require us to mark
    1176                 :          * the tlist nontrivial; see trivial_subqueryscan() in setrefs.c.
    1177                 :          */
    1178 GIC       18118 :         if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
    1179            6334 :             expr = (Node *) inputtle->expr;
    1180                 :         else
    1181           47136 :             expr = (Node *) makeVar(varno,
    1182           11784 :                                     inputtle->resno,
    1183           11784 :                                     exprType((Node *) inputtle->expr),
    1184           11784 :                                     exprTypmod((Node *) inputtle->expr),
    1185           11784 :                                     exprCollation((Node *) inputtle->expr),
    1186                 :                                     0);
    1187                 : 
    1188           18118 :         if (exprType(expr) != colType)
    1189                 :         {
    1190                 :             /*
    1191                 :              * Note: it's not really cool to be applying coerce_to_common_type
    1192 ECB             :              * here; one notable point is that assign_expr_collations never
    1193                 :              * gets run on any generated nodes.  For the moment that's not a
    1194                 :              * problem because we force the correct exposed collation below.
    1195                 :              * It would likely be best to make the parser generate the correct
    1196                 :              * output tlist for every set-op to begin with, though.
    1197                 :              */
    1198 CBC         614 :             expr = coerce_to_common_type(NULL,  /* no UNKNOWNs here */
    1199 ECB             :                                          expr,
    1200                 :                                          colType,
    1201                 :                                          "UNION/INTERSECT/EXCEPT");
    1202 GNC         614 :             *trivial_tlist = false; /* the coercion makes it not trivial */
    1203 ECB             :         }
    1204                 : 
    1205                 :         /*
    1206                 :          * Ensure the tlist entry's exposed collation matches the set-op. This
    1207                 :          * is necessary because plan_set_operations() reports the result
    1208                 :          * ordering as a list of SortGroupClauses, which don't carry collation
    1209                 :          * themselves but just refer to tlist entries.  If we don't show the
    1210                 :          * right collation then planner.c might do the wrong thing in
    1211                 :          * higher-level queries.
    1212                 :          *
    1213                 :          * Note we use RelabelType, not CollateExpr, since this expression
    1214                 :          * will reach the executor without any further processing.
    1215                 :          */
    1216 GIC       18118 :         if (exprCollation(expr) != colColl)
    1217                 :         {
    1218 CBC        4989 :             expr = applyRelabelType(expr,
    1219                 :                                     exprType(expr), exprTypmod(expr), colColl,
    1220                 :                                     COERCE_IMPLICIT_CAST, -1, false);
    1221 GNC        4989 :             *trivial_tlist = false; /* the relabel makes it not trivial */
    1222                 :         }
    1223                 : 
    1224 GIC       36236 :         tle = makeTargetEntry((Expr *) expr,
    1225           18118 :                               (AttrNumber) resno++,
    1226           18118 :                               pstrdup(reftle->resname),
    1227                 :                               false);
    1228                 : 
    1229                 :         /*
    1230                 :          * By convention, all non-resjunk columns in a setop tree have
    1231                 :          * ressortgroupref equal to their resno.  In some cases the ref isn't
    1232                 :          * needed, but this is a cleaner way than modifying the tlist later.
    1233                 :          */
    1234 CBC       18118 :         tle->ressortgroupref = tle->resno;
    1235                 : 
    1236           18118 :         tlist = lappend(tlist, tle);
    1237                 :     }
    1238                 : 
    1239            6702 :     if (flag >= 0)
    1240                 :     {
    1241                 :         /* Add a resjunk flag column */
    1242 ECB             :         /* flag value is the given constant */
    1243 CBC         606 :         expr = (Node *) makeConst(INT4OID,
    1244 ECB             :                                   -1,
    1245                 :                                   InvalidOid,
    1246                 :                                   sizeof(int32),
    1247                 :                                   Int32GetDatum(flag),
    1248                 :                                   false,
    1249                 :                                   true);
    1250 GIC         606 :         tle = makeTargetEntry((Expr *) expr,
    1251             606 :                               (AttrNumber) resno++,
    1252 ECB             :                               pstrdup("flag"),
    1253                 :                               true);
    1254 CBC         606 :         tlist = lappend(tlist, tle);
    1255 GNC         606 :         *trivial_tlist = false; /* the extra entry makes it not trivial */
    1256                 :     }
    1257                 : 
    1258 CBC        6702 :     return tlist;
    1259                 : }
    1260                 : 
    1261                 : /*
    1262 ECB             :  * Generate targetlist for a set-operation Append node
    1263                 :  *
    1264                 :  * colTypes: OID list of set-op's result column datatypes
    1265                 :  * colCollations: OID list of set-op's result column collations
    1266                 :  * flag: true to create a flag column copied up from subplans
    1267                 :  * input_tlists: list of tlists for sub-plans of the Append
    1268                 :  * refnames_tlist: targetlist to take column names from
    1269                 :  *
    1270                 :  * The entries in the Append's targetlist should always be simple Vars;
    1271                 :  * we just have to make sure they have the right datatypes/typmods/collations.
    1272                 :  * The Vars are always generated with varno 0.
    1273                 :  *
    1274                 :  * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
    1275                 :  * cannot figure out a realistic width for the tlist we make here.  But we
    1276                 :  * ought to refactor this code to produce a PathTarget directly, anyway.
    1277                 :  */
    1278                 : static List *
    1279 GIC        2666 : generate_append_tlist(List *colTypes, List *colCollations,
    1280                 :                       bool flag,
    1281                 :                       List *input_tlists,
    1282                 :                       List *refnames_tlist)
    1283                 : {
    1284            2666 :     List       *tlist = NIL;
    1285            2666 :     int         resno = 1;
    1286                 :     ListCell   *curColType;
    1287                 :     ListCell   *curColCollation;
    1288                 :     ListCell   *ref_tl_item;
    1289                 :     int         colindex;
    1290                 :     TargetEntry *tle;
    1291                 :     Node       *expr;
    1292                 :     ListCell   *tlistl;
    1293                 :     int32      *colTypmods;
    1294                 : 
    1295                 :     /*
    1296                 :      * First extract typmods to use.
    1297                 :      *
    1298 ECB             :      * If the inputs all agree on type and typmod of a particular column, use
    1299                 :      * that typmod; else use -1.
    1300                 :      */
    1301 GIC        2666 :     colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));
    1302                 : 
    1303 CBC        9377 :     foreach(tlistl, input_tlists)
    1304 ECB             :     {
    1305 GIC        6711 :         List       *subtlist = (List *) lfirst(tlistl);
    1306                 :         ListCell   *subtlistl;
    1307                 : 
    1308            6711 :         curColType = list_head(colTypes);
    1309            6711 :         colindex = 0;
    1310           25450 :         foreach(subtlistl, subtlist)
    1311                 :         {
    1312           18739 :             TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);
    1313                 : 
    1314           18739 :             if (subtle->resjunk)
    1315             606 :                 continue;
    1316           18133 :             Assert(curColType != NULL);
    1317           18133 :             if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
    1318                 :             {
    1319                 :                 /* If first subplan, copy the typmod; else compare */
    1320 CBC       18133 :                 int32       subtypmod = exprTypmod((Node *) subtle->expr);
    1321                 : 
    1322           18133 :                 if (tlistl == list_head(input_tlists))
    1323 GIC        6696 :                     colTypmods[colindex] = subtypmod;
    1324 CBC       11437 :                 else if (subtypmod != colTypmods[colindex])
    1325 GIC           6 :                     colTypmods[colindex] = -1;
    1326                 :             }
    1327 ECB             :             else
    1328                 :             {
    1329                 :                 /* types disagree, so force typmod to -1 */
    1330 UIC           0 :                 colTypmods[colindex] = -1;
    1331 ECB             :             }
    1332 GIC       18133 :             curColType = lnext(colTypes, curColType);
    1333 CBC       18133 :             colindex++;
    1334 ECB             :         }
    1335 CBC        6711 :         Assert(curColType == NULL);
    1336 ECB             :     }
    1337                 : 
    1338                 :     /*
    1339                 :      * Now we can build the tlist for the Append.
    1340                 :      */
    1341 CBC        2666 :     colindex = 0;
    1342            9362 :     forthree(curColType, colTypes, curColCollation, colCollations,
    1343 ECB             :              ref_tl_item, refnames_tlist)
    1344                 :     {
    1345 GIC        6696 :         Oid         colType = lfirst_oid(curColType);
    1346            6696 :         int32       colTypmod = colTypmods[colindex++];
    1347            6696 :         Oid         colColl = lfirst_oid(curColCollation);
    1348            6696 :         TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);
    1349 EUB             : 
    1350 GIC        6696 :         Assert(reftle->resno == resno);
    1351 CBC        6696 :         Assert(!reftle->resjunk);
    1352            6696 :         expr = (Node *) makeVar(0,
    1353                 :                                 resno,
    1354 ECB             :                                 colType,
    1355                 :                                 colTypmod,
    1356                 :                                 colColl,
    1357                 :                                 0);
    1358 GIC       13392 :         tle = makeTargetEntry((Expr *) expr,
    1359            6696 :                               (AttrNumber) resno++,
    1360 CBC        6696 :                               pstrdup(reftle->resname),
    1361 ECB             :                               false);
    1362                 : 
    1363                 :         /*
    1364                 :          * By convention, all non-resjunk columns in a setop tree have
    1365                 :          * ressortgroupref equal to their resno.  In some cases the ref isn't
    1366                 :          * needed, but this is a cleaner way than modifying the tlist later.
    1367                 :          */
    1368 GIC        6696 :         tle->ressortgroupref = tle->resno;
    1369 ECB             : 
    1370 CBC        6696 :         tlist = lappend(tlist, tle);
    1371 ECB             :     }
    1372                 : 
    1373 GIC        2666 :     if (flag)
    1374                 :     {
    1375                 :         /* Add a resjunk flag column */
    1376                 :         /* flag value is shown as copied up from subplan */
    1377 CBC         303 :         expr = (Node *) makeVar(0,
    1378 ECB             :                                 resno,
    1379                 :                                 INT4OID,
    1380                 :                                 -1,
    1381                 :                                 InvalidOid,
    1382                 :                                 0);
    1383 GIC         303 :         tle = makeTargetEntry((Expr *) expr,
    1384             303 :                               (AttrNumber) resno++,
    1385                 :                               pstrdup("flag"),
    1386                 :                               true);
    1387 CBC         303 :         tlist = lappend(tlist, tle);
    1388                 :     }
    1389 ECB             : 
    1390 GIC        2666 :     pfree(colTypmods);
    1391                 : 
    1392 CBC        2666 :     return tlist;
    1393                 : }
    1394                 : 
    1395                 : /*
    1396 ECB             :  * generate_setop_grouplist
    1397                 :  *      Build a SortGroupClause list defining the sort/grouping properties
    1398                 :  *      of the setop's output columns.
    1399                 :  *
    1400                 :  * Parse analysis already determined the properties and built a suitable
    1401                 :  * list, except that the entries do not have sortgrouprefs set because
    1402                 :  * the parser output representation doesn't include a tlist for each
    1403                 :  * setop.  So what we need to do here is copy that list and install
    1404                 :  * proper sortgrouprefs into it (copying those from the targetlist).
    1405                 :  */
    1406                 : static List *
    1407 GIC        2216 : generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
    1408                 : {
    1409 CBC        2216 :     List       *grouplist = copyObject(op->groupClauses);
    1410                 :     ListCell   *lg;
    1411 ECB             :     ListCell   *lt;
    1412                 : 
    1413 GIC        2216 :     lg = list_head(grouplist);
    1414            7889 :     foreach(lt, targetlist)
    1415                 :     {
    1416            5673 :         TargetEntry *tle = (TargetEntry *) lfirst(lt);
    1417                 :         SortGroupClause *sgc;
    1418                 : 
    1419            5673 :         if (tle->resjunk)
    1420                 :         {
    1421                 :             /* resjunk columns should not have sortgrouprefs */
    1422             303 :             Assert(tle->ressortgroupref == 0);
    1423             303 :             continue;           /* ignore resjunk columns */
    1424                 :         }
    1425                 : 
    1426 ECB             :         /* non-resjunk columns should have sortgroupref = resno */
    1427 GIC        5370 :         Assert(tle->ressortgroupref == tle->resno);
    1428 ECB             : 
    1429                 :         /* non-resjunk columns should have grouping clauses */
    1430 GIC        5370 :         Assert(lg != NULL);
    1431            5370 :         sgc = (SortGroupClause *) lfirst(lg);
    1432 CBC        5370 :         lg = lnext(grouplist, lg);
    1433            5370 :         Assert(sgc->tleSortGroupRef == 0);
    1434                 : 
    1435            5370 :         sgc->tleSortGroupRef = tle->ressortgroupref;
    1436                 :     }
    1437 GIC        2216 :     Assert(lg == NULL);
    1438 CBC        2216 :     return grouplist;
    1439                 : }

Generated by: LCOV version v1.16-55-g56c0a2a