Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * planagg.c
4 : * Special planning for aggregate queries.
5 : *
6 : * This module tries to replace MIN/MAX aggregate functions by subqueries
7 : * of the form
8 : * (SELECT col FROM tab
9 : * WHERE col IS NOT NULL AND existing-quals
10 : * ORDER BY col ASC/DESC
11 : * LIMIT 1)
12 : * Given a suitable index on tab.col, this can be much faster than the
13 : * generic scan-all-the-rows aggregation plan. We can handle multiple
14 : * MIN/MAX aggregates by generating multiple subqueries, and their
15 : * orderings can be different. However, if the query contains any
16 : * non-optimizable aggregates, there's no point since we'll have to
17 : * scan all the rows anyway.
18 : *
19 : *
20 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
21 : * Portions Copyright (c) 1994, Regents of the University of California
22 : *
23 : *
24 : * IDENTIFICATION
25 : * src/backend/optimizer/plan/planagg.c
26 : *
27 : *-------------------------------------------------------------------------
28 : */
29 : #include "postgres.h"
30 :
31 : #include "access/htup_details.h"
32 : #include "catalog/pg_aggregate.h"
33 : #include "catalog/pg_type.h"
34 : #include "nodes/makefuncs.h"
35 : #include "nodes/nodeFuncs.h"
36 : #include "optimizer/clauses.h"
37 : #include "optimizer/cost.h"
38 : #include "optimizer/optimizer.h"
39 : #include "optimizer/pathnode.h"
40 : #include "optimizer/paths.h"
41 : #include "optimizer/planmain.h"
42 : #include "optimizer/subselect.h"
43 : #include "optimizer/tlist.h"
44 : #include "parser/parse_clause.h"
45 : #include "parser/parsetree.h"
46 : #include "rewrite/rewriteManip.h"
47 : #include "utils/lsyscache.h"
48 : #include "utils/syscache.h"
49 :
50 : static bool can_minmax_aggs(PlannerInfo *root, List **context);
51 : static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
52 : Oid eqop, Oid sortop, bool nulls_first);
53 : static void minmax_qp_callback(PlannerInfo *root, void *extra);
54 : static Oid fetch_agg_sort_op(Oid aggfnoid);
55 :
56 :
57 : /*
58 : * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
59 : *
60 : * Check to see whether the query contains MIN/MAX aggregate functions that
61 : * might be optimizable via indexscans. If it does, and all the aggregates
62 : * are potentially optimizable, then create a MinMaxAggPath and add it to
63 : * the (UPPERREL_GROUP_AGG, NULL) upperrel.
64 : *
65 : * This should be called by grouping_planner() just before it's ready to call
66 : * query_planner(), because we generate indexscan paths by cloning the
67 : * planner's state and invoking query_planner() on a modified version of
68 : * the query parsetree. Thus, all preprocessing needed before query_planner()
69 : * must already be done. This relies on the list of aggregates in
70 : * root->agginfos, so preprocess_aggrefs() must have been called already, too.
71 : */
72 : void
1474 tgl 73 CBC 16002 : preprocess_minmax_aggregates(PlannerInfo *root)
74 : {
6517 75 16002 : Query *parse = root->parse;
76 : FromExpr *jtnode;
77 : RangeTblRef *rtr;
78 : RangeTblEntry *rte;
79 : List *aggs_list;
80 : RelOptInfo *grouped_rel;
81 : ListCell *lc;
82 :
83 : /* minmax_aggs list should be empty at this point */
4539 84 16002 : Assert(root->minmax_aggs == NIL);
85 :
86 : /* Nothing to do if query has no aggregates */
6517 87 16002 : if (!parse->hasAggs)
4539 88 15812 : return;
89 :
2118 90 16002 : Assert(!parse->setOperations); /* shouldn't get here if a setop */
91 16002 : Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
92 :
93 : /*
94 : * Reject unoptimizable cases.
95 : *
96 : * We don't handle GROUP BY or windowing, because our current
97 : * implementations of grouping require looking at all the rows anyway, and
98 : * so there's not much point in optimizing MIN/MAX.
99 : */
2589 100 16002 : if (parse->groupClause || list_length(parse->groupingSets) > 1 ||
101 14279 : parse->hasWindowFuncs)
4539 102 1726 : return;
103 :
104 : /*
105 : * Reject if query contains any CTEs; there's no way to build an indexscan
106 : * on one so we couldn't succeed here. (If the CTEs are unreferenced,
107 : * that's not true, but it doesn't seem worth expending cycles to check.)
108 : */
2308 109 14276 : if (parse->cteList)
110 12 : return;
111 :
112 : /*
113 : * We also restrict the query to reference exactly one table, since join
114 : * conditions can't be handled reasonably. (We could perhaps handle a
115 : * query containing cartesian-product joins, but it hardly seems worth the
116 : * trouble.) However, the single table could be buried in several levels
117 : * of FromExpr due to subqueries. Note the "single" table could be an
118 : * inheritance parent, too, including the case of a UNION ALL subquery
119 : * that's been flattened to an appendrel.
120 : */
5906 121 14264 : jtnode = parse->jointree;
122 27188 : while (IsA(jtnode, FromExpr))
123 : {
124 14281 : if (list_length(jtnode->fromlist) != 1)
4539 125 1357 : return;
5906 126 12924 : jtnode = linitial(jtnode->fromlist);
127 : }
128 12907 : if (!IsA(jtnode, RangeTblRef))
4539 129 470 : return;
5906 130 12437 : rtr = (RangeTblRef *) jtnode;
5832 131 12437 : rte = planner_rt_fetch(rtr->rtindex, root);
4041 132 12437 : if (rte->rtekind == RTE_RELATION)
133 : /* ordinary relation, ok */ ;
134 1544 : else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
135 : /* flattened UNION ALL subquery, ok */ ;
136 : else
4539 137 1517 : return;
138 :
139 : /*
140 : * Examine all the aggregates and verify all are MIN/MAX aggregates. Stop
141 : * as soon as we find one that isn't.
142 : */
6572 143 10920 : aggs_list = NIL;
866 heikki.linnakangas 144 10920 : if (!can_minmax_aggs(root, &aggs_list))
4539 tgl 145 10578 : return;
146 :
147 : /*
148 : * OK, there is at least the possibility of performing the optimization.
149 : * Build an access path for each aggregate. If any of the aggregates
150 : * prove to be non-indexable, give up; there is no point in optimizing
151 : * just some of them.
152 : */
153 550 : foreach(lc, aggs_list)
154 : {
155 360 : MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
156 : Oid eqop;
157 : bool reverse;
158 :
159 : /*
160 : * We'll need the equality operator that goes with the aggregate's
161 : * ordering operator.
162 : */
4401 163 360 : eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
4382 bruce 164 360 : if (!OidIsValid(eqop)) /* shouldn't happen */
4401 tgl 165 UBC 0 : elog(ERROR, "could not find equality operator for ordering operator %u",
166 : mminfo->aggsortop);
167 :
168 : /*
169 : * We can use either an ordering that gives NULLS FIRST or one that
170 : * gives NULLS LAST; furthermore there's unlikely to be much
171 : * performance difference between them, so it doesn't seem worth
172 : * costing out both ways if we get a hit on the first one. NULLS
173 : * FIRST is more likely to be available if the operator is a
174 : * reverse-sort operator, so try that first if reverse.
175 : */
4401 tgl 176 CBC 360 : if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
177 208 : continue;
178 152 : if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
4401 tgl 179 UBC 0 : continue;
180 :
181 : /* No indexable path for this aggregate, so fail */
4401 tgl 182 CBC 152 : return;
183 : }
184 :
185 : /*
186 : * OK, we can do the query this way. Prepare to create a MinMaxAggPath
187 : * node.
188 : *
189 : * First, create an output Param node for each agg. (If we end up not
190 : * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
191 : * which is not worth worrying about. We can't wait till create_plan time
192 : * to decide whether to make the Param, unfortunately.)
193 : */
2589 194 398 : foreach(lc, aggs_list)
195 : {
4539 196 208 : MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
197 :
2589 198 208 : mminfo->param =
199 208 : SS_make_initplan_output_param(root,
200 208 : exprType((Node *) mminfo->target),
201 : -1,
2118 202 208 : exprCollation((Node *) mminfo->target));
203 : }
204 :
205 : /*
206 : * Create a MinMaxAggPath node with the appropriate estimated costs and
207 : * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
208 : * it will compete against the standard aggregate implementation. (It
209 : * will likely always win, but we need not assume that here.)
210 : *
211 : * Note: grouping_planner won't have created this upperrel yet, but it's
212 : * fine for us to create it first. We will not have inserted the correct
213 : * consider_parallel value in it, but MinMaxAggPath paths are currently
214 : * never parallel-safe anyway, so that doesn't matter. Likewise, it
215 : * doesn't matter that we haven't filled FDW-related fields in the rel.
216 : * Also, because there are no rowmarks, we know that the processed_tlist
217 : * doesn't need to change anymore, so making the pathtarget now is safe.
218 : */
2589 219 190 : grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
220 190 : add_path(grouped_rel, (Path *)
221 190 : create_minmaxagg_path(root, grouped_rel,
222 : create_pathtarget(root,
223 : root->processed_tlist),
224 : aggs_list,
225 190 : (List *) parse->havingQual));
226 : }
227 :
228 : /*
229 : * can_minmax_aggs
230 : * Examine all the aggregates in the query, and check if they are
231 : * all MIN/MAX aggregates. If so, build a list of MinMaxAggInfo
232 : * nodes for them.
233 : *
234 : * Returns false if a non-MIN/MAX aggregate is found, true otherwise.
235 : */
236 : static bool
866 heikki.linnakangas 237 GIC 10920 : can_minmax_aggs(PlannerInfo *root, List **context)
238 : {
239 : ListCell *lc;
240 :
241 : /*
242 : * This function used to have to scan the query for itself, but now we can
243 : * just thumb through the AggInfo list made by preprocess_aggrefs.
244 : */
866 heikki.linnakangas 245 CBC 11451 : foreach(lc, root->agginfos)
246 : {
264 tgl 247 GNC 11109 : AggInfo *agginfo = lfirst_node(AggInfo, lc);
250 drowley 248 11109 : Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs);
249 : Oid aggsortop;
250 : TargetEntry *curTarget;
251 : MinMaxAggInfo *mminfo;
252 :
6572 tgl 253 CBC 11109 : Assert(aggref->agglevelsup == 0);
3554 noah 254 11109 : if (list_length(aggref->args) != 1)
866 heikki.linnakangas 255 10578 : return false; /* it couldn't be MIN/MAX */
256 :
257 : /*
258 : * ORDER BY is usually irrelevant for MIN/MAX, but it can change the
259 : * outcome if the aggsortop's operator class recognizes non-identical
260 : * values as equal. For example, 4.0 and 4.00 are equal according to
261 : * numeric_ops, yet distinguishable. If MIN() receives more than one
262 : * value equal to 4.0 and no value less than 4.0, it is unspecified
263 : * which of those equal values MIN() returns. An ORDER BY expression
264 : * that differs for each of those equal values of the argument
265 : * expression makes the result predictable once again. This is a
266 : * niche requirement, and we do not implement it with subquery paths.
267 : * In any case, this test lets us reject ordered-set aggregates
268 : * quickly.
269 : */
3554 noah 270 7073 : if (aggref->aggorder != NIL)
866 heikki.linnakangas 271 211 : return false;
272 : /* note: we do not care if DISTINCT is mentioned ... */
273 :
274 : /*
275 : * We might implement the optimization when a FILTER clause is present
276 : * by adding the filter to the quals of the generated subquery. For
277 : * now, just punt.
278 : */
3554 noah 279 6862 : if (aggref->aggfilter != NULL)
866 heikki.linnakangas 280 221 : return false;
281 :
6572 tgl 282 6641 : aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
283 6641 : if (!OidIsValid(aggsortop))
866 heikki.linnakangas 284 6107 : return false; /* not a MIN/MAX aggregate */
285 :
3554 noah 286 534 : curTarget = (TargetEntry *) linitial(aggref->args);
287 :
4539 tgl 288 534 : if (contain_mutable_functions((Node *) curTarget->expr))
866 heikki.linnakangas 289 3 : return false; /* not potentially indexable */
290 :
4539 tgl 291 531 : if (type_is_rowtype(exprType((Node *) curTarget->expr)))
866 heikki.linnakangas 292 UBC 0 : return false; /* IS NOT NULL would have weird semantics */
293 :
4539 tgl 294 CBC 531 : mminfo = makeNode(MinMaxAggInfo);
295 531 : mminfo->aggfnoid = aggref->aggfnoid;
296 531 : mminfo->aggsortop = aggsortop;
297 531 : mminfo->target = curTarget->expr;
4382 bruce 298 531 : mminfo->subroot = NULL; /* don't compute path yet */
4401 tgl 299 531 : mminfo->path = NULL;
300 531 : mminfo->pathcost = 0;
301 531 : mminfo->param = NULL;
302 :
4539 303 531 : *context = lappend(*context, mminfo);
304 : }
866 heikki.linnakangas 305 342 : return true;
306 : }
307 :
308 : /*
309 : * build_minmax_path
310 : * Given a MIN/MAX aggregate, try to build an indexscan Path it can be
311 : * optimized with.
312 : *
313 : * If successful, stash the best path in *mminfo and return true.
314 : * Otherwise, return false.
315 : */
316 : static bool
4401 tgl 317 512 : build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
318 : Oid eqop, Oid sortop, bool nulls_first)
319 : {
320 : PlannerInfo *subroot;
321 : Query *parse;
322 : TargetEntry *tle;
323 : List *tlist;
324 : NullTest *ntest;
325 : SortGroupClause *sortcl;
326 : RelOptInfo *final_rel;
327 : Path *sorted_path;
328 : Cost path_cost;
329 : double path_fraction;
330 :
331 : /*
332 : * We are going to construct what is effectively a sub-SELECT query, so
333 : * clone the current query level's state and adjust it to make it look
334 : * like a subquery. Any outer references will now be one level higher
335 : * than before. (This means that when we are done, there will be no Vars
336 : * of level 1, which is why the subquery can become an initplan.)
337 : */
338 512 : subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
339 512 : memcpy(subroot, root, sizeof(PlannerInfo));
2589 340 512 : subroot->query_level++;
341 512 : subroot->parent_root = root;
342 : /* reset subplan-related stuff */
2798 343 512 : subroot->plan_params = NIL;
344 512 : subroot->outer_params = NULL;
345 512 : subroot->init_plans = NIL;
866 heikki.linnakangas 346 512 : subroot->agginfos = NIL;
347 512 : subroot->aggtransinfos = NIL;
348 :
2222 peter_e 349 512 : subroot->parse = parse = copyObject(root->parse);
2589 tgl 350 512 : IncrementVarSublevelsUp((Node *) parse, 1, 1);
351 :
352 : /* append_rel_list might contain outer Vars? */
2222 peter_e 353 512 : subroot->append_rel_list = copyObject(root->append_rel_list);
2589 tgl 354 512 : IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
355 : /* There shouldn't be any OJ info to translate, as yet */
4401 356 512 : Assert(subroot->join_info_list == NIL);
357 : /* and we haven't made equivalence classes, either */
2589 358 512 : Assert(subroot->eq_classes == NIL);
359 : /* and we haven't created PlaceHolderInfos, either */
4401 360 512 : Assert(subroot->placeholder_list == NIL);
361 :
362 : /*----------
363 : * Generate modified query of the form
364 : * (SELECT col FROM tab
365 : * WHERE col IS NOT NULL AND existing-quals
366 : * ORDER BY col ASC/DESC
367 : * LIMIT 1)
368 : *----------
369 : */
370 : /* single tlist entry that is the aggregate target */
371 512 : tle = makeTargetEntry(copyObject(mminfo->target),
372 : (AttrNumber) 1,
373 : pstrdup("agg_target"),
374 : false);
2589 375 512 : tlist = list_make1(tle);
376 512 : subroot->processed_tlist = parse->targetList = tlist;
377 :
378 : /* No HAVING, no DISTINCT, no aggregates anymore */
4401 379 512 : parse->havingQual = NULL;
380 512 : subroot->hasHavingQual = false;
381 512 : parse->distinctClause = NIL;
382 512 : parse->hasDistinctOn = false;
383 512 : parse->hasAggs = false;
384 :
385 : /* Build "target IS NOT NULL" expression */
386 512 : ntest = makeNode(NullTest);
387 512 : ntest->nulltesttype = IS_NOT_NULL;
388 512 : ntest->arg = copyObject(mminfo->target);
389 : /* we checked it wasn't a rowtype in find_minmax_aggs_walker */
390 512 : ntest->argisrow = false;
2968 391 512 : ntest->location = -1;
392 :
393 : /* User might have had that in WHERE already */
4401 394 512 : if (!list_member((List *) parse->jointree->quals, ntest))
395 512 : parse->jointree->quals = (Node *)
396 512 : lcons(ntest, (List *) parse->jointree->quals);
397 :
398 : /* Build suitable ORDER BY clause */
399 512 : sortcl = makeNode(SortGroupClause);
1474 400 512 : sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist);
4401 401 512 : sortcl->eqop = eqop;
402 512 : sortcl->sortop = sortop;
403 512 : sortcl->nulls_first = nulls_first;
4382 bruce 404 512 : sortcl->hashable = false; /* no need to make this accurate */
4401 tgl 405 512 : parse->sortClause = list_make1(sortcl);
406 :
407 : /* set up expressions for LIMIT 1 */
408 512 : parse->limitOffset = NULL;
4398 409 512 : parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
410 : sizeof(int64),
411 : Int64GetDatum(1), false,
412 : FLOAT8PASSBYVAL);
413 :
414 : /*
415 : * Generate the best paths for this query, telling query_planner that we
416 : * have LIMIT 1.
417 : */
3534 418 512 : subroot->tuple_fraction = 1.0;
419 512 : subroot->limit_tuples = 1.0;
420 :
1474 421 512 : final_rel = query_planner(subroot, minmax_qp_callback, NULL);
422 :
423 : /*
424 : * Since we didn't go through subquery_planner() to handle the subquery,
425 : * we have to do some of the same cleanup it would do, in particular cope
426 : * with params and initplans used within this subquery. (This won't
427 : * matter if we end up not using the subplan.)
428 : */
2589 429 512 : SS_identify_outer_params(subroot);
430 512 : SS_charge_for_initplans(subroot, final_rel);
431 :
432 : /*
433 : * Get the best presorted path, that being the one that's cheapest for
434 : * fetching just one row. If there's no such path, fail.
435 : */
3534 436 512 : if (final_rel->rows > 1.0)
437 506 : path_fraction = 1.0 / final_rel->rows;
438 : else
439 6 : path_fraction = 1.0;
440 :
441 : sorted_path =
442 512 : get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
443 : subroot->query_pathkeys,
444 : NULL,
445 : path_fraction);
4401 446 512 : if (!sorted_path)
3534 447 304 : return false;
448 :
449 : /*
450 : * The path might not return exactly what we want, so fix that. (We
451 : * assume that this won't change any conclusions about which was the
452 : * cheapest path.)
453 : */
2589 454 208 : sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
455 : create_pathtarget(subroot,
456 : subroot->processed_tlist));
457 :
458 : /*
459 : * Determine cost to get just the first row of the presorted path.
460 : *
461 : * Note: cost calculation here should match
462 : * compare_fractional_path_costs().
463 : */
4401 464 208 : path_cost = sorted_path->startup_cost +
465 208 : path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
466 :
467 : /* Save state for further processing */
468 208 : mminfo->subroot = subroot;
469 208 : mminfo->path = sorted_path;
470 208 : mminfo->pathcost = path_cost;
471 :
472 208 : return true;
473 : }
474 :
475 : /*
476 : * Compute query_pathkeys and other pathkeys during query_planner()
477 : */
478 : static void
3632 479 512 : minmax_qp_callback(PlannerInfo *root, void *extra)
480 : {
481 512 : root->group_pathkeys = NIL;
482 512 : root->window_pathkeys = NIL;
483 512 : root->distinct_pathkeys = NIL;
484 :
485 512 : root->sort_pathkeys =
486 512 : make_pathkeys_for_sortclauses(root,
487 512 : root->parse->sortClause,
488 512 : root->parse->targetList);
489 :
490 512 : root->query_pathkeys = root->sort_pathkeys;
491 512 : }
492 :
493 : /*
494 : * Get the OID of the sort operator, if any, associated with an aggregate.
495 : * Returns InvalidOid if there is no such operator.
496 : */
497 : static Oid
6572 498 6641 : fetch_agg_sort_op(Oid aggfnoid)
499 : {
500 : HeapTuple aggTuple;
501 : Form_pg_aggregate aggform;
502 : Oid aggsortop;
503 :
504 : /* fetch aggregate entry from pg_aggregate */
4802 rhaas 505 6641 : aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
6572 tgl 506 6641 : if (!HeapTupleIsValid(aggTuple))
6572 tgl 507 UBC 0 : return InvalidOid;
6572 tgl 508 CBC 6641 : aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
509 6641 : aggsortop = aggform->aggsortop;
510 6641 : ReleaseSysCache(aggTuple);
511 :
512 6641 : return aggsortop;
513 : }
|