Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * execPartition.c
4 : * Support routines for partitioning.
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/executor/execPartition.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/table.h"
17 : #include "access/tableam.h"
18 : #include "catalog/partition.h"
19 : #include "catalog/pg_inherits.h"
20 : #include "catalog/pg_type.h"
21 : #include "executor/execPartition.h"
22 : #include "executor/executor.h"
23 : #include "executor/nodeModifyTable.h"
24 : #include "foreign/fdwapi.h"
25 : #include "mb/pg_wchar.h"
26 : #include "miscadmin.h"
27 : #include "nodes/makefuncs.h"
28 : #include "partitioning/partbounds.h"
29 : #include "partitioning/partdesc.h"
30 : #include "partitioning/partprune.h"
31 : #include "rewrite/rewriteManip.h"
32 : #include "utils/acl.h"
33 : #include "utils/lsyscache.h"
34 : #include "utils/partcache.h"
35 : #include "utils/rls.h"
36 : #include "utils/ruleutils.h"
37 :
38 :
39 : /*-----------------------
40 : * PartitionTupleRouting - Encapsulates all information required to
41 : * route a tuple inserted into a partitioned table to one of its leaf
42 : * partitions.
43 : *
44 : * partition_root
45 : * The partitioned table that's the target of the command.
46 : *
47 : * partition_dispatch_info
48 : * Array of 'max_dispatch' elements containing a pointer to a
49 : * PartitionDispatch object for every partitioned table touched by tuple
50 : * routing. The entry for the target partitioned table is *always*
51 : * present in the 0th element of this array. See comment for
52 : * PartitionDispatchData->indexes for details on how this array is
53 : * indexed.
54 : *
55 : * nonleaf_partitions
56 : * Array of 'max_dispatch' elements containing pointers to fake
57 : * ResultRelInfo objects for nonleaf partitions, useful for checking
58 : * the partition constraint.
59 : *
60 : * num_dispatch
61 : * The current number of items stored in the 'partition_dispatch_info'
62 : * array. Also serves as the index of the next free array element for
63 : * new PartitionDispatch objects that need to be stored.
64 : *
65 : * max_dispatch
66 : * The current allocated size of the 'partition_dispatch_info' array.
67 : *
68 : * partitions
69 : * Array of 'max_partitions' elements containing a pointer to a
70 : * ResultRelInfo for every leaf partition touched by tuple routing.
71 : * Some of these are pointers to ResultRelInfos which are borrowed out of
72 : * the owning ModifyTableState node. The remainder have been built
73 : * especially for tuple routing. See comment for
74 : * PartitionDispatchData->indexes for details on how this array is
75 : * indexed.
76 : *
77 : * is_borrowed_rel
78 : * Array of 'max_partitions' booleans recording whether a given entry
79 : * in 'partitions' is a ResultRelInfo pointer borrowed from the owning
80 : * ModifyTableState node, rather than being built here.
81 : *
82 : * num_partitions
83 : * The current number of items stored in the 'partitions' array. Also
84 : * serves as the index of the next free array element for new
85 : * ResultRelInfo objects that need to be stored.
86 : *
87 : * max_partitions
88 : * The current allocated size of the 'partitions' array.
89 : *
90 : * memcxt
91 : * Memory context used to allocate subsidiary structs.
92 : *-----------------------
93 : */
94 : struct PartitionTupleRouting
95 : {
96 : Relation partition_root;
97 : PartitionDispatch *partition_dispatch_info;
98 : ResultRelInfo **nonleaf_partitions;
99 : int num_dispatch;
100 : int max_dispatch;
101 : ResultRelInfo **partitions;
102 : bool *is_borrowed_rel;
103 : int num_partitions;
104 : int max_partitions;
105 : MemoryContext memcxt;
106 : };
107 :
108 : /*-----------------------
109 : * PartitionDispatch - information about one partitioned table in a partition
110 : * hierarchy required to route a tuple to any of its partitions. A
111 : * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
112 : * struct and stored inside its 'partition_dispatch_info' array.
113 : *
114 : * reldesc
115 : * Relation descriptor of the table
116 : *
117 : * key
118 : * Partition key information of the table
119 : *
120 : * keystate
121 : * Execution state required for expressions in the partition key
122 : *
123 : * partdesc
124 : * Partition descriptor of the table
125 : *
126 : * tupslot
127 : * A standalone TupleTableSlot initialized with this table's tuple
128 : * descriptor, or NULL if no tuple conversion between the parent is
129 : * required.
130 : *
131 : * tupmap
132 : * TupleConversionMap to convert from the parent's rowtype to this table's
133 : * rowtype (when extracting the partition key of a tuple just before
134 : * routing it through this table). A NULL value is stored if no tuple
135 : * conversion is required.
136 : *
137 : * indexes
138 : * Array of partdesc->nparts elements. For leaf partitions the index
139 : * corresponds to the partition's ResultRelInfo in the encapsulating
140 : * PartitionTupleRouting's partitions array. For partitioned partitions,
141 : * the index corresponds to the PartitionDispatch for it in its
142 : * partition_dispatch_info array. -1 indicates we've not yet allocated
143 : * anything in PartitionTupleRouting for the partition.
144 : *-----------------------
145 : */
146 : typedef struct PartitionDispatchData
147 : {
148 : Relation reldesc;
149 : PartitionKey key;
150 : List *keystate; /* list of ExprState */
151 : PartitionDesc partdesc;
152 : TupleTableSlot *tupslot;
153 : AttrMap *tupmap;
154 : int indexes[FLEXIBLE_ARRAY_MEMBER];
155 : } PartitionDispatchData;
156 :
157 :
158 : static ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
159 : EState *estate, PartitionTupleRouting *proute,
160 : PartitionDispatch dispatch,
161 : ResultRelInfo *rootResultRelInfo,
162 : int partidx);
163 : static void ExecInitRoutingInfo(ModifyTableState *mtstate,
164 : EState *estate,
165 : PartitionTupleRouting *proute,
166 : PartitionDispatch dispatch,
167 : ResultRelInfo *partRelInfo,
168 : int partidx,
169 : bool is_borrowed_rel);
170 : static PartitionDispatch ExecInitPartitionDispatchInfo(EState *estate,
171 : PartitionTupleRouting *proute,
172 : Oid partoid, PartitionDispatch parent_pd,
173 : int partidx, ResultRelInfo *rootResultRelInfo);
174 : static void FormPartitionKeyDatum(PartitionDispatch pd,
175 : TupleTableSlot *slot,
176 : EState *estate,
177 : Datum *values,
178 : bool *isnull);
179 : static int get_partition_for_tuple(PartitionDispatch pd, Datum *values,
180 : bool *isnull);
181 : static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
182 : Datum *values,
183 : bool *isnull,
184 : int maxfieldlen);
185 : static List *adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri);
186 : static List *adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap);
187 : static PartitionPruneState *CreatePartitionPruneState(PlanState *planstate,
188 : PartitionPruneInfo *pruneinfo);
189 : static void InitPartitionPruneContext(PartitionPruneContext *context,
190 : List *pruning_steps,
191 : PartitionDesc partdesc,
192 : PartitionKey partkey,
193 : PlanState *planstate,
194 : ExprContext *econtext);
195 : static void PartitionPruneFixSubPlanMap(PartitionPruneState *prunestate,
196 : Bitmapset *initially_valid_subplans,
197 : int n_total_subplans);
198 : static void find_matching_subplans_recurse(PartitionPruningData *prunedata,
199 : PartitionedRelPruningData *pprune,
200 : bool initial_prune,
201 : Bitmapset **validsubplans);
202 :
203 :
204 : /*
205 : * ExecSetupPartitionTupleRouting - sets up information needed during
206 : * tuple routing for partitioned tables, encapsulates it in
207 : * PartitionTupleRouting, and returns it.
208 : *
209 : * Callers must use the returned PartitionTupleRouting during calls to
210 : * ExecFindPartition(). The actual ResultRelInfo for a partition is only
211 : * allocated when the partition is found for the first time.
212 : *
213 : * The current memory context is used to allocate this struct and all
214 : * subsidiary structs that will be allocated from it later on. Typically
215 : * it should be estate->es_query_cxt.
216 : */
217 : PartitionTupleRouting *
733 tgl 218 CBC 3155 : ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
219 : {
220 : PartitionTupleRouting *proute;
221 :
222 : /*
223 : * Here we attempt to expend as little effort as possible in setting up
224 : * the PartitionTupleRouting. Each partition's ResultRelInfo is built on
225 : * demand, only when we actually need to route a tuple to that partition.
226 : * The reason for this is that a common case is for INSERT to insert a
227 : * single tuple into a partitioned table and this must be fast.
228 : */
1921 rhaas 229 3155 : proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
1605 alvherre 230 3155 : proute->partition_root = rel;
231 3155 : proute->memcxt = CurrentMemoryContext;
232 : /* Rest of members initialized by zeroing */
233 :
234 : /*
235 : * Initialize this table's PartitionDispatch object. Here we pass in the
236 : * parent as NULL as we don't need to care about any parent of the target
237 : * partitioned table.
238 : */
1494 rhaas 239 3155 : ExecInitPartitionDispatchInfo(estate, proute, RelationGetRelid(rel),
240 : NULL, 0, NULL);
241 :
1921 242 3155 : return proute;
243 : }
244 :
245 : /*
246 : * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
247 : * the tuple contained in *slot should belong to.
248 : *
249 : * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
250 : * one up or reuse one from mtstate's resultRelInfo array. When reusing a
251 : * ResultRelInfo from the mtstate we verify that the relation is a valid
252 : * target for INSERTs and initialize tuple routing information.
253 : *
254 : * rootResultRelInfo is the relation named in the query.
255 : *
256 : * estate must be non-NULL; we'll need it to compute any expressions in the
257 : * partition keys. Also, its per-tuple contexts are used as evaluation
258 : * scratch space.
259 : *
260 : * If no leaf partition is found, this routine errors out with the appropriate
261 : * error message. An error may also be raised if the found target partition
262 : * is not a valid target for an INSERT.
263 : */
264 : ResultRelInfo *
1605 alvherre 265 467670 : ExecFindPartition(ModifyTableState *mtstate,
266 : ResultRelInfo *rootResultRelInfo,
267 : PartitionTupleRouting *proute,
268 : TupleTableSlot *slot, EState *estate)
269 : {
270 467670 : PartitionDispatch *pd = proute->partition_dispatch_info;
271 : Datum values[PARTITION_MAX_KEYS];
272 : bool isnull[PARTITION_MAX_KEYS];
273 : Relation rel;
274 : PartitionDispatch dispatch;
275 : PartitionDesc partdesc;
1971 rhaas 276 467670 : ExprContext *ecxt = GetPerTupleExprContext(estate);
943 alvherre 277 467670 : TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
278 467670 : TupleTableSlot *rootslot = slot;
1712 279 467670 : TupleTableSlot *myslot = NULL;
280 : MemoryContext oldcxt;
943 281 467670 : ResultRelInfo *rri = NULL;
282 :
283 : /* use per-tuple context here to avoid leaking memory */
1712 284 467670 : oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
285 :
286 : /*
287 : * First check the root table's partition constraint, if any. No point in
288 : * routing the tuple if it doesn't belong in the root table itself.
289 : */
935 tgl 290 467670 : if (rootResultRelInfo->ri_RelationDesc->rd_rel->relispartition)
1605 alvherre 291 2248 : ExecPartitionCheck(rootResultRelInfo, slot, estate, true);
292 :
293 : /* start with the root partitioned table */
1712 294 467654 : dispatch = pd[0];
943 295 991459 : while (dispatch != NULL)
296 : {
1605 297 523889 : int partidx = -1;
298 : bool is_leaf;
299 :
300 523889 : CHECK_FOR_INTERRUPTS();
301 :
1712 302 523889 : rel = dispatch->reldesc;
1605 303 523889 : partdesc = dispatch->partdesc;
304 :
305 : /*
306 : * Extract partition key from tuple. Expression evaluation machinery
307 : * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
308 : * point to the correct tuple slot. The slot might have changed from
309 : * what was used for the parent table if the table of the current
310 : * partitioning level has different tuple descriptor from the parent.
311 : * So update ecxt_scantuple accordingly.
312 : */
1971 rhaas 313 523889 : ecxt->ecxt_scantuple = slot;
1712 alvherre 314 523889 : FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);
315 :
316 : /*
317 : * If this partitioned table has no partitions or no partition for
318 : * these values, error out.
319 : */
1605 320 1047757 : if (partdesc->nparts == 0 ||
367 321 523868 : (partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
322 : {
323 : char *val_desc;
324 :
1605 325 74 : val_desc = ExecBuildSlotPartitionKeyDescription(rel,
326 : values, isnull, 64);
327 74 : Assert(OidIsValid(RelationGetRelid(rel)));
328 74 : ereport(ERROR,
329 : (errcode(ERRCODE_CHECK_VIOLATION),
330 : errmsg("no partition of relation \"%s\" found for row",
331 : RelationGetRelationName(rel)),
332 : val_desc ?
333 : errdetail("Partition key of the failing row contains %s.",
334 : val_desc) : 0,
335 : errtable(rel)));
336 : }
337 :
902 heikki.linnakangas 338 523815 : is_leaf = partdesc->is_leaf[partidx];
339 523815 : if (is_leaf)
340 : {
341 : /*
342 : * We've reached the leaf -- hurray, we're done. Look to see if
343 : * we've already got a ResultRelInfo for this partition.
344 : */
1605 alvherre 345 467579 : if (likely(dispatch->indexes[partidx] >= 0))
346 : {
347 : /* ResultRelInfo already built */
348 463655 : Assert(dispatch->indexes[partidx] < proute->num_partitions);
349 463655 : rri = proute->partitions[dispatch->indexes[partidx]];
350 : }
351 : else
352 : {
353 : /*
354 : * If the partition is known in the owning ModifyTableState
355 : * node, we can re-use that ResultRelInfo instead of creating
356 : * a new one with ExecInitPartitionInfo().
357 : */
733 tgl 358 3924 : rri = ExecLookupResultRelByOid(mtstate,
359 3924 : partdesc->oids[partidx],
360 : true, false);
361 3924 : if (rri)
362 : {
363 : /* Verify this ResultRelInfo allows INSERTs */
364 193 : CheckValidResultRel(rri, CMD_INSERT);
365 :
366 : /*
367 : * Initialize information needed to insert this and
368 : * subsequent tuples routed to this partition.
369 : */
370 193 : ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
371 : rri, partidx, true);
372 : }
373 : else
374 : {
375 : /* We need to create a new one. */
1605 alvherre 376 3731 : rri = ExecInitPartitionInfo(mtstate, estate, proute,
377 : dispatch,
378 : rootResultRelInfo, partidx);
379 : }
380 : }
943 381 467570 : Assert(rri != NULL);
382 :
383 : /* Signal to terminate the loop */
384 467570 : dispatch = NULL;
385 : }
386 : else
387 : {
388 : /*
389 : * Partition is a sub-partitioned table; get the PartitionDispatch
390 : */
1605 391 56236 : if (likely(dispatch->indexes[partidx] >= 0))
392 : {
393 : /* Already built. */
394 55666 : Assert(dispatch->indexes[partidx] < proute->num_dispatch);
395 :
943 396 55666 : rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
397 :
398 : /*
399 : * Move down to the next partition level and search again
400 : * until we find a leaf partition that matches this tuple
401 : */
1605 402 55666 : dispatch = pd[dispatch->indexes[partidx]];
403 : }
404 : else
405 : {
406 : /* Not yet built. Do that now. */
407 : PartitionDispatch subdispatch;
408 :
409 : /*
410 : * Create the new PartitionDispatch. We pass the current one
411 : * in as the parent PartitionDispatch
412 : */
790 heikki.linnakangas 413 570 : subdispatch = ExecInitPartitionDispatchInfo(estate,
414 : proute,
1605 alvherre 415 570 : partdesc->oids[partidx],
416 : dispatch, partidx,
417 : mtstate->rootResultRelInfo);
418 570 : Assert(dispatch->indexes[partidx] >= 0 &&
419 : dispatch->indexes[partidx] < proute->num_dispatch);
420 :
943 421 570 : rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
1605 422 570 : dispatch = subdispatch;
423 : }
424 :
425 : /*
426 : * Convert the tuple to the new parent's layout, if different from
427 : * the previous parent.
428 : */
943 429 56236 : if (dispatch->tupslot)
430 : {
431 30840 : AttrMap *map = dispatch->tupmap;
432 30840 : TupleTableSlot *tempslot = myslot;
433 :
434 30840 : myslot = dispatch->tupslot;
435 30840 : slot = execute_attr_map_slot(map, slot, myslot);
436 :
437 30840 : if (tempslot != NULL)
438 147 : ExecClearTuple(tempslot);
439 : }
440 : }
441 :
442 : /*
443 : * If this partition is the default one, we must check its partition
444 : * constraint now, which may have changed concurrently due to
445 : * partitions being added to the parent.
446 : *
447 : * (We do this here, and do not rely on ExecInsert doing it, because
448 : * we don't want to miss doing it for non-leaf partitions.)
449 : */
450 523806 : if (partidx == partdesc->boundinfo->default_index)
451 : {
452 : /*
453 : * The tuple must match the partition's layout for the constraint
454 : * expression to be evaluated successfully. If the partition is
455 : * sub-partitioned, that would already be the case due to the code
456 : * above, but for a leaf partition the tuple still matches the
457 : * parent's layout.
458 : *
459 : * Note that we have a map to convert from root to current
460 : * partition, but not from immediate parent to current partition.
461 : * So if we have to convert, do it from the root slot; if not, use
462 : * the root slot as-is.
463 : */
902 heikki.linnakangas 464 255 : if (is_leaf)
465 : {
128 alvherre 466 GNC 233 : TupleConversionMap *map = ExecGetRootToChildMap(rri, estate);
467 :
943 alvherre 468 CBC 233 : if (map)
469 57 : slot = execute_attr_map_slot(map->attrMap, rootslot,
470 : rri->ri_PartitionTupleSlot);
471 : else
472 176 : slot = rootslot;
473 : }
474 :
475 255 : ExecPartitionCheck(rri, slot, estate, true);
476 : }
477 : }
478 :
479 : /* Release the tuple in the lowest parent's dedicated slot. */
480 467570 : if (myslot != NULL)
481 30677 : ExecClearTuple(myslot);
482 : /* and restore ecxt's scantuple */
483 467570 : ecxt->ecxt_scantuple = ecxt_scantuple_saved;
484 467570 : MemoryContextSwitchTo(oldcxt);
485 :
486 467570 : return rri;
487 : }
488 :
489 : /*
490 : * ExecInitPartitionInfo
491 : * Lock the partition and initialize ResultRelInfo. Also setup other
492 : * information for the partition and store it in the next empty slot in
493 : * the proute->partitions array.
494 : *
495 : * Returns the ResultRelInfo
496 : */
497 : static ResultRelInfo *
1605 498 3731 : ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
499 : PartitionTupleRouting *proute,
500 : PartitionDispatch dispatch,
501 : ResultRelInfo *rootResultRelInfo,
502 : int partidx)
503 : {
1829 rhaas 504 3731 : ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
745 alvherre 505 3731 : Oid partOid = dispatch->partdesc->oids[partidx];
506 : Relation partrel;
790 heikki.linnakangas 507 3731 : int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
1819 alvherre 508 3731 : Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
509 : ResultRelInfo *leaf_part_rri;
510 : MemoryContext oldcxt;
1208 michael 511 3731 : AttrMap *part_attmap = NULL;
512 : bool found_whole_row;
513 :
1605 alvherre 514 3731 : oldcxt = MemoryContextSwitchTo(proute->memcxt);
515 :
745 516 3731 : partrel = table_open(partOid, RowExclusiveLock);
517 :
1840 518 3731 : leaf_part_rri = makeNode(ResultRelInfo);
1872 rhaas 519 3731 : InitResultRelInfo(leaf_part_rri,
520 : partrel,
521 : 0,
522 : rootResultRelInfo,
523 : estate->es_instrument);
524 :
525 : /*
526 : * Verify result relation is a valid target for an INSERT. An UPDATE of a
527 : * partition-key becomes a DELETE+INSERT operation, so this check is still
528 : * required when the operation is CMD_UPDATE.
529 : */
1804 530 3731 : CheckValidResultRel(leaf_part_rri, CMD_INSERT);
531 :
532 : /*
533 : * Open partition indices. The user may have asked to check for conflicts
534 : * within this leaf partition and do "nothing" instead of throwing an
535 : * error. Be prepared in that case by initializing the index information
536 : * needed by ExecInsert() to perform speculative insertions.
537 : */
1872 538 3728 : if (partrel->rd_rel->relhasindex &&
539 723 : leaf_part_rri->ri_IndexRelationDescs == NULL)
540 723 : ExecOpenIndices(leaf_part_rri,
1847 alvherre 541 1361 : (node != NULL &&
542 638 : node->onConflictAction != ONCONFLICT_NONE));
543 :
544 : /*
545 : * Build WITH CHECK OPTION constraints for the partition. Note that we
546 : * didn't build the withCheckOptionList for partitions within the planner,
547 : * but simple translation of varattnos will suffice. This only occurs for
548 : * the INSERT case or in the case of UPDATE/MERGE tuple routing where we
549 : * didn't find a result rel to reuse.
550 : */
1872 rhaas 551 3728 : if (node && node->withCheckOptionLists != NIL)
552 : {
553 : List *wcoList;
554 48 : List *wcoExprs = NIL;
555 : ListCell *ll;
556 :
557 : /*
558 : * In the case of INSERT on a partitioned table, there is only one
559 : * plan. Likewise, there is only one WCO list, not one per partition.
560 : * For UPDATE/MERGE, there are as many WCO lists as there are plans.
561 : */
562 48 : Assert((node->operation == CMD_INSERT &&
563 : list_length(node->withCheckOptionLists) == 1 &&
564 : list_length(node->resultRelations) == 1) ||
565 : (node->operation == CMD_UPDATE &&
566 : list_length(node->withCheckOptionLists) ==
567 : list_length(node->resultRelations)) ||
568 : (node->operation == CMD_MERGE &&
569 : list_length(node->withCheckOptionLists) ==
570 : list_length(node->resultRelations)));
571 :
572 : /*
573 : * Use the WCO list of the first plan as a reference to calculate
574 : * attno's for the WCO list of this partition. In the INSERT case,
575 : * that refers to the root partitioned table, whereas in the UPDATE
576 : * tuple routing case, that refers to the first partition in the
577 : * mtstate->resultRelInfo array. In any case, both that relation and
578 : * this partition should have the same columns, so we should be able
579 : * to map attributes successfully.
580 : */
581 48 : wcoList = linitial(node->withCheckOptionLists);
582 :
583 : /*
584 : * Convert Vars in it to contain this partition's attribute numbers.
585 : */
586 : part_attmap =
1208 michael 587 48 : build_attrmap_by_name(RelationGetDescr(partrel),
588 : RelationGetDescr(firstResultRel),
589 : false);
590 : wcoList = (List *)
1816 alvherre 591 GIC 48 : map_variable_attnos((Node *) wcoList,
1816 alvherre 592 ECB : firstVarno, 0,
593 : part_attmap,
1816 alvherre 594 GIC 48 : RelationGetForm(partrel)->reltype,
1816 alvherre 595 ECB : &found_whole_row);
596 : /* We ignore the value of found_whole_row. */
597 :
1872 rhaas 598 GIC 135 : foreach(ll, wcoList)
1872 rhaas 599 ECB : {
629 peter 600 GIC 87 : WithCheckOption *wco = lfirst_node(WithCheckOption, ll);
1872 rhaas 601 CBC 87 : ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
1861 andres 602 ECB : &mtstate->ps);
603 :
1872 rhaas 604 GIC 87 : wcoExprs = lappend(wcoExprs, wcoExpr);
1872 rhaas 605 ECB : }
606 :
1872 rhaas 607 GIC 48 : leaf_part_rri->ri_WithCheckOptions = wcoList;
1872 rhaas 608 CBC 48 : leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
1872 rhaas 609 ECB : }
610 :
611 : /*
612 : * Build the RETURNING projection for the partition. Note that we didn't
613 : * build the returningList for partitions within the planner, but simple
614 : * translation of varattnos will suffice. This only occurs for the INSERT
615 : * case or in the case of UPDATE tuple routing where we didn't find a
616 : * result rel to reuse.
617 : */
1872 rhaas 618 GIC 3728 : if (node && node->returningLists != NIL)
1872 rhaas 619 ECB : {
620 : TupleTableSlot *slot;
621 : ExprContext *econtext;
622 : List *returningList;
623 :
624 : /* See the comment above for WCO lists. */
625 : /* (except no RETURNING support for MERGE yet) */
1872 rhaas 626 GIC 73 : Assert((node->operation == CMD_INSERT &&
1872 rhaas 627 ECB : list_length(node->returningLists) == 1 &&
628 : list_length(node->resultRelations) == 1) ||
629 : (node->operation == CMD_UPDATE &&
630 : list_length(node->returningLists) ==
631 : list_length(node->resultRelations)));
632 :
633 : /*
634 : * Use the RETURNING list of the first plan as a reference to
635 : * calculate attno's for the RETURNING list of this partition. See
636 : * the comment above for WCO lists for more details on why this is
637 : * okay.
638 : */
1872 rhaas 639 GIC 73 : returningList = linitial(node->returningLists);
1872 rhaas 640 ECB :
641 : /*
642 : * Convert Vars in it to contain this partition's attribute numbers.
643 : */
1208 michael 644 GIC 73 : if (part_attmap == NULL)
1208 michael 645 ECB : part_attmap =
1208 michael 646 GIC 73 : build_attrmap_by_name(RelationGetDescr(partrel),
647 : RelationGetDescr(firstResultRel),
648 : false);
649 : returningList = (List *)
1816 alvherre 650 73 : map_variable_attnos((Node *) returningList,
651 : firstVarno, 0,
1208 michael 652 ECB : part_attmap,
1816 alvherre 653 GIC 73 : RelationGetForm(partrel)->reltype,
654 : &found_whole_row);
1816 alvherre 655 ECB : /* We ignore the value of found_whole_row. */
656 :
1829 rhaas 657 GIC 73 : leaf_part_rri->ri_returningList = returningList;
658 :
1872 rhaas 659 ECB : /*
660 : * Initialize the projection itself.
661 : *
662 : * Use the slot and the expression context that would have been set up
663 : * in ExecInitModifyTable() for projection's output.
664 : */
1872 rhaas 665 GIC 73 : Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
666 73 : slot = mtstate->ps.ps_ResultTupleSlot;
1872 rhaas 667 CBC 73 : Assert(mtstate->ps.ps_ExprContext != NULL);
668 73 : econtext = mtstate->ps.ps_ExprContext;
669 73 : leaf_part_rri->ri_projectReturning =
670 73 : ExecBuildProjectionInfo(returningList, econtext, slot,
1872 rhaas 671 ECB : &mtstate->ps, RelationGetDescr(partrel));
672 : }
673 :
674 : /* Set up information needed for routing tuples to the partition. */
1605 alvherre 675 GIC 3728 : ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
676 : leaf_part_rri, partidx, false);
1804 rhaas 677 ECB :
678 : /*
679 : * If there is an ON CONFLICT clause, initialize state for it.
680 : */
1840 alvherre 681 GIC 3728 : if (node && node->onConflictAction != ONCONFLICT_NONE)
682 : {
1840 alvherre 683 CBC 111 : TupleDesc partrelDesc = RelationGetDescr(partrel);
1840 alvherre 684 GIC 111 : ExprContext *econtext = mtstate->ps.ps_ExprContext;
1840 alvherre 685 ECB : ListCell *lc;
1840 alvherre 686 CBC 111 : List *arbiterIndexes = NIL;
687 :
1840 alvherre 688 ECB : /*
689 : * If there is a list of arbiter indexes, map it to a list of indexes
690 : * in the partition. We do that by scanning the partition's index
691 : * list and searching for ancestry relationships to each index in the
692 : * ancestor table.
693 : */
235 tgl 694 GNC 111 : if (rootResultRelInfo->ri_onConflictArbiterIndexes != NIL)
695 : {
1840 alvherre 696 ECB : List *childIdxs;
697 :
1840 alvherre 698 GIC 86 : childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);
699 :
1840 alvherre 700 CBC 178 : foreach(lc, childIdxs)
701 : {
702 92 : Oid childIdx = lfirst_oid(lc);
703 : List *ancestors;
1840 alvherre 704 ECB : ListCell *lc2;
705 :
1840 alvherre 706 GIC 92 : ancestors = get_partition_ancestors(childIdx);
1605 707 184 : foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes)
1840 alvherre 708 ECB : {
1840 alvherre 709 CBC 92 : if (list_member_oid(ancestors, lfirst_oid(lc2)))
1840 alvherre 710 GIC 86 : arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
1840 alvherre 711 ECB : }
1840 alvherre 712 CBC 92 : list_free(ancestors);
713 : }
1840 alvherre 714 ECB : }
715 :
716 : /*
717 : * If the resulting lists are of inequal length, something is wrong.
718 : * (This shouldn't happen, since arbiter index selection should not
719 : * pick up an invalid index.)
720 : */
1605 alvherre 721 GIC 222 : if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
1840 722 111 : list_length(arbiterIndexes))
1840 alvherre 723 LBC 0 : elog(ERROR, "invalid arbiter index list");
1840 alvherre 724 CBC 111 : leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
1840 alvherre 725 EUB :
1840 alvherre 726 ECB : /*
727 : * In the DO UPDATE case, we have some more state to initialize.
728 : */
1840 alvherre 729 GIC 111 : if (node->onConflictAction == ONCONFLICT_UPDATE)
730 : {
699 tgl 731 CBC 83 : OnConflictSetState *onconfl = makeNode(OnConflictSetState);
732 : TupleConversionMap *map;
1605 alvherre 733 ECB :
128 alvherre 734 GNC 83 : map = ExecGetRootToChildMap(leaf_part_rri, estate);
735 :
1840 alvherre 736 CBC 83 : Assert(node->onConflictSet != NIL);
1605 alvherre 737 GIC 83 : Assert(rootResultRelInfo->ri_onConflict != NULL);
1840 alvherre 738 ECB :
699 tgl 739 CBC 83 : leaf_part_rri->ri_onConflict = onconfl;
740 :
1495 andres 741 ECB : /*
742 : * Need a separate existing slot for each partition, as the
743 : * partition could be of a different AM, even if the tuple
744 : * descriptors match.
745 : */
699 tgl 746 GIC 83 : onconfl->oc_Existing =
1490 andres 747 83 : table_slot_create(leaf_part_rri->ri_RelationDesc,
1490 andres 748 CBC 83 : &mtstate->ps.state->es_tupleTable);
1495 andres 749 ECB :
1840 alvherre 750 : /*
751 : * If the partition's tuple descriptor matches exactly the root
752 : * parent (the common case), we can re-use most of the parent's ON
753 : * CONFLICT SET state, skipping a bunch of work. Otherwise, we
754 : * need to create state specific to this partition.
755 : */
1840 alvherre 756 GIC 83 : if (map == NULL)
757 : {
1495 andres 758 ECB : /*
759 : * It's safe to reuse these from the partition root, as we
760 : * only process one tuple at a time (therefore we won't
761 : * overwrite needed data in slots), and the results of
762 : * projections are independent of the underlying storage.
763 : * Projections and where clauses themselves don't store state
764 : * / are independent of the underlying storage.
765 : */
699 tgl 766 GIC 45 : onconfl->oc_ProjSlot =
1495 andres 767 45 : rootResultRelInfo->ri_onConflict->oc_ProjSlot;
699 tgl 768 CBC 45 : onconfl->oc_ProjInfo =
1495 andres 769 45 : rootResultRelInfo->ri_onConflict->oc_ProjInfo;
699 tgl 770 45 : onconfl->oc_WhereClause =
1495 andres 771 45 : rootResultRelInfo->ri_onConflict->oc_WhereClause;
1495 andres 772 ECB : }
1840 alvherre 773 : else
774 : {
775 : List *onconflset;
776 : List *onconflcols;
777 :
778 : /*
779 : * Translate expressions in onConflictSet to account for
780 : * different attribute numbers. For that, map partition
781 : * varattnos twice: first to catch the EXCLUDED
782 : * pseudo-relation (INNER_VAR), and second to handle the main
783 : * target relation (firstVarno).
784 : */
699 tgl 785 GIC 38 : onconflset = copyObject(node->onConflictSet);
1208 michael 786 CBC 38 : if (part_attmap == NULL)
1208 michael 787 ECB : part_attmap =
1208 michael 788 GIC 35 : build_attrmap_by_name(RelationGetDescr(partrel),
789 : RelationGetDescr(firstResultRel),
790 : false);
791 : onconflset = (List *)
1816 alvherre 792 38 : map_variable_attnos((Node *) onconflset,
793 : INNER_VAR, 0,
1208 michael 794 ECB : part_attmap,
1816 alvherre 795 GIC 38 : RelationGetForm(partrel)->reltype,
796 : &found_whole_row);
1816 alvherre 797 ECB : /* We ignore the value of found_whole_row. */
798 : onconflset = (List *)
1816 alvherre 799 GIC 38 : map_variable_attnos((Node *) onconflset,
800 : firstVarno, 0,
1208 michael 801 ECB : part_attmap,
1816 alvherre 802 GIC 38 : RelationGetForm(partrel)->reltype,
803 : &found_whole_row);
1816 alvherre 804 ECB : /* We ignore the value of found_whole_row. */
805 :
806 : /* Finally, adjust the target colnos to match the partition. */
699 tgl 807 GIC 38 : onconflcols = adjust_partition_colnos(node->onConflictCols,
808 : leaf_part_rri);
1840 alvherre 809 ECB :
810 : /* create the tuple slot for the UPDATE SET projection */
699 tgl 811 GIC 38 : onconfl->oc_ProjSlot =
812 38 : table_slot_create(partrel,
699 tgl 813 CBC 38 : &mtstate->ps.state->es_tupleTable);
1495 andres 814 ECB :
815 : /* build UPDATE SET projection state */
699 tgl 816 GIC 38 : onconfl->oc_ProjInfo =
817 38 : ExecBuildUpdateProjection(onconflset,
699 tgl 818 ECB : true,
819 : onconflcols,
820 : partrelDesc,
821 : econtext,
822 : onconfl->oc_ProjSlot,
823 : &mtstate->ps);
824 :
825 : /*
826 : * If there is a WHERE clause, initialize state where it will
827 : * be evaluated, mapping the attribute numbers appropriately.
828 : * As with onConflictSet, we need to map partition varattnos
829 : * to the partition's tupdesc.
830 : */
1840 alvherre 831 GIC 38 : if (node->onConflictWhere)
832 : {
1840 alvherre 833 ECB : List *clause;
834 :
1840 alvherre 835 GIC 15 : clause = copyObject((List *) node->onConflictWhere);
836 : clause = (List *)
1816 alvherre 837 CBC 15 : map_variable_attnos((Node *) clause,
838 : INNER_VAR, 0,
1208 michael 839 ECB : part_attmap,
1816 alvherre 840 GIC 15 : RelationGetForm(partrel)->reltype,
841 : &found_whole_row);
1816 alvherre 842 ECB : /* We ignore the value of found_whole_row. */
843 : clause = (List *)
1816 alvherre 844 GIC 15 : map_variable_attnos((Node *) clause,
845 : firstVarno, 0,
1208 michael 846 ECB : part_attmap,
1816 alvherre 847 GIC 15 : RelationGetForm(partrel)->reltype,
848 : &found_whole_row);
1816 alvherre 849 ECB : /* We ignore the value of found_whole_row. */
699 tgl 850 GIC 15 : onconfl->oc_WhereClause =
1840 alvherre 851 15 : ExecInitQual((List *) clause, &mtstate->ps);
1840 alvherre 852 ECB : }
853 : }
854 : }
855 : }
856 :
857 : /*
858 : * Since we've just initialized this ResultRelInfo, it's not in any list
859 : * attached to the estate as yet. Add it, so that it can be found later.
860 : *
861 : * Note that the entries in this list appear in no predetermined order,
862 : * because partition result rels are initialized as and when they're
863 : * needed.
864 : */
1605 alvherre 865 GIC 3728 : MemoryContextSwitchTo(estate->es_query_cxt);
866 3728 : estate->es_tuple_routing_result_relations =
1605 alvherre 867 CBC 3728 : lappend(estate->es_tuple_routing_result_relations,
1605 alvherre 868 ECB : leaf_part_rri);
1840 869 :
870 : /*
871 : * Initialize information about this partition that's needed to handle
872 : * MERGE. We take the "first" result relation's mergeActionList as
873 : * reference and make copy for this relation, converting stuff that
874 : * references attribute numbers to match this relation's.
875 : *
876 : * This duplicates much of the logic in ExecInitMerge(), so if something
877 : * changes there, look here too.
878 : */
377 alvherre 879 GIC 3728 : if (node && node->operation == CMD_MERGE)
880 : {
377 alvherre 881 CBC 22 : List *firstMergeActionList = linitial(node->mergeActionLists);
882 : ListCell *lc;
883 22 : ExprContext *econtext = mtstate->ps.ps_ExprContext;
884 :
885 22 : if (part_attmap == NULL)
886 : part_attmap =
887 19 : build_attrmap_by_name(RelationGetDescr(partrel),
888 : RelationGetDescr(firstResultRel),
889 : false);
377 alvherre 890 ECB :
377 alvherre 891 GIC 22 : if (unlikely(!leaf_part_rri->ri_projectNewInfoValid))
892 22 : ExecInitMergeTupleSlots(mtstate, leaf_part_rri);
893 :
377 alvherre 894 CBC 69 : foreach(lc, firstMergeActionList)
377 alvherre 895 ECB : {
896 : /* Make a copy for this relation to be safe. */
377 alvherre 897 CBC 47 : MergeAction *action = copyObject(lfirst(lc));
898 : MergeActionState *action_state;
899 : List **list;
377 alvherre 900 ECB :
901 : /* Generate the action's state for this relation */
377 alvherre 902 GIC 47 : action_state = makeNode(MergeActionState);
903 47 : action_state->mas_action = action;
904 :
377 alvherre 905 ECB : /* And put the action in the appropriate list */
377 alvherre 906 CBC 47 : if (action->matched)
377 alvherre 907 GIC 28 : list = &leaf_part_rri->ri_matchedMergeAction;
908 : else
377 alvherre 909 CBC 19 : list = &leaf_part_rri->ri_notMatchedMergeAction;
910 47 : *list = lappend(*list, action_state);
911 :
912 47 : switch (action->commandType)
377 alvherre 913 ECB : {
377 alvherre 914 GIC 19 : case CMD_INSERT:
377 alvherre 915 ECB :
916 : /*
917 : * ExecCheckPlanOutput() already done on the targetlist
918 : * when "first" result relation initialized and it is same
919 : * for all result relations.
920 : */
377 alvherre 921 GIC 19 : action_state->mas_proj =
922 19 : ExecBuildProjectionInfo(action->targetList, econtext,
923 : leaf_part_rri->ri_newTupleSlot,
377 alvherre 924 ECB : &mtstate->ps,
925 : RelationGetDescr(partrel));
377 alvherre 926 GIC 19 : break;
927 22 : case CMD_UPDATE:
928 :
377 alvherre 929 ECB : /*
930 : * Convert updateColnos from "first" result relation
931 : * attribute numbers to this result rel's.
932 : */
377 alvherre 933 GIC 22 : if (part_attmap)
934 22 : action->updateColnos =
935 22 : adjust_partition_colnos_using_map(action->updateColnos,
377 alvherre 936 ECB : part_attmap);
377 alvherre 937 CBC 22 : action_state->mas_proj =
938 22 : ExecBuildUpdateProjection(action->targetList,
939 : true,
377 alvherre 940 ECB : action->updateColnos,
377 alvherre 941 CBC 22 : RelationGetDescr(leaf_part_rri->ri_RelationDesc),
942 : econtext,
943 : leaf_part_rri->ri_newTupleSlot,
377 alvherre 944 ECB : NULL);
377 alvherre 945 GIC 22 : break;
946 6 : case CMD_DELETE:
947 6 : break;
377 alvherre 948 ECB :
377 alvherre 949 LBC 0 : default:
950 0 : elog(ERROR, "unknown action in MERGE WHEN clause");
951 : }
377 alvherre 952 EUB :
953 : /* found_whole_row intentionally ignored. */
377 alvherre 954 GIC 47 : action->qual =
955 47 : map_variable_attnos(action->qual,
956 : firstVarno, 0,
377 alvherre 957 ECB : part_attmap,
377 alvherre 958 CBC 47 : RelationGetForm(partrel)->reltype,
959 : &found_whole_row);
377 alvherre 960 GIC 47 : action_state->mas_whenqual =
377 alvherre 961 CBC 47 : ExecInitQual((List *) action->qual, &mtstate->ps);
962 : }
377 alvherre 963 ECB : }
1605 alvherre 964 CBC 3728 : MemoryContextSwitchTo(oldcxt);
965 :
1872 rhaas 966 GIC 3728 : return leaf_part_rri;
1872 rhaas 967 ECB : }
968 :
1829 969 : /*
970 : * ExecInitRoutingInfo
971 : * Set up information needed for translating tuples between root
972 : * partitioned table format and partition format, and keep track of it
973 : * in PartitionTupleRouting.
974 : */
975 : static void
1829 rhaas 976 GIC 3921 : ExecInitRoutingInfo(ModifyTableState *mtstate,
977 : EState *estate,
978 : PartitionTupleRouting *proute,
1605 alvherre 979 ECB : PartitionDispatch dispatch,
980 : ResultRelInfo *partRelInfo,
981 : int partidx,
982 : bool is_borrowed_rel)
983 : {
984 : MemoryContext oldcxt;
985 : int rri_index;
986 :
1605 alvherre 987 GIC 3921 : oldcxt = MemoryContextSwitchTo(proute->memcxt);
988 :
1829 rhaas 989 ECB : /*
990 : * Set up tuple conversion between root parent and the partition if the
991 : * two have different rowtypes. If conversion is indeed required, also
992 : * initialize a slot dedicated to storing this partition's converted
993 : * tuples. Various operations that are applied to tuples after routing,
994 : * such as checking constraints, will refer to this slot.
995 : */
128 alvherre 996 GNC 3921 : if (ExecGetRootToChildMap(partRelInfo, estate) != NULL)
997 : {
1650 andres 998 GIC 600 : Relation partrel = partRelInfo->ri_RelationDesc;
1650 andres 999 ECB :
1000 : /*
1001 : * This pins the partition's TupleDesc, which will be released at the
1002 : * end of the command.
1003 : */
902 heikki.linnakangas 1004 GIC 600 : partRelInfo->ri_PartitionTupleSlot =
1490 andres 1005 600 : table_slot_create(partrel, &estate->es_tupleTable);
1006 : }
1007 : else
902 heikki.linnakangas 1008 CBC 3321 : partRelInfo->ri_PartitionTupleSlot = NULL;
1605 alvherre 1009 ECB :
1829 rhaas 1010 : /*
1011 : * If the partition is a foreign table, let the FDW init itself for
1012 : * routing tuples to the partition.
1013 : */
1829 rhaas 1014 GIC 3921 : if (partRelInfo->ri_FdwRoutine != NULL &&
1015 41 : partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
1016 41 : partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);
1017 :
1018 : /*
697 tgl 1019 ECB : * Determine if the FDW supports batch insert and determine the batch size
1020 : * (a FDW may support batching, but it may be disabled for the
809 tomas.vondra 1021 : * server/table or for this particular query).
1022 : *
1023 : * If the FDW does not support batching, we set the batch size to 1.
1024 : */
110 efujita 1025 GNC 3915 : if (partRelInfo->ri_FdwRoutine != NULL &&
809 tomas.vondra 1026 CBC 35 : partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize &&
809 tomas.vondra 1027 GIC 35 : partRelInfo->ri_FdwRoutine->ExecForeignBatchInsert)
809 tomas.vondra 1028 CBC 35 : partRelInfo->ri_BatchSize =
809 tomas.vondra 1029 GIC 35 : partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(partRelInfo);
1030 : else
1031 3880 : partRelInfo->ri_BatchSize = 1;
1032 :
809 tomas.vondra 1033 CBC 3915 : Assert(partRelInfo->ri_BatchSize >= 1);
1034 :
1466 andres 1035 3915 : partRelInfo->ri_CopyMultiInsertBuffer = NULL;
1036 :
1037 : /*
1605 alvherre 1038 ECB : * Keep track of it in the PartitionTupleRouting->partitions array.
1039 : */
1605 alvherre 1040 CBC 3915 : Assert(dispatch->indexes[partidx] == -1);
1041 :
1042 3915 : rri_index = proute->num_partitions++;
1829 rhaas 1043 ECB :
1605 alvherre 1044 : /* Allocate or enlarge the array, as needed */
1605 alvherre 1045 CBC 3915 : if (proute->num_partitions >= proute->max_partitions)
1605 alvherre 1046 ECB : {
1605 alvherre 1047 GIC 3027 : if (proute->max_partitions == 0)
1048 : {
1049 3024 : proute->max_partitions = 8;
1605 alvherre 1050 CBC 3024 : proute->partitions = (ResultRelInfo **)
1051 3024 : palloc(sizeof(ResultRelInfo *) * proute->max_partitions);
733 tgl 1052 3024 : proute->is_borrowed_rel = (bool *)
1053 3024 : palloc(sizeof(bool) * proute->max_partitions);
1605 alvherre 1054 ECB : }
1055 : else
1056 : {
1605 alvherre 1057 GIC 3 : proute->max_partitions *= 2;
1058 3 : proute->partitions = (ResultRelInfo **)
1059 3 : repalloc(proute->partitions, sizeof(ResultRelInfo *) *
1605 alvherre 1060 CBC 3 : proute->max_partitions);
733 tgl 1061 3 : proute->is_borrowed_rel = (bool *)
1062 3 : repalloc(proute->is_borrowed_rel, sizeof(bool) *
733 tgl 1063 GIC 3 : proute->max_partitions);
1605 alvherre 1064 ECB : }
1065 : }
1066 :
1605 alvherre 1067 GIC 3915 : proute->partitions[rri_index] = partRelInfo;
733 tgl 1068 3915 : proute->is_borrowed_rel[rri_index] = is_borrowed_rel;
1605 alvherre 1069 3915 : dispatch->indexes[partidx] = rri_index;
1070 :
1071 3915 : MemoryContextSwitchTo(oldcxt);
1829 rhaas 1072 3915 : }
1073 :
1074 : /*
1075 : * ExecInitPartitionDispatchInfo
1076 : * Lock the partitioned table (if not locked already) and initialize
1508 rhaas 1077 ECB : * PartitionDispatch for a partitioned table and store it in the next
1078 : * available slot in the proute->partition_dispatch_info array. Also,
1079 : * record the index into this array in the parent_pd->indexes[] array in
1080 : * the partidx element so that we can properly retrieve the newly created
1081 : * PartitionDispatch later.
1082 : */
1083 : static PartitionDispatch
1494 rhaas 1084 GIC 3725 : ExecInitPartitionDispatchInfo(EState *estate,
1085 : PartitionTupleRouting *proute, Oid partoid,
1086 : PartitionDispatch parent_pd, int partidx,
1087 : ResultRelInfo *rootResultRelInfo)
1088 : {
1089 : Relation rel;
1090 : PartitionDesc partdesc;
1091 : PartitionDispatch pd;
1092 : int dispatchidx;
1093 : MemoryContext oldcxt;
1094 :
1095 : /*
745 alvherre 1096 ECB : * For data modification, it is better that executor does not include
717 1097 : * partitions being detached, except when running in snapshot-isolation
1098 : * mode. This means that a read-committed transaction immediately gets a
1099 : * "no partition for tuple" error when a tuple is inserted into a
1100 : * partition that's being detached concurrently, but a transaction in
1101 : * repeatable-read mode can still use such a partition.
1102 : */
1494 rhaas 1103 GIC 3725 : if (estate->es_partition_directory == NULL)
1104 3149 : estate->es_partition_directory =
745 alvherre 1105 3149 : CreatePartitionDirectory(estate->es_query_cxt,
1106 : !IsolationUsesXactSnapshot());
1107 :
1605 alvherre 1108 CBC 3725 : oldcxt = MemoryContextSwitchTo(proute->memcxt);
1605 alvherre 1109 ECB :
1110 : /*
1508 rhaas 1111 : * Only sub-partitioned tables need to be locked here. The root
1112 : * partitioned table will already have been locked as it's referenced in
1113 : * the query's rtable.
1114 : */
1605 alvherre 1115 CBC 3725 : if (partoid != RelationGetRelid(proute->partition_root))
1508 rhaas 1116 570 : rel = table_open(partoid, RowExclusiveLock);
1605 alvherre 1117 ECB : else
1605 alvherre 1118 CBC 3155 : rel = proute->partition_root;
1494 rhaas 1119 3725 : partdesc = PartitionDirectoryLookup(estate->es_partition_directory, rel);
1605 alvherre 1120 ECB :
1605 alvherre 1121 GIC 3725 : pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
1605 alvherre 1122 CBC 3725 : partdesc->nparts * sizeof(int));
1605 alvherre 1123 GIC 3725 : pd->reldesc = rel;
1124 3725 : pd->key = RelationGetPartitionKey(rel);
1125 3725 : pd->keystate = NIL;
1126 3725 : pd->partdesc = partdesc;
1127 3725 : if (parent_pd != NULL)
1128 : {
1129 570 : TupleDesc tupdesc = RelationGetDescr(rel);
1130 :
1131 : /*
1132 : * For sub-partitioned tables where the column order differs from its
1605 alvherre 1133 ECB : * direct parent partitioned table, we must store a tuple table slot
1134 : * initialized with its tuple descriptor and a tuple conversion map to
1135 : * convert a tuple from its parent's rowtype to its own. This is to
1136 : * make sure that we are looking at the correct row using the correct
1137 : * tuple descriptor when computing its partition key for tuple
1138 : * routing.
1139 : */
1208 michael 1140 GIC 570 : pd->tupmap = build_attrmap_by_name_if_req(RelationGetDescr(parent_pd->reldesc),
1141 : tupdesc,
1142 : false);
1605 alvherre 1143 CBC 570 : pd->tupslot = pd->tupmap ?
1500 andres 1144 570 : MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual) : NULL;
1145 : }
1146 : else
1147 : {
1148 : /* Not required for the root partitioned table */
1605 alvherre 1149 GIC 3155 : pd->tupmap = NULL;
1150 3155 : pd->tupslot = NULL;
1605 alvherre 1151 ECB : }
1152 :
1153 : /*
1154 : * Initialize with -1 to signify that the corresponding partition's
1155 : * ResultRelInfo or PartitionDispatch has not been created yet.
1156 : */
1605 alvherre 1157 CBC 3725 : memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);
1158 :
1605 alvherre 1159 ECB : /* Track in PartitionTupleRouting for later use */
1605 alvherre 1160 GIC 3725 : dispatchidx = proute->num_dispatch++;
1906 rhaas 1161 ECB :
1605 alvherre 1162 : /* Allocate or enlarge the array, as needed */
1605 alvherre 1163 CBC 3725 : if (proute->num_dispatch >= proute->max_dispatch)
1605 alvherre 1164 ECB : {
1605 alvherre 1165 CBC 3155 : if (proute->max_dispatch == 0)
1166 : {
1605 alvherre 1167 GIC 3155 : proute->max_dispatch = 4;
1168 3155 : proute->partition_dispatch_info = (PartitionDispatch *)
1605 alvherre 1169 GBC 3155 : palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
943 1170 3155 : proute->nonleaf_partitions = (ResultRelInfo **)
1171 3155 : palloc(sizeof(ResultRelInfo *) * proute->max_dispatch);
1605 alvherre 1172 EUB : }
1173 : else
1174 : {
1605 alvherre 1175 UBC 0 : proute->max_dispatch *= 2;
1605 alvherre 1176 UIC 0 : proute->partition_dispatch_info = (PartitionDispatch *)
1177 0 : repalloc(proute->partition_dispatch_info,
1605 alvherre 1178 LBC 0 : sizeof(PartitionDispatch) * proute->max_dispatch);
943 alvherre 1179 UIC 0 : proute->nonleaf_partitions = (ResultRelInfo **)
1180 0 : repalloc(proute->nonleaf_partitions,
1181 0 : sizeof(ResultRelInfo *) * proute->max_dispatch);
1182 : }
1183 : }
1605 alvherre 1184 GIC 3725 : proute->partition_dispatch_info[dispatchidx] = pd;
1906 rhaas 1185 ECB :
1186 : /*
943 alvherre 1187 : * If setting up a PartitionDispatch for a sub-partitioned table, we may
1188 : * also need a minimally valid ResultRelInfo for checking the partition
1189 : * constraint later; set that up now.
1190 : */
943 alvherre 1191 GIC 3725 : if (parent_pd)
1192 : {
943 alvherre 1193 CBC 570 : ResultRelInfo *rri = makeNode(ResultRelInfo);
1194 :
790 heikki.linnakangas 1195 GIC 570 : InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0);
943 alvherre 1196 570 : proute->nonleaf_partitions[dispatchidx] = rri;
1197 : }
1198 : else
943 alvherre 1199 CBC 3155 : proute->nonleaf_partitions[dispatchidx] = NULL;
1200 :
1605 alvherre 1201 ECB : /*
1202 : * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
1203 : * install a downlink in the parent to allow quick descent.
1204 : */
1605 alvherre 1205 CBC 3725 : if (parent_pd)
1206 : {
1207 570 : Assert(parent_pd->indexes[partidx] == -1);
1605 alvherre 1208 GIC 570 : parent_pd->indexes[partidx] = dispatchidx;
1209 : }
1210 :
1211 3725 : MemoryContextSwitchTo(oldcxt);
1212 :
1213 3725 : return pd;
1214 : }
1215 :
1216 : /*
1921 rhaas 1217 ECB : * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
1218 : * routing.
1219 : *
1220 : * Close all the partitioned tables, leaf partitions, and their indices.
1221 : */
1222 : void
1829 rhaas 1223 GIC 2816 : ExecCleanupTupleRouting(ModifyTableState *mtstate,
1224 : PartitionTupleRouting *proute)
1225 : {
1226 : int i;
1227 :
1228 : /*
1921 rhaas 1229 ECB : * Remember, proute->partition_dispatch_info[0] corresponds to the root
1230 : * partitioned table, which we must not try to close, because it is the
1231 : * main target table of the query that will be closed by callers such as
1232 : * ExecEndPlan() or DoCopy(). Also, tupslot is NULL for the root
1233 : * partitioned table.
1234 : */
1921 rhaas 1235 CBC 3280 : for (i = 1; i < proute->num_dispatch; i++)
1921 rhaas 1236 ECB : {
1921 rhaas 1237 GIC 464 : PartitionDispatch pd = proute->partition_dispatch_info[i];
1238 :
1539 andres 1239 CBC 464 : table_close(pd->reldesc, NoLock);
1240 :
1605 alvherre 1241 464 : if (pd->tupslot)
1605 alvherre 1242 GIC 221 : ExecDropSingleTupleTableSlot(pd->tupslot);
1243 : }
1921 rhaas 1244 ECB :
1921 rhaas 1245 CBC 6495 : for (i = 0; i < proute->num_partitions; i++)
1921 rhaas 1246 ECB : {
1921 rhaas 1247 GIC 3679 : ResultRelInfo *resultRelInfo = proute->partitions[i];
1248 :
1249 : /* Allow any FDWs to shut down */
1455 efujita 1250 3679 : if (resultRelInfo->ri_FdwRoutine != NULL &&
1251 33 : resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
1252 33 : resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
1455 efujita 1253 ECB : resultRelInfo);
1254 :
1255 : /*
733 tgl 1256 : * Close it if it's not one of the result relations borrowed from the
1257 : * owning ModifyTableState; those will be closed by ExecEndPlan().
1258 : */
733 tgl 1259 CBC 3679 : if (proute->is_borrowed_rel[i])
733 tgl 1260 GIC 169 : continue;
1261 :
1921 rhaas 1262 3510 : ExecCloseIndices(resultRelInfo);
1539 andres 1263 3510 : table_close(resultRelInfo->ri_RelationDesc, NoLock);
1264 : }
1971 rhaas 1265 2816 : }
1266 :
1267 : /* ----------------
1268 : * FormPartitionKeyDatum
1269 : * Construct values[] and isnull[] arrays for the partition key
1270 : * of a tuple.
1271 : *
1272 : * pd Partition dispatch object of the partitioned table
1273 : * slot Heap tuple from which to extract partition key
1274 : * estate executor state for evaluating any partition key
1275 : * expressions (must be non-NULL)
1276 : * values Array of partition key Datums (output area)
1277 : * isnull Array of is-null indicators (output area)
1971 rhaas 1278 ECB : *
1279 : * the ecxt_scantuple slot of estate's per-tuple expr context must point to
1280 : * the heap tuple passed in.
1281 : * ----------------
1282 : */
1283 : static void
1971 rhaas 1284 GIC 523889 : FormPartitionKeyDatum(PartitionDispatch pd,
1285 : TupleTableSlot *slot,
1286 : EState *estate,
1971 rhaas 1287 ECB : Datum *values,
1288 : bool *isnull)
1289 : {
1290 : ListCell *partexpr_item;
1291 : int i;
1292 :
1971 rhaas 1293 GIC 523889 : if (pd->key->partexprs != NIL && pd->keystate == NIL)
1971 rhaas 1294 ECB : {
1295 : /* Check caller has set up context correctly */
1971 rhaas 1296 GIC 258 : Assert(estate != NULL &&
1971 rhaas 1297 ECB : GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1298 :
1299 : /* First time through, set up expression evaluation state */
1971 rhaas 1300 CBC 258 : pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
1301 : }
1302 :
1971 rhaas 1303 GIC 523889 : partexpr_item = list_head(pd->keystate);
1971 rhaas 1304 CBC 1059028 : for (i = 0; i < pd->key->partnatts; i++)
1305 : {
1971 rhaas 1306 GIC 535139 : AttrNumber keycol = pd->key->partattrs[i];
1971 rhaas 1307 ECB : Datum datum;
1308 : bool isNull;
1309 :
1971 rhaas 1310 GIC 535139 : if (keycol != 0)
1311 : {
1971 rhaas 1312 ECB : /* Plain column; get the value directly from the heap tuple */
1971 rhaas 1313 GBC 521339 : datum = slot_getattr(slot, keycol, &isNull);
1971 rhaas 1314 ECB : }
1315 : else
1316 : {
1317 : /* Expression; need to evaluate it */
1971 rhaas 1318 GIC 13800 : if (partexpr_item == NULL)
1971 rhaas 1319 LBC 0 : elog(ERROR, "wrong number of partition key expressions");
1971 rhaas 1320 CBC 13800 : datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
1971 rhaas 1321 GIC 13800 : GetPerTupleExprContext(estate),
1322 : &isNull);
1364 tgl 1323 CBC 13800 : partexpr_item = lnext(pd->keystate, partexpr_item);
1971 rhaas 1324 EUB : }
1971 rhaas 1325 CBC 535139 : values[i] = datum;
1971 rhaas 1326 GIC 535139 : isnull[i] = isNull;
1327 : }
1328 :
1329 523889 : if (partexpr_item != NULL)
1971 rhaas 1330 UIC 0 : elog(ERROR, "wrong number of partition key expressions");
1971 rhaas 1331 GIC 523889 : }
1332 :
/*
 * How many consecutive lookups must land on the same partition before
 * get_partition_for_tuple() stops doing a binary search first and instead
 * begins by checking whether the values belong to the last found partition.
 * Must be greater than 0.
 */
#define PARTITION_CACHED_FIND_THRESHOLD			16
1339 :
1340 : /*
1341 : * get_partition_for_tuple
1342 : * Finds partition of relation which accepts the partition key specified
1343 : * in values and isnull.
1344 : *
1345 : * Calling this function can be quite expensive when LIST and RANGE
1346 : * partitioned tables have many partitions. This is due to the binary search
1347 : * that's done to find the correct partition. Many of the use cases for LIST
1348 : * and RANGE partitioned tables make it likely that the same partition is
1349 : * found in subsequent ExecFindPartition() calls. This is especially true for
1350 : * cases such as RANGE partitioned tables on a TIMESTAMP column where the
1351 : * partition key is the current time. When asked to find a partition for a
1352 : * RANGE or LIST partitioned table, we record the partition index and datum
1353 : * offset we've found for the given 'values' in the PartitionDesc (which is
1354 : * stored in relcache), and if we keep finding the same partition
1355 : * PARTITION_CACHED_FIND_THRESHOLD times in a row, then we'll enable caching
1356 : * logic and instead of performing a binary search to find the correct
1357 : * partition, we'll just double-check that 'values' still belong to the last
1358 : * found partition, and if so, we'll return that partition index, thus
1359 : * skipping the need for the binary search. If we fail to match the last
1360 : * partition when double checking, then we fall back on doing a binary search.
1361 : * In this case, unless we find 'values' belong to the DEFAULT partition,
1362 : * we'll reset the number of times we've hit the same partition so that we
1363 : * don't attempt to use the cache again until we've found that partition at
1364 : * least PARTITION_CACHED_FIND_THRESHOLD times in a row.
1365 : *
1366 : * For cases where the partition changes on each lookup, the amount of
1367 : * additional work required just amounts to recording the last found partition
1368 : * and bound offset then resetting the found counter. This is cheap and does
1369 : * not appear to cause any meaningful slowdowns for such cases.
1370 : *
1371 : * No caching of partitions is done when the last found partition is the
1372 : * DEFAULT or NULL partition. For the case of the DEFAULT partition, there
1373 : * is no bound offset storing the matching datum, so we cannot confirm the
1374 : * indexes match. For the NULL partition, this is just so cheap, there's no
1375 : * sense in caching.
1376 : *
1377 : * Return value is index of the partition (>= 0 and < partdesc->nparts) if one
1378 : * found or -1 if none found.
1379 : */
1380 : static int
367 alvherre 1381 523868 : get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
1382 : {
250 drowley 1383 GNC 523868 : int bound_offset = -1;
1821 alvherre 1384 GIC 523868 : int part_index = -1;
367 1385 523868 : PartitionKey key = pd->key;
1386 523868 : PartitionDesc partdesc = pd->partdesc;
1761 tgl 1387 523868 : PartitionBoundInfo boundinfo = partdesc->boundinfo;
1388 :
1389 : /*
1390 : * In the switch statement below, when we perform a cached lookup for
1391 : * RANGE and LIST partitioned tables, if we find that the last found
1392 : * partition matches the 'values', we return the partition index right
1393 : * away. We do this instead of breaking out of the switch as we don't
1394 : * want to execute the code about the DEFAULT partition or do any updates
1395 : * for any of the cache-related fields. That would be a waste of effort
1396 : * as we already know it's not the DEFAULT partition and have no need to
1397 : * increment the number of times we found the same partition any higher
1398 : * than PARTITION_CACHED_FIND_THRESHOLD.
1399 : */
1400 :
1401 : /* Route as appropriate based on partitioning strategy. */
1821 alvherre 1402 523868 : switch (key->strategy)
1403 : {
1404 106268 : case PARTITION_STRATEGY_HASH:
1405 : {
1406 : uint64 rowHash;
1407 :
1408 : /* hash partitioning is too cheap to bother caching */
1761 tgl 1409 106268 : rowHash = compute_partition_hash_value(key->partnatts,
1410 : key->partsupfunc,
1411 : key->partcollation,
1412 : values, isnull);
1413 :
1414 : /*
1415 : * HASH partitions can't have a DEFAULT partition and we don't
1416 : * do any caching work for them, so just return the part index
1417 : */
250 drowley 1418 GNC 106268 : return boundinfo->indexes[rowHash % boundinfo->nindexes];
1419 : }
1420 :
1821 alvherre 1421 GIC 54922 : case PARTITION_STRATEGY_LIST:
1422 54922 : if (isnull[0])
1423 : {
1424 : /* this is far too cheap to bother doing any caching */
1761 tgl 1425 54 : if (partition_bound_accepts_nulls(boundinfo))
1426 : {
1427 : /*
1428 : * When there is a NULL partition we just return that
1429 : * directly. We don't have a bound_offset so it's not
1430 : * valid to drop into the code after the switch which
1431 : * checks and updates the cache fields. We perhaps should
1432 : * be invalidating the details of the last cached
1433 : * partition but there's no real need to. Keeping those
1434 : * fields set gives a chance at matching to the cached
1435 : * partition on the next lookup.
1436 : */
250 drowley 1437 GNC 45 : return boundinfo->null_index;
1438 : }
1439 : }
1440 : else
1441 : {
1442 : bool equal;
1443 :
1444 54868 : if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
1445 : {
1446 11946 : int last_datum_offset = partdesc->last_found_datum_index;
1447 11946 : Datum lastDatum = boundinfo->datums[last_datum_offset][0];
1448 : int32 cmpval;
1449 :
1450 : /* does the last found datum index match this datum? */
1451 11946 : cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
1452 11946 : key->partcollation[0],
1453 : lastDatum,
1454 : values[0]));
1455 :
1456 11946 : if (cmpval == 0)
1457 11769 : return boundinfo->indexes[last_datum_offset];
1458 :
1459 : /* fall-through and do a manual lookup */
1460 : }
1821 alvherre 1461 ECB :
1821 alvherre 1462 GIC 43099 : bound_offset = partition_list_bsearch(key->partsupfunc,
1821 alvherre 1463 ECB : key->partcollation,
1761 tgl 1464 : boundinfo,
1821 alvherre 1465 : values[0], &equal);
1821 alvherre 1466 CBC 43099 : if (bound_offset >= 0 && equal)
1761 tgl 1467 42930 : part_index = boundinfo->indexes[bound_offset];
1468 : }
1821 alvherre 1469 GIC 43108 : break;
1470 :
1471 362678 : case PARTITION_STRATEGY_RANGE:
1472 : {
1473 362678 : bool equal = false,
1474 362678 : range_partkey_has_null = false;
1475 : int i;
1476 :
1477 : /*
1478 : * No range includes NULL, so this will be accepted by the
1479 : * default partition if there is one, and otherwise rejected.
1480 : */
1481 736540 : for (i = 0; i < key->partnatts; i++)
1821 alvherre 1482 ECB : {
1821 alvherre 1483 GIC 373889 : if (isnull[i])
1821 alvherre 1484 ECB : {
1821 alvherre 1485 GIC 27 : range_partkey_has_null = true;
1486 27 : break;
1487 : }
1488 : }
1821 alvherre 1489 ECB :
1490 : /* NULLs belong in the DEFAULT partition */
250 drowley 1491 GNC 362678 : if (range_partkey_has_null)
1492 27 : break;
1493 :
1494 362651 : if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
1495 : {
1496 119481 : int last_datum_offset = partdesc->last_found_datum_index;
1497 119481 : Datum *lastDatums = boundinfo->datums[last_datum_offset];
1498 119481 : PartitionRangeDatumKind *kind = boundinfo->kind[last_datum_offset];
1499 : int32 cmpval;
1500 :
1501 : /* check if the value is >= to the lower bound */
1502 119481 : cmpval = partition_rbound_datum_cmp(key->partsupfunc,
1503 : key->partcollation,
1504 : lastDatums,
1505 : kind,
1506 : values,
1507 119481 : key->partnatts);
1821 alvherre 1508 ECB :
1509 : /*
1510 : * If it's equal to the lower bound then no need to check
1511 : * the upper bound.
1512 : */
250 drowley 1513 GNC 119481 : if (cmpval == 0)
1514 119386 : return boundinfo->indexes[last_datum_offset + 1];
1515 :
1516 116532 : if (cmpval < 0 && last_datum_offset + 1 < boundinfo->ndatums)
1517 : {
1518 : /* check if the value is below the upper bound */
1519 116532 : lastDatums = boundinfo->datums[last_datum_offset + 1];
1520 116532 : kind = boundinfo->kind[last_datum_offset + 1];
1521 116532 : cmpval = partition_rbound_datum_cmp(key->partsupfunc,
1522 : key->partcollation,
1523 : lastDatums,
1524 : kind,
1525 : values,
1526 116532 : key->partnatts);
1527 :
1528 116532 : if (cmpval > 0)
1529 116437 : return boundinfo->indexes[last_datum_offset + 1];
1530 : }
1531 : /* fall-through and do a manual lookup */
1532 : }
1533 :
1534 243265 : bound_offset = partition_range_datum_bsearch(key->partsupfunc,
1535 : key->partcollation,
1536 : boundinfo,
1537 243265 : key->partnatts,
1538 : values,
1539 : &equal);
1540 :
1541 : /*
1542 : * The bound at bound_offset is less than or equal to the
1543 : * tuple value, so the bound at offset+1 is the upper bound of
1544 : * the partition we're looking for, if there actually exists
1545 : * one.
1546 : */
1547 243265 : part_index = boundinfo->indexes[bound_offset + 1];
1548 : }
1821 alvherre 1549 GIC 243265 : break;
1550 :
1821 alvherre 1551 UIC 0 : default:
1552 0 : elog(ERROR, "unexpected partition strategy: %d",
1553 : (int) key->strategy);
1554 : }
1555 :
1556 : /*
1557 : * part_index < 0 means we failed to find a partition of this parent. Use
1821 alvherre 1558 ECB : * the default partition, if there is one.
1559 : */
1821 alvherre 1560 GIC 286400 : if (part_index < 0)
1561 : {
1562 : /*
1563 : * No need to reset the cache fields here. The next set of values
1564 : * might end up belonging to the cached partition, so leaving the
1565 : * cache alone improves the chances of a cache hit on the next lookup.
1566 : */
250 drowley 1567 GNC 308 : return boundinfo->default_index;
1568 : }
1569 :
1570 : /* we should only make it here when the code above set bound_offset */
1571 286092 : Assert(bound_offset >= 0);
1572 :
1573 : /*
1574 : * Attend to the cache fields. If the bound_offset matches the last
1575 : * cached bound offset then we've found the same partition as last time,
1576 : * so bump the count by one. If all goes well, we'll eventually reach
1577 : * PARTITION_CACHED_FIND_THRESHOLD and try the cache path next time
1578 : * around. Otherwise, we'll reset the cache count back to 1 to mark that
1579 : * we've found this partition for the first time.
1580 : */
1581 286092 : if (bound_offset == partdesc->last_found_datum_index)
1582 218174 : partdesc->last_found_count++;
1583 : else
1584 : {
1585 67918 : partdesc->last_found_count = 1;
1586 67918 : partdesc->last_found_part_index = part_index;
1587 67918 : partdesc->last_found_datum_index = bound_offset;
1588 : }
1589 :
1821 alvherre 1590 GIC 286092 : return part_index;
1591 : }
1821 alvherre 1592 ECB :
1593 : /*
1958 rhaas 1594 : * ExecBuildSlotPartitionKeyDescription
1971 1595 : *
1596 : * This works very much like BuildIndexValueDescription() and is currently
1597 : * used for building error messages when ExecFindPartition() fails to find
1598 : * partition for a row.
1599 : */
1600 : static char *
1971 rhaas 1601 GIC 74 : ExecBuildSlotPartitionKeyDescription(Relation rel,
1602 : Datum *values,
1603 : bool *isnull,
1971 rhaas 1604 ECB : int maxfieldlen)
1605 : {
1606 : StringInfoData buf;
1971 rhaas 1607 GIC 74 : PartitionKey key = RelationGetPartitionKey(rel);
1608 74 : int partnatts = get_partition_natts(key);
1609 : int i;
1971 rhaas 1610 CBC 74 : Oid relid = RelationGetRelid(rel);
1611 : AclResult aclresult;
1612 :
1971 rhaas 1613 GIC 74 : if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
1971 rhaas 1614 LBC 0 : return NULL;
1971 rhaas 1615 ECB :
1616 : /* If the user has table-level access, just go build the description. */
1971 rhaas 1617 CBC 74 : aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
1971 rhaas 1618 GIC 74 : if (aclresult != ACLCHECK_OK)
1971 rhaas 1619 ECB : {
1620 : /*
1621 : * Step through the columns of the partition key and make sure the
1622 : * user has SELECT rights on all of them.
1623 : */
1971 rhaas 1624 GIC 12 : for (i = 0; i < partnatts; i++)
1625 : {
1626 9 : AttrNumber attnum = get_partition_col_attnum(key, i);
1627 :
1628 : /*
1971 rhaas 1629 ECB : * If this partition key column is an expression, we return no
1630 : * detail rather than try to figure out what column(s) the
1631 : * expression includes and if the user has SELECT rights on them.
1632 : */
1971 rhaas 1633 CBC 15 : if (attnum == InvalidAttrNumber ||
1634 6 : pg_attribute_aclcheck(relid, attnum, GetUserId(),
1635 : ACL_SELECT) != ACLCHECK_OK)
1971 rhaas 1636 GIC 6 : return NULL;
1637 : }
1638 : }
1971 rhaas 1639 ECB :
1971 rhaas 1640 CBC 68 : initStringInfo(&buf);
1971 rhaas 1641 GIC 68 : appendStringInfo(&buf, "(%s) = (",
1971 rhaas 1642 ECB : pg_get_partkeydef_columns(relid, true));
1643 :
1971 rhaas 1644 CBC 163 : for (i = 0; i < partnatts; i++)
1971 rhaas 1645 ECB : {
1646 : char *val;
1647 : int vallen;
1648 :
1971 rhaas 1649 GIC 95 : if (isnull[i])
1971 rhaas 1650 CBC 15 : val = "null";
1651 : else
1652 : {
1653 : Oid foutoid;
1654 : bool typisvarlena;
1971 rhaas 1655 ECB :
1971 rhaas 1656 GIC 80 : getTypeOutputInfo(get_partition_col_typid(key, i),
1657 : &foutoid, &typisvarlena);
1658 80 : val = OidOutputFunctionCall(foutoid, values[i]);
1659 : }
1660 :
1971 rhaas 1661 CBC 95 : if (i > 0)
1662 27 : appendStringInfoString(&buf, ", ");
1663 :
1971 rhaas 1664 ECB : /* truncate if needed */
1971 rhaas 1665 GIC 95 : vallen = strlen(val);
1666 95 : if (vallen <= maxfieldlen)
1356 drowley 1667 CBC 95 : appendBinaryStringInfo(&buf, val, vallen);
1971 rhaas 1668 ECB : else
1669 : {
1971 rhaas 1670 UIC 0 : vallen = pg_mbcliplen(val, vallen, maxfieldlen);
1671 0 : appendBinaryStringInfo(&buf, val, vallen);
1672 0 : appendStringInfoString(&buf, "...");
1673 : }
1971 rhaas 1674 ECB : }
1675 :
1971 rhaas 1676 CBC 68 : appendStringInfoChar(&buf, ')');
1971 rhaas 1677 ECB :
1971 rhaas 1678 GIC 68 : return buf.data;
1679 : }
1680 :
1681 : /*
699 tgl 1682 ECB : * adjust_partition_colnos
1683 : * Adjust the list of UPDATE target column numbers to account for
1684 : * attribute differences between the parent and the partition.
362 alvherre 1685 : *
1686 : * Note: mustn't be called if no adjustment is required.
1687 : */
1688 : static List *
699 tgl 1689 GIC 38 : adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri)
1690 : {
1691 38 : TupleConversionMap *map = ExecGetChildToRootMap(leaf_part_rri);
1692 :
362 alvherre 1693 38 : Assert(map != NULL);
1694 :
377 alvherre 1695 CBC 38 : return adjust_partition_colnos_using_map(colnos, map->attrMap);
1696 : }
377 alvherre 1697 ECB :
1698 : /*
377 alvherre 1699 EUB : * adjust_partition_colnos_using_map
1700 : * Like adjust_partition_colnos, but uses a caller-supplied map instead
1701 : * of assuming to map from the "root" result relation.
1702 : *
1703 : * Note: mustn't be called if no adjustment is required.
1704 : */
1705 : static List *
377 alvherre 1706 GIC 60 : adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
1707 : {
377 alvherre 1708 CBC 60 : List *new_colnos = NIL;
1709 : ListCell *lc;
1710 :
377 alvherre 1711 GIC 60 : Assert(attrMap != NULL); /* else we shouldn't be here */
1712 :
699 tgl 1713 152 : foreach(lc, colnos)
1714 : {
699 tgl 1715 CBC 92 : AttrNumber parentattrno = lfirst_int(lc);
1716 :
699 tgl 1717 GIC 92 : if (parentattrno <= 0 ||
1718 92 : parentattrno > attrMap->maplen ||
699 tgl 1719 CBC 92 : attrMap->attnums[parentattrno - 1] == 0)
699 tgl 1720 UIC 0 : elog(ERROR, "unexpected attno %d in target column list",
1721 : parentattrno);
699 tgl 1722 GIC 92 : new_colnos = lappend_int(new_colnos,
1723 92 : attrMap->attnums[parentattrno - 1]);
1724 : }
1725 :
1726 60 : return new_colnos;
1727 : }
1728 :
1828 alvherre 1729 ECB : /*-------------------------------------------------------------------------
1730 : * Run-Time Partition Pruning Support.
1731 : *
1732 : * The following series of functions exist to support the removal of unneeded
1764 tgl 1733 : * subplans for queries against partitioned tables. The supporting functions
1734 : * here are designed to work with any plan type which supports an arbitrary
1735 : * number of subplans, e.g. Append, MergeAppend.
1736 : *
1737 : * When pruning involves comparison of a partition key to a constant, it's
1738 : * done by the planner. However, if we have a comparison to a non-constant
1739 : * but not volatile expression, that presents an opportunity for run-time
1740 : * pruning by the executor, allowing irrelevant partitions to be skipped
1741 : * dynamically.
1742 : *
1743 : * We must distinguish expressions containing PARAM_EXEC Params from
1744 : * expressions that don't contain those. Even though a PARAM_EXEC Param is
1745 : * considered to be a stable expression, it can change value from one plan
1746 : * node scan to the next during query execution. Stable comparison
1747 : * expressions that don't involve such Params allow partition pruning to be
1748 : * done once during executor startup. Expressions that do involve such Params
1749 : * require us to prune separately for each scan of the parent plan node.
1750 : *
1751 : * Note that pruning away unneeded subplans during executor startup has the
1752 : * added benefit of not having to initialize the unneeded subplans at all.
1753 : *
1754 : *
1828 alvherre 1755 : * Functions:
1756 : *
1757 : * ExecInitPartitionPruning:
369 1758 : * Creates the PartitionPruneState required by ExecFindMatchingSubPlans.
1759 : * Details stored include how to map the partition index returned by the
1760 : * partition pruning code into subplan indexes. Also determines the set
1761 : * of subplans to initialize considering the result of performing initial
369 alvherre 1762 EUB : * pruning steps if any. Maps in PartitionPruneState are updated to
1763 : * account for initial pruning possibly having eliminated some of the
1764 : * subplans.
1828 alvherre 1765 ECB : *
1766 : * ExecFindMatchingSubPlans:
1767 : * Returns indexes of matching subplans after evaluating the expressions
1768 : * that are safe to evaluate at a given point. This function is first
1769 : * called during ExecInitPartitionPruning() to find the initially
1770 : * matching subplans based on performing the initial pruning steps and
1771 : * then must be called again each time the value of a Param listed in
1764 tgl 1772 : * PartitionPruneState's 'execparamids' changes.
1773 : *-------------------------------------------------------------------------
1828 alvherre 1774 : */
1775 :
1776 : /*
1777 : * ExecInitPartitionPruning
1778 : * Initialize data structure needed for run-time partition pruning and
1779 : * do initial pruning if needed
1780 : *
1781 : * 'root_parent_relids' identifies the relation to which both the parent plan
1782 : * and the PartitionPruneInfo given by 'part_prune_index' belong.
1783 : *
369 1784 : * On return, *initially_valid_subplans is assigned the set of indexes of
1785 : * child subplans that must be initialized along with the parent plan node.
1786 : * Initial pruning is performed here if needed and in that case only the
1787 : * surviving subplans' indexes are added.
1788 : *
1789 : * If subplans are indeed pruned, subplan_map arrays contained in the returned
1790 : * PartitionPruneState are re-sequenced to not count those, though only if the
1791 : * maps will be needed for subsequent execution pruning passes.
1792 : */
1793 : PartitionPruneState *
369 alvherre 1794 GIC 302 : ExecInitPartitionPruning(PlanState *planstate,
369 alvherre 1795 ECB : int n_total_subplans,
1796 : int part_prune_index,
1797 : Bitmapset *root_parent_relids,
1798 : Bitmapset **initially_valid_subplans)
1799 : {
1800 : PartitionPruneState *prunestate;
369 alvherre 1801 CBC 302 : EState *estate = planstate->state;
1802 : PartitionPruneInfo *pruneinfo;
1803 :
1804 : /* Obtain the pruneinfo we need, and make sure it's the right one */
129 alvherre 1805 GNC 302 : pruneinfo = list_nth(estate->es_part_prune_infos, part_prune_index);
1806 302 : if (!bms_equal(root_parent_relids, pruneinfo->root_parent_relids))
129 alvherre 1807 UNC 0 : ereport(ERROR,
1808 : errcode(ERRCODE_INTERNAL_ERROR),
1809 : errmsg_internal("mismatching PartitionPruneInfo found at part_prune_index %d",
1810 : part_prune_index),
1811 : errdetail_internal("plan node relids %s, pruneinfo relids %s",
1812 : bmsToString(root_parent_relids),
1813 : bmsToString(pruneinfo->root_parent_relids)));
369 alvherre 1814 ECB :
1815 : /* We may need an expression context to evaluate partition exprs */
369 alvherre 1816 GIC 302 : ExecAssignExprContext(estate, planstate);
1817 :
1818 : /* Create the working data structure for pruning */
1819 302 : prunestate = CreatePartitionPruneState(planstate, pruneinfo);
369 alvherre 1820 ECB :
1821 : /*
1822 : * Perform an initial partition prune pass, if required.
1823 : */
369 alvherre 1824 GIC 302 : if (prunestate->do_initial_prune)
369 alvherre 1825 CBC 130 : *initially_valid_subplans = ExecFindMatchingSubPlans(prunestate, true);
369 alvherre 1826 ECB : else
1827 : {
1828 : /* No pruning, so we'll need to initialize all subplans */
369 alvherre 1829 CBC 172 : Assert(n_total_subplans > 0);
1830 172 : *initially_valid_subplans = bms_add_range(NULL, 0,
369 alvherre 1831 ECB : n_total_subplans - 1);
1832 : }
1833 :
369 alvherre 1834 EUB : /*
1835 : * Re-sequence subplan indexes contained in prunestate to account for any
1836 : * that were removed above due to initial pruning. No need to do this if
1837 : * no steps were removed.
1838 : */
369 alvherre 1839 GIC 302 : if (bms_num_members(*initially_valid_subplans) < n_total_subplans)
369 alvherre 1840 ECB : {
1841 : /*
1842 : * We can safely skip this when !do_exec_prune, even though that
1843 : * leaves invalid data in prunestate, because that data won't be
1844 : * consulted again (cf initial Assert in ExecFindMatchingSubPlans).
1845 : */
369 alvherre 1846 GIC 130 : if (prunestate->do_exec_prune)
1847 24 : PartitionPruneFixSubPlanMap(prunestate,
1848 : *initially_valid_subplans,
1849 : n_total_subplans);
1850 : }
1851 :
1852 302 : return prunestate;
369 alvherre 1853 ECB : }
1854 :
1855 : /*
1856 : * CreatePartitionPruneState
1857 : * Build the data structure required for calling ExecFindMatchingSubPlans
1858 : *
1764 tgl 1859 : * 'planstate' is the parent plan node's execution state.
1860 : *
1861 : * 'pruneinfo' is a PartitionPruneInfo as generated by
1862 : * make_partition_pruneinfo. Here we build a PartitionPruneState containing a
1863 : * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
1864 : * pruneinfo->prune_infos), each of which contains a PartitionedRelPruningData
1865 : * for each PartitionedRelPruneInfo appearing in that sublist. This two-level
1866 : * system is needed to keep from confusing the different hierarchies when a
1867 : * UNION ALL contains multiple partitioned tables as children. The data
1868 : * stored in each PartitionedRelPruningData can be re-used each time we
1869 : * re-evaluate which partitions match the pruning steps provided in each
369 alvherre 1870 : * PartitionedRelPruneInfo.
1871 : */
1872 : static PartitionPruneState *
369 alvherre 1873 GIC 302 : CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo)
1874 : {
1648 tgl 1875 CBC 302 : EState *estate = planstate->state;
1876 : PartitionPruneState *prunestate;
1712 tgl 1877 ECB : int n_part_hierarchies;
1878 : ListCell *lc;
1828 alvherre 1879 : int i;
369 alvherre 1880 GIC 302 : ExprContext *econtext = planstate->ps_ExprContext;
1828 alvherre 1881 ECB :
717 1882 : /* For data reading, executor always omits detached partitions */
1494 rhaas 1883 CBC 302 : if (estate->es_partition_directory == NULL)
1494 rhaas 1884 GBC 293 : estate->es_partition_directory =
717 alvherre 1885 GIC 293 : CreatePartitionDirectory(estate->es_query_cxt, false);
1494 rhaas 1886 ECB :
369 alvherre 1887 CBC 302 : n_part_hierarchies = list_length(pruneinfo->prune_infos);
1712 tgl 1888 GIC 302 : Assert(n_part_hierarchies > 0);
1889 :
1764 tgl 1890 ECB : /*
1891 : * Allocate the data structure
1892 : */
1893 : prunestate = (PartitionPruneState *)
1712 tgl 1894 GIC 302 : palloc(offsetof(PartitionPruneState, partprunedata) +
1895 : sizeof(PartitionPruningData *) * n_part_hierarchies);
1896 :
1897 302 : prunestate->execparamids = NULL;
1898 : /* other_subplans can change at runtime, so we need our own copy */
369 alvherre 1899 302 : prunestate->other_subplans = bms_copy(pruneinfo->other_subplans);
1764 tgl 1900 302 : prunestate->do_initial_prune = false; /* may be set below */
1901 302 : prunestate->do_exec_prune = false; /* may be set below */
1712 1902 302 : prunestate->num_partprunedata = n_part_hierarchies;
1903 :
1904 : /*
1905 : * Create a short-term memory context which we'll use when making calls to
1906 : * the partition pruning functions. This avoids possible memory leaks,
1907 : * since the pruning functions call comparison functions that aren't under
1908 : * our control.
1909 : */
1828 alvherre 1910 302 : prunestate->prune_context =
1911 302 : AllocSetContextCreate(CurrentMemoryContext,
1912 : "Partition Prune",
1913 : ALLOCSET_DEFAULT_SIZES);
1914 :
1915 302 : i = 0;
369 1916 616 : foreach(lc, pruneinfo->prune_infos)
1917 : {
1712 tgl 1918 314 : List *partrelpruneinfos = lfirst_node(List, lc);
1919 314 : int npartrelpruneinfos = list_length(partrelpruneinfos);
1920 : PartitionPruningData *prunedata;
1921 : ListCell *lc2;
1922 : int j;
1923 :
1924 : prunedata = (PartitionPruningData *)
1925 314 : palloc(offsetof(PartitionPruningData, partrelprunedata) +
1926 314 : npartrelpruneinfos * sizeof(PartitionedRelPruningData));
1927 314 : prunestate->partprunedata[i] = prunedata;
1928 314 : prunedata->num_partrelprunedata = npartrelpruneinfos;
1929 :
1930 314 : j = 0;
1931 1030 : foreach(lc2, partrelpruneinfos)
1932 : {
1933 716 : PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2);
1934 716 : PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
1935 : Relation partrel;
1936 : PartitionDesc partdesc;
1937 : PartitionKey partkey;
1938 :
1939 : /*
1940 : * We can rely on the copies of the partitioned table's partition
1941 : * key and partition descriptor appearing in its relcache entry,
1942 : * because that entry will be held open and locked for the
1943 : * duration of this executor run.
1944 : */
1648 1945 716 : partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex);
1946 716 : partkey = RelationGetPartitionKey(partrel);
1494 rhaas 1947 716 : partdesc = PartitionDirectoryLookup(estate->es_partition_directory,
1948 : partrel);
1949 :
1950 : /*
1951 : * Initialize the subplan_map and subpart_map.
1952 : *
1953 : * Because we request detached partitions to be included, and
1954 : * detaching waits for old transactions, it is safe to assume that
1955 : * no partitions have disappeared since this query was planned.
1956 : *
1957 : * However, new partitions may have been added.
1494 rhaas 1958 ECB : */
1494 rhaas 1959 GIC 716 : Assert(partdesc->nparts >= pinfo->nparts);
1423 tgl 1960 716 : pprune->nparts = partdesc->nparts;
1494 rhaas 1961 716 : pprune->subplan_map = palloc(sizeof(int) * partdesc->nparts);
1962 716 : if (partdesc->nparts == pinfo->nparts)
1963 : {
1964 : /*
1494 rhaas 1965 ECB : * There are no new partitions, so this is simple. We can
1966 : * simply point to the subpart_map from the plan, but we must
1967 : * copy the subplan_map since we may change it later.
1968 : */
1494 rhaas 1969 CBC 715 : pprune->subpart_map = pinfo->subpart_map;
1970 715 : memcpy(pprune->subplan_map, pinfo->subplan_map,
1494 rhaas 1971 GBC 715 : sizeof(int) * pinfo->nparts);
1972 :
1973 : /*
1974 : * Double-check that the list of unpruned relations has not
1975 : * changed. (Pruned partitions are not in relid_map[].)
1976 : */
1977 : #ifdef USE_ASSERT_CHECKING
1471 tgl 1978 GIC 2806 : for (int k = 0; k < pinfo->nparts; k++)
1979 : {
1471 tgl 1980 CBC 2091 : Assert(partdesc->oids[k] == pinfo->relid_map[k] ||
1981 : pinfo->subplan_map[k] == -1);
1982 : }
1471 tgl 1983 ECB : #endif
1984 : }
1985 : else
1986 : {
1418 tgl 1987 GIC 1 : int pd_idx = 0;
1418 tgl 1988 ECB : int pp_idx;
1494 rhaas 1989 :
1990 : /*
1991 : * Some new partitions have appeared since plan time, and
1992 : * those are reflected in our PartitionDesc but were not
1993 : * present in the one used to construct subplan_map and
1994 : * subpart_map. So we must construct new and longer arrays
1995 : * where the partitions that were originally present map to
1996 : * the same sub-structures, and any added partitions map to
1997 : * -1, as if the new partitions had been pruned.
1998 : *
1999 : * Note: pinfo->relid_map[] may contain InvalidOid entries for
2000 : * partitions pruned by the planner. We cannot tell exactly
2001 : * which of the partdesc entries these correspond to, but we
2002 : * don't have to; just skip over them. The non-pruned
977 tgl 2003 : * relid_map entries, however, had better be a subset of the
2004 : * partdesc entries and in the same order.
2005 : */
1494 rhaas 2006 GIC 1 : pprune->subpart_map = palloc(sizeof(int) * partdesc->nparts);
977 tgl 2007 5 : for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++)
2008 : {
2009 : /* Skip any InvalidOid relid_map entries */
977 tgl 2010 CBC 5 : while (pd_idx < pinfo->nparts &&
2011 5 : !OidIsValid(pinfo->relid_map[pd_idx]))
977 tgl 2012 GIC 1 : pd_idx++;
2013 :
2014 4 : if (pd_idx < pinfo->nparts &&
2015 4 : pinfo->relid_map[pd_idx] == partdesc->oids[pp_idx])
1494 rhaas 2016 ECB : {
2017 : /* match... */
1494 rhaas 2018 GIC 2 : pprune->subplan_map[pp_idx] =
2019 2 : pinfo->subplan_map[pd_idx];
2020 2 : pprune->subpart_map[pp_idx] =
977 tgl 2021 2 : pinfo->subpart_map[pd_idx];
2022 2 : pd_idx++;
2023 : }
2024 : else
2025 : {
2026 : /* this partdesc entry is not in the plan */
2027 2 : pprune->subplan_map[pp_idx] = -1;
2028 2 : pprune->subpart_map[pp_idx] = -1;
2029 : }
2030 : }
2031 :
2032 : /*
2033 : * It might seem that we need to skip any trailing InvalidOid
2034 : * entries in pinfo->relid_map before checking that we scanned
2035 : * all of the relid_map. But we will have skipped them above,
2036 : * because they must correspond to some partdesc->oids
977 tgl 2037 ECB : * entries; we just couldn't tell which.
2038 : */
977 tgl 2039 CBC 1 : if (pd_idx != pinfo->nparts)
977 tgl 2040 UIC 0 : elog(ERROR, "could not match partition child tables to plan elements");
2041 : }
2042 :
2043 : /* present_parts is also subject to later modification */
1423 tgl 2044 CBC 716 : pprune->present_parts = bms_copy(pinfo->present_parts);
2045 :
2046 : /*
2047 : * Initialize pruning contexts as needed. Note that we must skip
2048 : * execution-time partition pruning in EXPLAIN (GENERIC_PLAN),
2049 : * since parameter values may be missing.
1423 tgl 2050 ECB : */
1423 tgl 2051 CBC 716 : pprune->initial_pruning_steps = pinfo->initial_pruning_steps;
16 tgl 2052 GNC 716 : if (pinfo->initial_pruning_steps &&
2053 184 : !(econtext->ecxt_estate->es_top_eflags & EXEC_FLAG_EXPLAIN_GENERIC))
1811 alvherre 2054 ECB : {
369 alvherre 2055 CBC 181 : InitPartitionPruneContext(&pprune->initial_context,
2056 : pinfo->initial_pruning_steps,
2057 : partdesc, partkey, planstate,
2058 : econtext);
2059 : /* Record whether initial pruning is needed at any level */
1423 tgl 2060 GIC 181 : prunestate->do_initial_prune = true;
1423 tgl 2061 ECB : }
1423 tgl 2062 GIC 716 : pprune->exec_pruning_steps = pinfo->exec_pruning_steps;
16 tgl 2063 GNC 716 : if (pinfo->exec_pruning_steps &&
2064 250 : !(econtext->ecxt_estate->es_top_eflags & EXEC_FLAG_EXPLAIN_GENERIC))
1423 tgl 2065 ECB : {
369 alvherre 2066 GIC 250 : InitPartitionPruneContext(&pprune->exec_context,
369 alvherre 2067 ECB : pinfo->exec_pruning_steps,
2068 : partdesc, partkey, planstate,
2069 : econtext);
1423 tgl 2070 : /* Record whether exec pruning is needed at any level */
1423 tgl 2071 GIC 250 : prunestate->do_exec_prune = true;
2072 : }
2073 :
2074 : /*
2075 : * Accumulate the IDs of all PARAM_EXEC Params affecting the
2076 : * partitioning decisions at this plan node.
2077 : */
1712 tgl 2078 CBC 1432 : prunestate->execparamids = bms_add_members(prunestate->execparamids,
2079 716 : pinfo->execparamids);
2080 :
1712 tgl 2081 GIC 716 : j++;
2082 : }
1828 alvherre 2083 CBC 314 : i++;
1828 alvherre 2084 ECB : }
2085 :
1828 alvherre 2086 CBC 302 : return prunestate;
1828 alvherre 2087 ECB : }
2088 :
2089 : /*
2090 : * Initialize a PartitionPruneContext for the given list of pruning steps.
2091 : */
2092 : static void
369 alvherre 2093 CBC 431 : InitPartitionPruneContext(PartitionPruneContext *context,
369 alvherre 2094 ECB : List *pruning_steps,
2095 : PartitionDesc partdesc,
2096 : PartitionKey partkey,
2097 : PlanState *planstate,
2098 : ExprContext *econtext)
1423 tgl 2099 : {
2100 : int n_steps;
2101 : int partnatts;
2102 : ListCell *lc;
2103 :
1423 tgl 2104 GIC 431 : n_steps = list_length(pruning_steps);
2105 :
2106 431 : context->strategy = partkey->strategy;
2107 431 : context->partnatts = partnatts = partkey->partnatts;
2108 431 : context->nparts = partdesc->nparts;
2109 431 : context->boundinfo = partdesc->boundinfo;
2110 431 : context->partcollation = partkey->partcollation;
2111 431 : context->partsupfunc = partkey->partsupfunc;
2112 :
1423 tgl 2113 ECB : /* We'll look up type-specific support functions as needed */
1423 tgl 2114 CBC 431 : context->stepcmpfuncs = (FmgrInfo *)
2115 431 : palloc0(sizeof(FmgrInfo) * n_steps * partnatts);
2116 :
1423 tgl 2117 GIC 431 : context->ppccontext = CurrentMemoryContext;
2118 431 : context->planstate = planstate;
369 alvherre 2119 431 : context->exprcontext = econtext;
2120 :
2121 : /* Initialize expression state for each expression we need */
1423 tgl 2122 431 : context->exprstates = (ExprState **)
2123 431 : palloc0(sizeof(ExprState *) * n_steps * partnatts);
2124 1191 : foreach(lc, pruning_steps)
2125 : {
2126 760 : PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc);
1423 tgl 2127 ECB : ListCell *lc2;
2128 : int keyno;
2129 :
2130 : /* not needed for other step kinds */
1423 tgl 2131 GIC 760 : if (!IsA(step, PartitionPruneStepOp))
2132 142 : continue;
2133 :
2134 618 : Assert(list_length(step->exprs) <= partnatts);
2135 :
2136 618 : keyno = 0;
1423 tgl 2137 CBC 1260 : foreach(lc2, step->exprs)
1423 tgl 2138 ECB : {
1423 tgl 2139 CBC 642 : Expr *expr = (Expr *) lfirst(lc2);
2140 :
2141 : /* not needed for Consts */
1423 tgl 2142 GIC 642 : if (!IsA(expr, Const))
2143 : {
2144 595 : int stateidx = PruneCxtStateIdx(partnatts,
2145 : step->step.step_id,
1423 tgl 2146 ECB : keyno);
2147 :
369 alvherre 2148 : /*
2149 : * When planstate is NULL, pruning_steps is known not to
2150 : * contain any expressions that depend on the parent plan.
2151 : * Information of any available EXTERN parameters must be
2152 : * passed explicitly in that case, which the caller must have
2153 : * made available via econtext.
2154 : */
369 alvherre 2155 CBC 595 : if (planstate == NULL)
369 alvherre 2156 UIC 0 : context->exprstates[stateidx] =
2157 0 : ExecInitExprWithParams(expr,
2158 : econtext->ecxt_param_list_info);
2159 : else
369 alvherre 2160 GIC 595 : context->exprstates[stateidx] =
2161 595 : ExecInitExpr(expr, context->planstate);
2162 : }
1423 tgl 2163 642 : keyno++;
2164 : }
2165 : }
2166 431 : }
2167 :
2168 : /*
2169 : * PartitionPruneFixSubPlanMap
2170 : * Fix mapping of partition indexes to subplan indexes contained in
2171 : * prunestate by considering the new list of subplans that survived
2172 : * initial pruning
2173 : *
369 alvherre 2174 ECB : * Current values of the indexes present in PartitionPruneState count all the
2175 : * subplans that would be present before initial pruning was done. If initial
2176 : * pruning got rid of some of the subplans, any subsequent pruning passes will
2177 : * be looking at a different set of target subplans to choose from than those
2178 : * in the pre-initial-pruning set, so the maps in PartitionPruneState
2179 : * containing those indexes must be updated to reflect the new indexes of
2180 : * subplans in the post-initial-pruning set.
2181 : */
2182 : static void
369 alvherre 2183 CBC 24 : PartitionPruneFixSubPlanMap(PartitionPruneState *prunestate,
2184 : Bitmapset *initially_valid_subplans,
2185 : int n_total_subplans)
1828 alvherre 2186 ECB : {
369 2187 : int *new_subplan_indexes;
2188 : Bitmapset *new_other_subplans;
1712 tgl 2189 : int i;
369 alvherre 2190 : int newidx;
2191 :
2192 : /*
2193 : * First we must build a temporary array which maps old subplan indexes to
2194 : * new ones. For convenience of initialization, we use 1-based indexes in
2195 : * this array and leave pruned items as 0.
1828 2196 : */
369 alvherre 2197 GIC 24 : new_subplan_indexes = (int *) palloc0(sizeof(int) * n_total_subplans);
2198 24 : newidx = 1;
2199 24 : i = -1;
2200 93 : while ((i = bms_next_member(initially_valid_subplans, i)) >= 0)
2201 : {
2202 69 : Assert(i < n_total_subplans);
2203 69 : new_subplan_indexes[i] = newidx++;
2204 : }
2205 :
2206 : /*
369 alvherre 2207 ECB : * Now we can update each PartitionedRelPruneInfo's subplan_map with new
369 alvherre 2208 EUB : * subplan indexes. We must also recompute its present_parts bitmap.
2209 : */
369 alvherre 2210 GIC 60 : for (i = 0; i < prunestate->num_partprunedata; i++)
2211 : {
369 alvherre 2212 CBC 36 : PartitionPruningData *prunedata = prunestate->partprunedata[i];
2213 : int j;
2214 :
2215 : /*
2216 : * Within each hierarchy, we perform this loop in back-to-front order
2217 : * so that we determine present_parts for the lowest-level partitioned
2218 : * tables first. This way we can tell whether a sub-partitioned
369 alvherre 2219 ECB : * table's partitions were entirely pruned so we can exclude it from
2220 : * the current level's present_parts.
1828 2221 : */
369 alvherre 2222 GIC 132 : for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
1828 alvherre 2223 ECB : {
369 alvherre 2224 GIC 96 : PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2225 96 : int nparts = pprune->nparts;
2226 : int k;
2227 :
369 alvherre 2228 ECB : /* We just rebuild present_parts from scratch */
369 alvherre 2229 GIC 96 : bms_free(pprune->present_parts);
369 alvherre 2230 CBC 96 : pprune->present_parts = NULL;
1828 alvherre 2231 ECB :
369 alvherre 2232 CBC 354 : for (k = 0; k < nparts; k++)
2233 : {
2234 258 : int oldidx = pprune->subplan_map[k];
2235 : int subidx;
2236 :
2237 : /*
2238 : * If this partition existed as a subplan then change the old
369 alvherre 2239 ECB : * subplan index to the new subplan index. The new index may
2240 : * become -1 if the partition was pruned above, or it may just
2241 : * come earlier in the subplan list due to some subplans being
2242 : * removed earlier in the list. If it's a subpartition, add
2243 : * it to present_parts unless it's entirely pruned.
2244 : */
369 alvherre 2245 GIC 258 : if (oldidx >= 0)
1764 tgl 2246 ECB : {
369 alvherre 2247 CBC 198 : Assert(oldidx < n_total_subplans);
369 alvherre 2248 GIC 198 : pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;
1764 tgl 2249 ECB :
369 alvherre 2250 GIC 198 : if (new_subplan_indexes[oldidx] > 0)
369 alvherre 2251 CBC 57 : pprune->present_parts =
369 alvherre 2252 GIC 57 : bms_add_member(pprune->present_parts, k);
2253 : }
369 alvherre 2254 CBC 60 : else if ((subidx = pprune->subpart_map[k]) >= 0)
2255 : {
2256 : PartitionedRelPruningData *subprune;
2257 :
369 alvherre 2258 GIC 60 : subprune = &prunedata->partrelprunedata[subidx];
2259 :
2260 60 : if (!bms_is_empty(subprune->present_parts))
369 alvherre 2261 CBC 24 : pprune->present_parts =
369 alvherre 2262 GIC 24 : bms_add_member(pprune->present_parts, k);
2263 : }
2264 : }
2265 : }
2266 : }
2267 :
2268 : /*
2269 : * We must also recompute the other_subplans set, since indexes in it may
2270 : * change.
2271 : */
369 alvherre 2272 CBC 24 : new_other_subplans = NULL;
369 alvherre 2273 GIC 24 : i = -1;
369 alvherre 2274 CBC 36 : while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
2275 12 : new_other_subplans = bms_add_member(new_other_subplans,
2276 12 : new_subplan_indexes[i] - 1);
1606 tgl 2277 ECB :
369 alvherre 2278 CBC 24 : bms_free(prunestate->other_subplans);
2279 24 : prunestate->other_subplans = new_other_subplans;
2280 :
369 alvherre 2281 GIC 24 : pfree(new_subplan_indexes);
1828 alvherre 2282 CBC 24 : }
1828 alvherre 2283 ECB :
2284 : /*
2285 : * ExecFindMatchingSubPlans
1826 2286 : * Determine which subplans match the pruning steps detailed in
1764 tgl 2287 : * 'prunestate' for the current comparison expression values.
2288 : *
2289 : * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This
369 alvherre 2290 : * differentiates the initial executor-time pruning step from later
2291 : * runtime pruning.
1828 2292 : */
2293 : Bitmapset *
369 alvherre 2294 CBC 1864 : ExecFindMatchingSubPlans(PartitionPruneState *prunestate,
2295 : bool initial_prune)
2296 : {
1828 alvherre 2297 GIC 1864 : Bitmapset *result = NULL;
2298 : MemoryContext oldcontext;
1712 tgl 2299 ECB : int i;
1828 alvherre 2300 :
2301 : /*
369 2302 : * Either we're here on the initial prune done during pruning
2303 : * initialization, or we're at a point where PARAM_EXEC Params can be
2304 : * evaluated *and* there are steps in which to do so.
1606 tgl 2305 : */
369 alvherre 2306 GIC 1864 : Assert(initial_prune || prunestate->do_exec_prune);
1606 tgl 2307 ECB :
2308 : /*
2309 : * Switch to a temp context to avoid leaking memory in the executor's
2310 : * query-lifespan memory context.
2311 : */
1828 alvherre 2312 CBC 1864 : oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
2313 :
2314 : /*
2315 : * For each hierarchy, do the pruning tests, and add nondeletable
2316 : * subplans' indexes to "result".
2317 : */
1712 tgl 2318 GIC 3749 : for (i = 0; i < prunestate->num_partprunedata; i++)
2319 : {
369 alvherre 2320 1885 : PartitionPruningData *prunedata = prunestate->partprunedata[i];
2321 : PartitionedRelPruningData *pprune;
2322 :
369 alvherre 2323 ECB : /*
369 alvherre 2324 EUB : * We pass the zeroth item, belonging to the root table of the
2325 : * hierarchy, and find_matching_subplans_recurse() takes care of
2326 : * recursing to other (lower-level) parents as needed.
2327 : */
1712 tgl 2328 CBC 1885 : pprune = &prunedata->partrelprunedata[0];
369 alvherre 2329 1885 : find_matching_subplans_recurse(prunedata, pprune, initial_prune,
2330 : &result);
1712 tgl 2331 ECB :
2332 : /* Expression eval may have used space in ExprContext too */
1423 tgl 2333 GIC 1885 : if (pprune->exec_pruning_steps)
369 alvherre 2334 CBC 1716 : ResetExprContext(pprune->exec_context.exprcontext);
2335 : }
2336 :
2337 : /* Add in any subplans that partition pruning didn't account for */
1606 tgl 2338 GIC 1864 : result = bms_add_members(result, prunestate->other_subplans);
2339 :
1828 alvherre 2340 1864 : MemoryContextSwitchTo(oldcontext);
2341 :
2342 : /* Copy result out of the temp context before we reset it */
2343 1864 : result = bms_copy(result);
2344 :
2345 1864 : MemoryContextReset(prunestate->prune_context);
2346 :
2347 1864 : return result;
2348 : }
2349 :
2350 : /*
1764 tgl 2351 ECB : * find_matching_subplans_recurse
2352 : * Recursive worker function for ExecFindMatchingSubPlans
2353 : *
2354 : * Adds valid (non-prunable) subplan IDs to *validsubplans
2355 : */
2356 : static void
1712 tgl 2357 GIC 2090 : find_matching_subplans_recurse(PartitionPruningData *prunedata,
2358 : PartitionedRelPruningData *pprune,
2359 : bool initial_prune,
2360 : Bitmapset **validsubplans)
2361 : {
2362 : Bitmapset *partset;
2363 : int i;
2364 :
1828 alvherre 2365 ECB : /* Guard against stack overflow due to overly deep partition hierarchy. */
1828 alvherre 2366 CBC 2090 : check_stack_depth();
1828 alvherre 2367 ECB :
369 2368 : /*
2369 : * Prune as appropriate, if we have pruning steps matching the current
2370 : * execution context. Otherwise just include all partitions at this
2371 : * level.
2372 : */
1423 tgl 2373 GIC 2090 : if (initial_prune && pprune->initial_pruning_steps)
2374 172 : partset = get_matching_partitions(&pprune->initial_context,
2375 : pprune->initial_pruning_steps);
2376 1918 : else if (!initial_prune && pprune->exec_pruning_steps)
2377 1749 : partset = get_matching_partitions(&pprune->exec_context,
1423 tgl 2378 ECB : pprune->exec_pruning_steps);
2379 : else
1828 alvherre 2380 CBC 169 : partset = pprune->present_parts;
2381 :
2382 : /* Translate partset into subplan indexes */
1828 alvherre 2383 GIC 2090 : i = -1;
2384 2919 : while ((i = bms_next_member(partset, i)) >= 0)
2385 : {
1764 tgl 2386 829 : if (pprune->subplan_map[i] >= 0)
1828 alvherre 2387 623 : *validsubplans = bms_add_member(*validsubplans,
1764 tgl 2388 623 : pprune->subplan_map[i]);
2389 : else
1828 alvherre 2390 ECB : {
1828 alvherre 2391 GIC 206 : int partidx = pprune->subpart_map[i];
1828 alvherre 2392 ECB :
1764 tgl 2393 CBC 206 : if (partidx >= 0)
1712 tgl 2394 GIC 205 : find_matching_subplans_recurse(prunedata,
2395 : &prunedata->partrelprunedata[partidx],
2396 : initial_prune, validsubplans);
1828 alvherre 2397 ECB : else
2398 : {
2399 : /*
1697 2400 : * We get here if the planner already pruned all the sub-
2401 : * partitions for this partition. Silently ignore this
2402 : * partition in this case. The end result is the same: we
2403 : * would have pruned all partitions just the same, but we
2404 : * don't have any pruning steps to execute to verify this.
2405 : */
2406 : }
2407 : }
2408 : }
1828 alvherre 2409 GIC 2090 : }
|