/*-------------------------------------------------------------------------
 *
 * execPartition.c
 *	  Support routines for partitioning.
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/executor/execPartition.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/table.h"
#include "access/tableam.h"
#include "catalog/partition.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
#include "foreign/fdwapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "partitioning/partbounds.h"
#include "partitioning/partdesc.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h"
#include "utils/acl.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"


/*-----------------------
 * PartitionTupleRouting - Encapsulates all information required to
 * route a tuple inserted into a partitioned table to one of its leaf
 * partitions.
 *
 * partition_root
 *		The partitioned table that's the target of the command.
 *
 * partition_dispatch_info
 *		Array of 'max_dispatch' elements containing a pointer to a
 *		PartitionDispatch object for every partitioned table touched by tuple
 *		routing.  The entry for the target partitioned table is *always*
 *		present in the 0th element of this array.  See comment for
 *		PartitionDispatchData->indexes for details on how this array is
 *		indexed.
 *
 * nonleaf_partitions
 *		Array of 'max_dispatch' elements containing pointers to fake
 *		ResultRelInfo objects for nonleaf partitions, useful for checking
 *		the partition constraint.
 *
 * num_dispatch
 *		The current number of items stored in the 'partition_dispatch_info'
 *		array.  Also serves as the index of the next free array element for
 *		new PartitionDispatch objects that need to be stored.
 *
 * max_dispatch
 *		The current allocated size of the 'partition_dispatch_info' array.
 *
 * partitions
 *		Array of 'max_partitions' elements containing a pointer to a
 *		ResultRelInfo for every leaf partition touched by tuple routing.
 *		Some of these are pointers to ResultRelInfos which are borrowed out
 *		of the owning ModifyTableState node.  The remainder have been built
 *		especially for tuple routing.  See comment for
 *		PartitionDispatchData->indexes for details on how this array is
 *		indexed.
 *
 * is_borrowed_rel
 *		Array of 'max_partitions' booleans recording whether a given entry
 *		in 'partitions' is a ResultRelInfo pointer borrowed from the owning
 *		ModifyTableState node, rather than being built here.
 *
 * num_partitions
 *		The current number of items stored in the 'partitions' array.  Also
 *		serves as the index of the next free array element for new
 *		ResultRelInfo objects that need to be stored.
 *
 * max_partitions
 *		The current allocated size of the 'partitions' array.
 *
 * memcxt
 *		Memory context used to allocate subsidiary structs.
 *-----------------------
 */
struct PartitionTupleRouting
{
	Relation	partition_root;
	PartitionDispatch *partition_dispatch_info;
	ResultRelInfo **nonleaf_partitions;
	int			num_dispatch;
	int			max_dispatch;
	ResultRelInfo **partitions;
	bool	   *is_borrowed_rel;
	int			num_partitions;
	int			max_partitions;
	MemoryContext memcxt;
};

/*-----------------------
 * PartitionDispatch - information about one partitioned table in a partition
 * hierarchy required to route a tuple to any of its partitions.  A
 * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
 * struct and stored inside its 'partition_dispatch_info' array.
 *
 * reldesc
 *		Relation descriptor of the table
 *
 * key
 *		Partition key information of the table
 *
 * keystate
 *		Execution state required for expressions in the partition key
 *
 * partdesc
 *		Partition descriptor of the table
 *
 * tupslot
 *		A standalone TupleTableSlot initialized with this table's tuple
 *		descriptor, or NULL if no tuple conversion from the parent's rowtype
 *		is required.
 *
 * tupmap
 *		AttrMap to convert from the parent's rowtype to this table's rowtype
 *		(when extracting the partition key of a tuple just before routing it
 *		through this table).  A NULL value is stored if no tuple conversion
 *		is required.
 *
 * indexes
 *		Array of partdesc->nparts elements.  For leaf partitions the index
 *		corresponds to the partition's ResultRelInfo in the encapsulating
 *		PartitionTupleRouting's partitions array.  For partitions that are
 *		themselves partitioned, the index corresponds to the
 *		PartitionDispatch for it in its partition_dispatch_info array.  -1
 *		indicates we've not yet allocated anything in PartitionTupleRouting
 *		for the partition.
 *-----------------------
 */
typedef struct PartitionDispatchData
{
	Relation	reldesc;
	PartitionKey key;
	List	   *keystate;		/* list of ExprState */
	PartitionDesc partdesc;
	TupleTableSlot *tupslot;
	AttrMap    *tupmap;
	int			indexes[FLEXIBLE_ARRAY_MEMBER];
} PartitionDispatchData;
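
/*
 * Illustrative sketch (not part of the build): for a two-level hierarchy in
 * which the root table "parted" has a sub-partitioned child "sub" and a leaf
 * child "p1", the arrays described above might relate as follows once both
 * children have been visited by tuple routing:
 *
 *		proute->partition_dispatch_info[0]		PartitionDispatch for "parted"
 *		proute->partition_dispatch_info[1]		PartitionDispatch for "sub"
 *		proute->partitions[0]					ResultRelInfo for "p1"
 *
 *		root_pd->indexes[idx_of_sub] == 1		offset into partition_dispatch_info
 *		root_pd->indexes[idx_of_p1] == 0		offset into partitions
 *		root_pd->indexes[anything else] == -1	not yet initialized
 *
 * Whether an indexes[] entry refers to 'partitions' or to
 * 'partition_dispatch_info' is disambiguated by partdesc->is_leaf[], as done
 * in ExecFindPartition().  The names "parted", "sub", "p1", "root_pd" and
 * "idx_of_*" are hypothetical.
 */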


static ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
											EState *estate, PartitionTupleRouting *proute,
											PartitionDispatch dispatch,
											ResultRelInfo *rootResultRelInfo,
											int partidx);
static void ExecInitRoutingInfo(ModifyTableState *mtstate,
								EState *estate,
								PartitionTupleRouting *proute,
								PartitionDispatch dispatch,
								ResultRelInfo *partRelInfo,
								int partidx,
								bool is_borrowed_rel);
static PartitionDispatch ExecInitPartitionDispatchInfo(EState *estate,
													   PartitionTupleRouting *proute,
													   Oid partoid, PartitionDispatch parent_pd,
													   int partidx, ResultRelInfo *rootResultRelInfo);
static void FormPartitionKeyDatum(PartitionDispatch pd,
								  TupleTableSlot *slot,
								  EState *estate,
								  Datum *values,
								  bool *isnull);
static int	get_partition_for_tuple(PartitionDispatch pd, Datum *values,
									bool *isnull);
static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
												  Datum *values,
												  bool *isnull,
												  int maxfieldlen);
static List *adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri);
static List *adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap);
static PartitionPruneState *CreatePartitionPruneState(PlanState *planstate,
													  PartitionPruneInfo *pruneinfo);
static void InitPartitionPruneContext(PartitionPruneContext *context,
									  List *pruning_steps,
									  PartitionDesc partdesc,
									  PartitionKey partkey,
									  PlanState *planstate,
									  ExprContext *econtext);
static void PartitionPruneFixSubPlanMap(PartitionPruneState *prunestate,
										Bitmapset *initially_valid_subplans,
										int n_total_subplans);
static void find_matching_subplans_recurse(PartitionPruningData *prunedata,
										   PartitionedRelPruningData *pprune,
										   bool initial_prune,
										   Bitmapset **validsubplans);


/*
 * ExecSetupPartitionTupleRouting - sets up information needed during
 * tuple routing for partitioned tables, encapsulates it in
 * PartitionTupleRouting, and returns it.
 *
 * Callers must use the returned PartitionTupleRouting during calls to
 * ExecFindPartition().  The actual ResultRelInfo for a partition is only
 * allocated when the partition is found for the first time.
 *
 * The current memory context is used to allocate this struct and all
 * subsidiary structs that will be allocated from it later on.  Typically
 * it should be estate->es_query_cxt.
 */
PartitionTupleRouting *
ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
{
	PartitionTupleRouting *proute;

	/*
	 * Here we attempt to expend as little effort as possible in setting up
	 * the PartitionTupleRouting.  Each partition's ResultRelInfo is built on
	 * demand, only when we actually need to route a tuple to that partition.
	 * The reason for this is that a common case is for INSERT to insert a
	 * single tuple into a partitioned table and this must be fast.
	 */
	proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
	proute->partition_root = rel;
	proute->memcxt = CurrentMemoryContext;
	/* Rest of members initialized by zeroing */

	/*
	 * Initialize this table's PartitionDispatch object.  Here we pass in the
	 * parent as NULL as we don't need to care about any parent of the target
	 * partitioned table.
	 */
	ExecInitPartitionDispatchInfo(estate, proute, RelationGetRelid(rel),
								  NULL, 0, NULL);

	return proute;
}
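
/*
 * Illustrative usage sketch (not compiled): callers such as
 * ExecInitModifyTable() and COPY FROM pair the routines in this file roughly
 * as below; "rel", "mtstate", "rootResultRelInfo", "myslot" and "estate" are
 * assumed to have been set up elsewhere.
 *
 *		PartitionTupleRouting *proute;
 *		ResultRelInfo *partRelInfo;
 *
 *		proute = ExecSetupPartitionTupleRouting(estate, rel);
 *		...
 *		partRelInfo = ExecFindPartition(mtstate, rootResultRelInfo, proute,
 *										myslot, estate);
 *		...
 *		ExecCleanupTupleRouting(mtstate, proute);
 */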

/*
 * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
 * the tuple contained in *slot should belong to.
 *
 * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
 * one up or reuse one from mtstate's resultRelInfo array.  When reusing a
 * ResultRelInfo from the mtstate we verify that the relation is a valid
 * target for INSERTs and initialize tuple routing information.
 *
 * rootResultRelInfo is the relation named in the query.
 *
 * estate must be non-NULL; we'll need it to compute any expressions in the
 * partition keys.  Also, its per-tuple contexts are used as evaluation
 * scratch space.
 *
 * If no leaf partition is found, this routine errors out with the appropriate
 * error message.  An error may also be raised if the found target partition
 * is not a valid target for an INSERT.
 */
ResultRelInfo *
ExecFindPartition(ModifyTableState *mtstate,
				  ResultRelInfo *rootResultRelInfo,
				  PartitionTupleRouting *proute,
				  TupleTableSlot *slot, EState *estate)
{
	PartitionDispatch *pd = proute->partition_dispatch_info;
	Datum		values[PARTITION_MAX_KEYS];
	bool		isnull[PARTITION_MAX_KEYS];
	Relation	rel;
	PartitionDispatch dispatch;
	PartitionDesc partdesc;
	ExprContext *ecxt = GetPerTupleExprContext(estate);
	TupleTableSlot *ecxt_scantuple_saved = ecxt->ecxt_scantuple;
	TupleTableSlot *rootslot = slot;
	TupleTableSlot *myslot = NULL;
	MemoryContext oldcxt;
	ResultRelInfo *rri = NULL;

	/* use per-tuple context here to avoid leaking memory */
	oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));

	/*
	 * First check the root table's partition constraint, if any.  No point
	 * in routing the tuple if it doesn't belong in the root table itself.
	 */
	if (rootResultRelInfo->ri_RelationDesc->rd_rel->relispartition)
		ExecPartitionCheck(rootResultRelInfo, slot, estate, true);

	/* start with the root partitioned table */
	dispatch = pd[0];
	while (dispatch != NULL)
	{
		int			partidx = -1;
		bool		is_leaf;

		CHECK_FOR_INTERRUPTS();

		rel = dispatch->reldesc;
		partdesc = dispatch->partdesc;

		/*
		 * Extract partition key from tuple.  Expression evaluation machinery
		 * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
		 * point to the correct tuple slot.  The slot might have changed from
		 * what was used for the parent table if the table of the current
		 * partitioning level has a different tuple descriptor from the
		 * parent.  So update ecxt_scantuple accordingly.
		 */
		ecxt->ecxt_scantuple = slot;
		FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);

		/*
		 * If this partitioned table has no partitions or no partition for
		 * these values, error out.
		 */
		if (partdesc->nparts == 0 ||
			(partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
		{
			char	   *val_desc;

			val_desc = ExecBuildSlotPartitionKeyDescription(rel,
															values, isnull, 64);
			Assert(OidIsValid(RelationGetRelid(rel)));
			ereport(ERROR,
					(errcode(ERRCODE_CHECK_VIOLATION),
					 errmsg("no partition of relation \"%s\" found for row",
							RelationGetRelationName(rel)),
					 val_desc ?
					 errdetail("Partition key of the failing row contains %s.",
							   val_desc) : 0,
					 errtable(rel)));
		}

		is_leaf = partdesc->is_leaf[partidx];
		if (is_leaf)
		{
			/*
			 * We've reached the leaf -- hurray, we're done.  Look to see if
			 * we've already got a ResultRelInfo for this partition.
			 */
			if (likely(dispatch->indexes[partidx] >= 0))
			{
				/* ResultRelInfo already built */
				Assert(dispatch->indexes[partidx] < proute->num_partitions);
				rri = proute->partitions[dispatch->indexes[partidx]];
			}
			else
			{
				/*
				 * If the partition is known in the owning ModifyTableState
				 * node, we can re-use that ResultRelInfo instead of creating
				 * a new one with ExecInitPartitionInfo().
				 */
				rri = ExecLookupResultRelByOid(mtstate,
											   partdesc->oids[partidx],
											   true, false);
				if (rri)
				{
					/* Verify this ResultRelInfo allows INSERTs */
					CheckValidResultRel(rri, CMD_INSERT, NIL);

					/*
					 * Initialize information needed to insert this and
					 * subsequent tuples routed to this partition.
					 */
					ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
										rri, partidx, true);
				}
				else
				{
					/* We need to create a new one. */
					rri = ExecInitPartitionInfo(mtstate, estate, proute,
												dispatch,
												rootResultRelInfo, partidx);
				}
			}
			Assert(rri != NULL);

			/* Signal to terminate the loop */
			dispatch = NULL;
		}
		else
		{
			/*
			 * Partition is a sub-partitioned table; get the
			 * PartitionDispatch for it.
			 */
			if (likely(dispatch->indexes[partidx] >= 0))
			{
				/* Already built. */
				Assert(dispatch->indexes[partidx] < proute->num_dispatch);

				rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];

				/*
				 * Move down to the next partition level and search again
				 * until we find a leaf partition that matches this tuple.
				 */
				dispatch = pd[dispatch->indexes[partidx]];
			}
			else
			{
				/* Not yet built.  Do that now. */
				PartitionDispatch subdispatch;

				/*
				 * Create the new PartitionDispatch.  We pass the current one
				 * in as the parent PartitionDispatch.
				 */
				subdispatch = ExecInitPartitionDispatchInfo(estate,
															proute,
															partdesc->oids[partidx],
															dispatch, partidx,
															mtstate->rootResultRelInfo);
				Assert(dispatch->indexes[partidx] >= 0 &&
					   dispatch->indexes[partidx] < proute->num_dispatch);

				rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
				dispatch = subdispatch;
			}

			/*
			 * Convert the tuple to the new parent's layout, if different
			 * from the previous parent.
			 */
			if (dispatch->tupslot)
			{
				AttrMap    *map = dispatch->tupmap;
				TupleTableSlot *tempslot = myslot;

				myslot = dispatch->tupslot;
				slot = execute_attr_map_slot(map, slot, myslot);

				if (tempslot != NULL)
					ExecClearTuple(tempslot);
			}
		}

		/*
		 * If this partition is the default one, we must check its partition
		 * constraint now, which may have changed concurrently due to
		 * partitions being added to the parent.
		 *
		 * (We do this here, and do not rely on ExecInsert doing it, because
		 * we don't want to miss doing it for non-leaf partitions.)
		 */
		if (partidx == partdesc->boundinfo->default_index)
		{
			/*
			 * The tuple must match the partition's layout for the constraint
			 * expression to be evaluated successfully.  If the partition is
			 * sub-partitioned, that would already be the case due to the
			 * code above, but for a leaf partition the tuple still matches
			 * the parent's layout.
			 *
			 * Note that we have a map to convert from root to current
			 * partition, but not from immediate parent to current partition.
			 * So if we have to convert, do it from the root slot; if not,
			 * use the root slot as-is.
			 */
			if (is_leaf)
			{
				TupleConversionMap *map = ExecGetRootToChildMap(rri, estate);

				if (map)
					slot = execute_attr_map_slot(map->attrMap, rootslot,
												 rri->ri_PartitionTupleSlot);
				else
					slot = rootslot;
			}

			ExecPartitionCheck(rri, slot, estate, true);
		}
	}

	/* Release the tuple in the lowest parent's dedicated slot. */
	if (myslot != NULL)
		ExecClearTuple(myslot);
	/* and restore ecxt's scantuple */
	ecxt->ecxt_scantuple = ecxt_scantuple_saved;
	MemoryContextSwitchTo(oldcxt);

	return rri;
}
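
/*
 * Worked example (illustrative only): routing a tuple from a root table
 * through a sub-partitioned child "sub" down to a leaf "sub_p1" proceeds as
 * follows in the loop above:
 *
 * 1. FormPartitionKeyDatum() computes the root's partition key from the
 *	  root slot, and get_partition_for_tuple() selects "sub".
 * 2. Because sub's PartitionDispatch has a non-NULL tupslot/tupmap,
 *	  execute_attr_map_slot() converts the tuple into sub's layout before
 *	  the next iteration.
 * 3. The loop repeats at "sub" and selects "sub_p1"; a ResultRelInfo for it
 *	  is borrowed from the ModifyTableState if available, or else built on
 *	  first use by ExecInitPartitionInfo().
 *
 * The relation names here are hypothetical.
 */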
485 : :
486 : : /*
487 : : * ExecInitPartitionInfo
488 : : * Lock the partition and initialize ResultRelInfo. Also setup other
489 : : * information for the partition and store it in the next empty slot in
490 : : * the proute->partitions array.
491 : : *
492 : : * Returns the ResultRelInfo
493 : : */
494 : : static ResultRelInfo *
1976 495 : 3695 : ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
496 : : PartitionTupleRouting *proute,
497 : : PartitionDispatch dispatch,
498 : : ResultRelInfo *rootResultRelInfo,
499 : : int partidx)
500 : : {
2200 rhaas@postgresql.org 501 : 3695 : ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
1116 alvherre@alvh.no-ip. 502 : 3695 : Oid partOid = dispatch->partdesc->oids[partidx];
503 : : Relation partrel;
1161 heikki.linnakangas@i 504 : 3695 : int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
2190 alvherre@alvh.no-ip. 505 : 3695 : Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
506 : : ResultRelInfo *leaf_part_rri;
507 : : MemoryContext oldcxt;
1579 michael@paquier.xyz 508 : 3695 : AttrMap *part_attmap = NULL;
509 : : bool found_whole_row;
510 : :
1976 alvherre@alvh.no-ip. 511 : 3695 : oldcxt = MemoryContextSwitchTo(proute->memcxt);
512 : :
1116 513 : 3695 : partrel = table_open(partOid, RowExclusiveLock);
514 : :
2211 515 : 3695 : leaf_part_rri = makeNode(ResultRelInfo);
2243 rhaas@postgresql.org 516 : 3695 : InitResultRelInfo(leaf_part_rri,
517 : : partrel,
518 : : 0,
519 : : rootResultRelInfo,
520 : : estate->es_instrument);
521 : :
522 : : /*
523 : : * Verify result relation is a valid target for an INSERT. An UPDATE of a
524 : : * partition-key becomes a DELETE+INSERT operation, so this check is still
525 : : * required when the operation is CMD_UPDATE.
526 : : */
45 dean.a.rasheed@gmail 527 :GNC 3695 : CheckValidResultRel(leaf_part_rri, CMD_INSERT, NIL);
528 : :
529 : : /*
530 : : * Open partition indices. The user may have asked to check for conflicts
531 : : * within this leaf partition and do "nothing" instead of throwing an
532 : : * error. Be prepared in that case by initializing the index information
533 : : * needed by ExecInsert() to perform speculative insertions.
534 : : */
2243 rhaas@postgresql.org 535 [ + + ]:CBC 3692 : if (partrel->rd_rel->relhasindex &&
536 [ + - ]: 1158 : leaf_part_rri->ri_IndexRelationDescs == NULL)
537 : 1158 : ExecOpenIndices(leaf_part_rri,
2218 alvherre@alvh.no-ip. 538 [ + + ]: 2231 : (node != NULL &&
539 [ + + ]: 1073 : node->onConflictAction != ONCONFLICT_NONE));
540 : :
541 : : /*
542 : : * Build WITH CHECK OPTION constraints for the partition. Note that we
543 : : * didn't build the withCheckOptionList for partitions within the planner,
544 : : * but simple translation of varattnos will suffice. This only occurs for
545 : : * the INSERT case or in the case of UPDATE/MERGE tuple routing where we
546 : : * didn't find a result rel to reuse.
547 : : */
2243 rhaas@postgresql.org 548 [ + + + + ]: 3692 : if (node && node->withCheckOptionLists != NIL)
549 : : {
550 : : List *wcoList;
551 : 48 : List *wcoExprs = NIL;
552 : : ListCell *ll;
553 : :
554 : : /*
555 : : * In the case of INSERT on a partitioned table, there is only one
556 : : * plan. Likewise, there is only one WCO list, not one per partition.
557 : : * For UPDATE/MERGE, there are as many WCO lists as there are plans.
558 : : */
559 [ + + + - : 48 : Assert((node->operation == CMD_INSERT &&
- + + + -
+ + - -
+ ]
560 : : list_length(node->withCheckOptionLists) == 1 &&
561 : : list_length(node->resultRelations) == 1) ||
562 : : (node->operation == CMD_UPDATE &&
563 : : list_length(node->withCheckOptionLists) ==
564 : : list_length(node->resultRelations)) ||
565 : : (node->operation == CMD_MERGE &&
566 : : list_length(node->withCheckOptionLists) ==
567 : : list_length(node->resultRelations)));
568 : :
569 : : /*
570 : : * Use the WCO list of the first plan as a reference to calculate
571 : : * attno's for the WCO list of this partition. In the INSERT case,
572 : : * that refers to the root partitioned table, whereas in the UPDATE
573 : : * tuple routing case, that refers to the first partition in the
574 : : * mtstate->resultRelInfo array. In any case, both that relation and
575 : : * this partition should have the same columns, so we should be able
576 : : * to map attributes successfully.
577 : : */
578 : 48 : wcoList = linitial(node->withCheckOptionLists);
579 : :
580 : : /*
581 : : * Convert Vars in it to contain this partition's attribute numbers.
582 : : */
583 : : part_attmap =
1579 michael@paquier.xyz 584 : 48 : build_attrmap_by_name(RelationGetDescr(partrel),
585 : : RelationGetDescr(firstResultRel),
586 : : false);
587 : : wcoList = (List *)
2187 alvherre@alvh.no-ip. 588 : 48 : map_variable_attnos((Node *) wcoList,
589 : : firstVarno, 0,
590 : : part_attmap,
591 : 48 : RelationGetForm(partrel)->reltype,
592 : : &found_whole_row);
593 : : /* We ignore the value of found_whole_row. */
594 : :
2243 rhaas@postgresql.org 595 [ + - + + : 135 : foreach(ll, wcoList)
+ + ]
596 : : {
1000 peter@eisentraut.org 597 : 87 : WithCheckOption *wco = lfirst_node(WithCheckOption, ll);
2243 rhaas@postgresql.org 598 : 87 : ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
599 : : &mtstate->ps);
600 : :
601 : 87 : wcoExprs = lappend(wcoExprs, wcoExpr);
602 : : }
603 : :
604 : 48 : leaf_part_rri->ri_WithCheckOptions = wcoList;
605 : 48 : leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
606 : : }
607 : :
608 : : /*
609 : : * Build the RETURNING projection for the partition. Note that we didn't
610 : : * build the returningList for partitions within the planner, but simple
611 : : * translation of varattnos will suffice. This only occurs for the INSERT
612 : : * case or in the case of UPDATE/MERGE tuple routing where we didn't find
613 : : * a result rel to reuse.
614 : : */
615 [ + + + + ]: 3692 : if (node && node->returningLists != NIL)
616 : : {
617 : : TupleTableSlot *slot;
618 : : ExprContext *econtext;
619 : : List *returningList;
620 : :
621 : : /* See the comment above for WCO lists. */
622 [ + + + - : 76 : Assert((node->operation == CMD_INSERT &&
- + + + -
+ + - -
+ ]
623 : : list_length(node->returningLists) == 1 &&
624 : : list_length(node->resultRelations) == 1) ||
625 : : (node->operation == CMD_UPDATE &&
626 : : list_length(node->returningLists) ==
627 : : list_length(node->resultRelations)) ||
628 : : (node->operation == CMD_MERGE &&
629 : : list_length(node->returningLists) ==
630 : : list_length(node->resultRelations)));
631 : :
632 : : /*
633 : : * Use the RETURNING list of the first plan as a reference to
634 : : * calculate attno's for the RETURNING list of this partition. See
635 : : * the comment above for WCO lists for more details on why this is
636 : : * okay.
637 : : */
638 : 76 : returningList = linitial(node->returningLists);
639 : :
640 : : /*
641 : : * Convert Vars in it to contain this partition's attribute numbers.
642 : : */
1579 michael@paquier.xyz 643 [ + - ]: 76 : if (part_attmap == NULL)
644 : : part_attmap =
645 : 76 : build_attrmap_by_name(RelationGetDescr(partrel),
646 : : RelationGetDescr(firstResultRel),
647 : : false);
648 : : returningList = (List *)
2187 alvherre@alvh.no-ip. 649 : 76 : map_variable_attnos((Node *) returningList,
650 : : firstVarno, 0,
651 : : part_attmap,
652 : 76 : RelationGetForm(partrel)->reltype,
653 : : &found_whole_row);
654 : : /* We ignore the value of found_whole_row. */
655 : :
2200 rhaas@postgresql.org 656 : 76 : leaf_part_rri->ri_returningList = returningList;
657 : :
658 : : /*
659 : : * Initialize the projection itself.
660 : : *
661 : : * Use the slot and the expression context that would have been set up
662 : : * in ExecInitModifyTable() for projection's output.
663 : : */
2243 664 [ - + ]: 76 : Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
665 : 76 : slot = mtstate->ps.ps_ResultTupleSlot;
666 [ - + ]: 76 : Assert(mtstate->ps.ps_ExprContext != NULL);
667 : 76 : econtext = mtstate->ps.ps_ExprContext;
668 : 76 : leaf_part_rri->ri_projectReturning =
669 : 76 : ExecBuildProjectionInfo(returningList, econtext, slot,
670 : : &mtstate->ps, RelationGetDescr(partrel));
671 : : }
672 : :
673 : : /* Set up information needed for routing tuples to the partition. */
1976 alvherre@alvh.no-ip. 674 : 3692 : ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
675 : : leaf_part_rri, partidx, false);
676 : :
677 : : /*
678 : : * If there is an ON CONFLICT clause, initialize state for it.
679 : : */
2211 680 [ + + + + ]: 3692 : if (node && node->onConflictAction != ONCONFLICT_NONE)
681 : : {
682 : 111 : TupleDesc partrelDesc = RelationGetDescr(partrel);
683 : 111 : ExprContext *econtext = mtstate->ps.ps_ExprContext;
684 : : ListCell *lc;
685 : 111 : List *arbiterIndexes = NIL;
686 : :
687 : : /*
688 : : * If there is a list of arbiter indexes, map it to a list of indexes
689 : : * in the partition. We do that by scanning the partition's index
690 : : * list and searching for ancestry relationships to each index in the
691 : : * ancestor table.
692 : : */
606 tgl@sss.pgh.pa.us 693 [ + + ]: 111 : if (rootResultRelInfo->ri_onConflictArbiterIndexes != NIL)
694 : : {
695 : : List *childIdxs;
696 : :
2211 alvherre@alvh.no-ip. 697 : 86 : childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);
698 : :
699 [ + - + + : 178 : foreach(lc, childIdxs)
+ + ]
700 : : {
701 : 92 : Oid childIdx = lfirst_oid(lc);
702 : : List *ancestors;
703 : : ListCell *lc2;
704 : :
705 : 92 : ancestors = get_partition_ancestors(childIdx);
1976 706 [ + - + + : 184 : foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes)
+ + ]
707 : : {
2211 708 [ + + ]: 92 : if (list_member_oid(ancestors, lfirst_oid(lc2)))
709 : 86 : arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
710 : : }
711 : 92 : list_free(ancestors);
712 : : }
713 : : }
714 : :
715 : : /*
716 : : * If the resulting lists are of inequal length, something is wrong.
717 : : * (This shouldn't happen, since arbiter index selection should not
718 : : * pick up an invalid index.)
719 : : */
1976 720 [ - + ]: 222 : if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
2211 721 : 111 : list_length(arbiterIndexes))
2211 alvherre@alvh.no-ip. 722 [ # # ]:UBC 0 : elog(ERROR, "invalid arbiter index list");
2211 alvherre@alvh.no-ip. 723 :CBC 111 : leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
724 : :
725 : : /*
726 : : * In the DO UPDATE case, we have some more state to initialize.
727 : : */
728 [ + + ]: 111 : if (node->onConflictAction == ONCONFLICT_UPDATE)
729 : : {
1070 tgl@sss.pgh.pa.us 730 : 83 : OnConflictSetState *onconfl = makeNode(OnConflictSetState);
731 : : TupleConversionMap *map;
732 : :
499 alvherre@alvh.no-ip. 733 : 83 : map = ExecGetRootToChildMap(leaf_part_rri, estate);
734 : :
2211 735 [ - + ]: 83 : Assert(node->onConflictSet != NIL);
1976 736 [ - + ]: 83 : Assert(rootResultRelInfo->ri_onConflict != NULL);
737 : :
1070 tgl@sss.pgh.pa.us 738 : 83 : leaf_part_rri->ri_onConflict = onconfl;
739 : :
740 : : /*
741 : : * Need a separate existing slot for each partition, as the
742 : : * partition could be of a different AM, even if the tuple
743 : : * descriptors match.
744 : : */
745 : 83 : onconfl->oc_Existing =
1861 andres@anarazel.de 746 : 83 : table_slot_create(leaf_part_rri->ri_RelationDesc,
747 : 83 : &mtstate->ps.state->es_tupleTable);
748 : :
749 : : /*
750 : : * If the partition's tuple descriptor matches exactly the root
751 : : * parent (the common case), we can re-use most of the parent's ON
752 : : * CONFLICT SET state, skipping a bunch of work. Otherwise, we
753 : : * need to create state specific to this partition.
754 : : */
2211 alvherre@alvh.no-ip. 755 [ + + ]: 83 : if (map == NULL)
756 : : {
757 : : /*
758 : : * It's safe to reuse these from the partition root, as we
759 : : * only process one tuple at a time (therefore we won't
760 : : * overwrite needed data in slots), and the results of
761 : : * projections are independent of the underlying storage.
762 : : * Projections and where clauses themselves don't store state
763 : : * / are independent of the underlying storage.
764 : : */
1070 tgl@sss.pgh.pa.us 765 : 45 : onconfl->oc_ProjSlot =
1866 andres@anarazel.de 766 : 45 : rootResultRelInfo->ri_onConflict->oc_ProjSlot;
1070 tgl@sss.pgh.pa.us 767 : 45 : onconfl->oc_ProjInfo =
1866 andres@anarazel.de 768 : 45 : rootResultRelInfo->ri_onConflict->oc_ProjInfo;
1070 tgl@sss.pgh.pa.us 769 : 45 : onconfl->oc_WhereClause =
1866 andres@anarazel.de 770 : 45 : rootResultRelInfo->ri_onConflict->oc_WhereClause;
771 : : }
772 : : else
773 : : {
774 : : List *onconflset;
775 : : List *onconflcols;
776 : :
777 : : /*
778 : : * Translate expressions in onConflictSet to account for
779 : : * different attribute numbers. For that, map partition
780 : : * varattnos twice: first to catch the EXCLUDED
781 : : * pseudo-relation (INNER_VAR), and second to handle the main
782 : : * target relation (firstVarno).
783 : : */
1070 tgl@sss.pgh.pa.us 784 : 38 : onconflset = copyObject(node->onConflictSet);
1579 michael@paquier.xyz 785 [ + + ]: 38 : if (part_attmap == NULL)
786 : : part_attmap =
787 : 35 : build_attrmap_by_name(RelationGetDescr(partrel),
788 : : RelationGetDescr(firstResultRel),
789 : : false);
790 : : onconflset = (List *)
2187 alvherre@alvh.no-ip. 791 : 38 : map_variable_attnos((Node *) onconflset,
792 : : INNER_VAR, 0,
793 : : part_attmap,
794 : 38 : RelationGetForm(partrel)->reltype,
795 : : &found_whole_row);
796 : : /* We ignore the value of found_whole_row. */
797 : : onconflset = (List *)
798 : 38 : map_variable_attnos((Node *) onconflset,
799 : : firstVarno, 0,
800 : : part_attmap,
801 : 38 : RelationGetForm(partrel)->reltype,
802 : : &found_whole_row);
803 : : /* We ignore the value of found_whole_row. */
804 : :
805 : : /* Finally, adjust the target colnos to match the partition. */
1070 tgl@sss.pgh.pa.us 806 : 38 : onconflcols = adjust_partition_colnos(node->onConflictCols,
807 : : leaf_part_rri);
808 : :
809 : : /* create the tuple slot for the UPDATE SET projection */
810 : 38 : onconfl->oc_ProjSlot =
811 : 38 : table_slot_create(partrel,
812 : 38 : &mtstate->ps.state->es_tupleTable);
813 : :
814 : : /* build UPDATE SET projection state */
815 : 38 : onconfl->oc_ProjInfo =
816 : 38 : ExecBuildUpdateProjection(onconflset,
817 : : true,
818 : : onconflcols,
819 : : partrelDesc,
820 : : econtext,
821 : : onconfl->oc_ProjSlot,
822 : : &mtstate->ps);
823 : :
824 : : /*
825 : : * If there is a WHERE clause, initialize state where it will
826 : : * be evaluated, mapping the attribute numbers appropriately.
827 : : * As with onConflictSet, we need to map partition varattnos
828 : : * to the partition's tupdesc.
829 : : */
2211 alvherre@alvh.no-ip. 830 [ + + ]: 38 : if (node->onConflictWhere)
831 : : {
832 : : List *clause;
833 : :
834 : 15 : clause = copyObject((List *) node->onConflictWhere);
835 : : clause = (List *)
2187 836 : 15 : map_variable_attnos((Node *) clause,
837 : : INNER_VAR, 0,
838 : : part_attmap,
839 : 15 : RelationGetForm(partrel)->reltype,
840 : : &found_whole_row);
841 : : /* We ignore the value of found_whole_row. */
842 : : clause = (List *)
843 : 15 : map_variable_attnos((Node *) clause,
844 : : firstVarno, 0,
845 : : part_attmap,
846 : 15 : RelationGetForm(partrel)->reltype,
847 : : &found_whole_row);
848 : : /* We ignore the value of found_whole_row. */
1070 tgl@sss.pgh.pa.us 849 : 15 : onconfl->oc_WhereClause =
2211 alvherre@alvh.no-ip. 850 : 15 : ExecInitQual((List *) clause, &mtstate->ps);
851 : : }
852 : : }
853 : : }
854 : : }
855 : :
856 : : /*
857 : : * Since we've just initialized this ResultRelInfo, it's not in any list
858 : : * attached to the estate as yet. Add it, so that it can be found later.
859 : : *
860 : : * Note that the entries in this list appear in no predetermined order,
861 : : * because partition result rels are initialized as and when they're
862 : : * needed.
863 : : */
1976 864 : 3692 : MemoryContextSwitchTo(estate->es_query_cxt);
865 : 3692 : estate->es_tuple_routing_result_relations =
866 : 3692 : lappend(estate->es_tuple_routing_result_relations,
867 : : leaf_part_rri);
868 : :
869 : : /*
870 : : * Initialize information about this partition that's needed to handle
871 : : * MERGE. We take the "first" result relation's mergeActionList as
872 : : * reference and make copy for this relation, converting stuff that
873 : : * references attribute numbers to match this relation's.
874 : : *
875 : : * This duplicates much of the logic in ExecInitMerge(), so something
876 : : * changes there, look here too.
877 : : */
748 878 [ + + + + ]: 3692 : if (node && node->operation == CMD_MERGE)
879 : : {
880 : 9 : List *firstMergeActionList = linitial(node->mergeActionLists);
881 : : ListCell *lc;
882 : 9 : ExprContext *econtext = mtstate->ps.ps_ExprContext;
883 : : Node *joinCondition;
884 : :
885 [ + + ]: 9 : if (part_attmap == NULL)
886 : : part_attmap =
887 : 3 : build_attrmap_by_name(RelationGetDescr(partrel),
888 : : RelationGetDescr(firstResultRel),
889 : : false);
890 : :
891 [ + - ]: 9 : if (unlikely(!leaf_part_rri->ri_projectNewInfoValid))
892 : 9 : ExecInitMergeTupleSlots(mtstate, leaf_part_rri);
893 : :
894 : : /* Initialize state for join condition checking. */
895 : : joinCondition =
15 dean.a.rasheed@gmail 896 :GNC 9 : map_variable_attnos(linitial(node->mergeJoinConditions),
897 : : firstVarno, 0,
898 : : part_attmap,
899 : 9 : RelationGetForm(partrel)->reltype,
900 : : &found_whole_row);
901 : : /* We ignore the value of found_whole_row. */
902 : 9 : leaf_part_rri->ri_MergeJoinCondition =
903 : 9 : ExecInitQual((List *) joinCondition, &mtstate->ps);
904 : :
748 alvherre@alvh.no-ip. 905 [ + - + + :CBC 21 : foreach(lc, firstMergeActionList)
+ + ]
906 : : {
907 : : /* Make a copy for this relation to be safe. */
908 : 12 : MergeAction *action = copyObject(lfirst(lc));
909 : : MergeActionState *action_state;
910 : :
911 : : /* Generate the action's state for this relation */
912 : 12 : action_state = makeNode(MergeActionState);
913 : 12 : action_state->mas_action = action;
914 : :
915 : : /* And put the action in the appropriate list */
15 dean.a.rasheed@gmail 916 :GNC 24 : leaf_part_rri->ri_MergeActions[action->matchKind] =
917 : 12 : lappend(leaf_part_rri->ri_MergeActions[action->matchKind],
918 : : action_state);
919 : :
748 alvherre@alvh.no-ip. 920 [ + + - - ]:CBC 12 : switch (action->commandType)
921 : : {
922 : 3 : case CMD_INSERT:
923 : :
924 : : /*
925 : : * ExecCheckPlanOutput() already done on the targetlist
926 : : * when "first" result relation initialized and it is same
927 : : * for all result relations.
928 : : */
929 : 3 : action_state->mas_proj =
930 : 3 : ExecBuildProjectionInfo(action->targetList, econtext,
931 : : leaf_part_rri->ri_newTupleSlot,
932 : : &mtstate->ps,
933 : : RelationGetDescr(partrel));
934 : 3 : break;
935 : 9 : case CMD_UPDATE:
936 : :
937 : : /*
938 : : * Convert updateColnos from "first" result relation
939 : : * attribute numbers to this result rel's.
940 : : */
941 [ + - ]: 9 : if (part_attmap)
942 : 9 : action->updateColnos =
943 : 9 : adjust_partition_colnos_using_map(action->updateColnos,
944 : : part_attmap);
945 : 9 : action_state->mas_proj =
946 : 9 : ExecBuildUpdateProjection(action->targetList,
947 : : true,
948 : : action->updateColnos,
949 : 9 : RelationGetDescr(leaf_part_rri->ri_RelationDesc),
950 : : econtext,
951 : : leaf_part_rri->ri_newTupleSlot,
952 : : NULL);
953 : 9 : break;
748 alvherre@alvh.no-ip. 954 :LBC (6) : case CMD_DELETE:
955 : (6) : break;
956 : :
748 alvherre@alvh.no-ip. 957 :UBC 0 : default:
958 [ # # ]: 0 : elog(ERROR, "unknown action in MERGE WHEN clause");
959 : : }
960 : :
961 : : /* found_whole_row intentionally ignored. */
748 alvherre@alvh.no-ip. 962 :CBC 12 : action->qual =
963 : 12 : map_variable_attnos(action->qual,
964 : : firstVarno, 0,
965 : : part_attmap,
966 : 12 : RelationGetForm(partrel)->reltype,
967 : : &found_whole_row);
968 : 12 : action_state->mas_whenqual =
969 : 12 : ExecInitQual((List *) action->qual, &mtstate->ps);
970 : : }
971 : : }
1976 972 : 3692 : MemoryContextSwitchTo(oldcxt);
973 : :
2243 rhaas@postgresql.org 974 : 3692 : return leaf_part_rri;
975 : : }
976 : :
977 : : /*
978 : : * ExecInitRoutingInfo
979 : : * Set up information needed for translating tuples between root
980 : : * partitioned table format and partition format, and keep track of it
981 : : * in PartitionTupleRouting.
982 : : */
983 : : static void
2200 984 : 3933 : ExecInitRoutingInfo(ModifyTableState *mtstate,
985 : : EState *estate,
986 : : PartitionTupleRouting *proute,
987 : : PartitionDispatch dispatch,
988 : : ResultRelInfo *partRelInfo,
989 : : int partidx,
990 : : bool is_borrowed_rel)
991 : : {
992 : : MemoryContext oldcxt;
993 : : int rri_index;
994 : :
1976 alvherre@alvh.no-ip. 995 : 3933 : oldcxt = MemoryContextSwitchTo(proute->memcxt);
996 : :
997 : : /*
998 : : * Set up tuple conversion between root parent and the partition if the
999 : : * two have different rowtypes. If conversion is indeed required, also
1000 : : * initialize a slot dedicated to storing this partition's converted
1001 : : * tuples. Various operations that are applied to tuples after routing,
1002 : : * such as checking constraints, will refer to this slot.
1003 : : */
499 1004 [ + + ]: 3933 : if (ExecGetRootToChildMap(partRelInfo, estate) != NULL)
1005 : : {
2021 andres@anarazel.de 1006 : 629 : Relation partrel = partRelInfo->ri_RelationDesc;
1007 : :
1008 : : /*
1009 : : * This pins the partition's TupleDesc, which will be released at the
1010 : : * end of the command.
1011 : : */
1273 heikki.linnakangas@i 1012 : 629 : partRelInfo->ri_PartitionTupleSlot =
1861 andres@anarazel.de 1013 : 629 : table_slot_create(partrel, &estate->es_tupleTable);
1014 : : }
1015 : : else
1273 heikki.linnakangas@i 1016 : 3304 : partRelInfo->ri_PartitionTupleSlot = NULL;
1017 : :
1018 : : /*
1019 : : * If the partition is a foreign table, let the FDW init itself for
1020 : : * routing tuples to the partition.
1021 : : */
2200 rhaas@postgresql.org 1022 [ + + ]: 3933 : if (partRelInfo->ri_FdwRoutine != NULL &&
1023 [ + - ]: 42 : partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
1024 : 42 : partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);
1025 : :
1026 : : /*
1027 : : * Determine if the FDW supports batch insert and determine the batch size
1028 : : * (a FDW may support batching, but it may be disabled for the
1029 : : * server/table or for this particular query).
1030 : : *
1031 : : * If the FDW does not support batching, we set the batch size to 1.
1032 : : */
481 efujita@postgresql.o 1033 [ + + ]: 3927 : if (partRelInfo->ri_FdwRoutine != NULL &&
1180 tomas.vondra@postgre 1034 [ + - ]: 36 : partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize &&
1035 [ + - ]: 36 : partRelInfo->ri_FdwRoutine->ExecForeignBatchInsert)
1036 : 36 : partRelInfo->ri_BatchSize =
1037 : 36 : partRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(partRelInfo);
1038 : : else
1039 : 3891 : partRelInfo->ri_BatchSize = 1;
1040 : :
1041 [ - + ]: 3927 : Assert(partRelInfo->ri_BatchSize >= 1);
1042 : :
1837 andres@anarazel.de 1043 : 3927 : partRelInfo->ri_CopyMultiInsertBuffer = NULL;
1044 : :
1045 : : /*
1046 : : * Keep track of it in the PartitionTupleRouting->partitions array.
1047 : : */
1976 alvherre@alvh.no-ip. 1048 [ - + ]: 3927 : Assert(dispatch->indexes[partidx] == -1);
1049 : :
1050 : 3927 : rri_index = proute->num_partitions++;
1051 : :
1052 : : /* Allocate or enlarge the array, as needed */
1053 [ + + ]: 3927 : if (proute->num_partitions >= proute->max_partitions)
1054 : : {
1055 [ + + ]: 2925 : if (proute->max_partitions == 0)
1056 : : {
1057 : 2919 : proute->max_partitions = 8;
1058 : 2919 : proute->partitions = (ResultRelInfo **)
1059 : 2919 : palloc(sizeof(ResultRelInfo *) * proute->max_partitions);
1104 tgl@sss.pgh.pa.us 1060 : 2919 : proute->is_borrowed_rel = (bool *)
1061 : 2919 : palloc(sizeof(bool) * proute->max_partitions);
1062 : : }
1063 : : else
1064 : : {
1976 alvherre@alvh.no-ip. 1065 : 6 : proute->max_partitions *= 2;
1066 : 6 : proute->partitions = (ResultRelInfo **)
1067 : 6 : repalloc(proute->partitions, sizeof(ResultRelInfo *) *
1068 : 6 : proute->max_partitions);
1104 tgl@sss.pgh.pa.us 1069 : 6 : proute->is_borrowed_rel = (bool *)
1070 : 6 : repalloc(proute->is_borrowed_rel, sizeof(bool) *
1071 : 6 : proute->max_partitions);
1072 : : }
1073 : : }
1074 : :
1976 alvherre@alvh.no-ip. 1075 : 3927 : proute->partitions[rri_index] = partRelInfo;
1104 tgl@sss.pgh.pa.us 1076 : 3927 : proute->is_borrowed_rel[rri_index] = is_borrowed_rel;
1976 alvherre@alvh.no-ip. 1077 : 3927 : dispatch->indexes[partidx] = rri_index;
1078 : :
1079 : 3927 : MemoryContextSwitchTo(oldcxt);
2200 rhaas@postgresql.org 1080 : 3927 : }
1081 : :
1082 : : /*
1083 : : * ExecInitPartitionDispatchInfo
1084 : : * Lock the partitioned table (if not locked already) and initialize
1085 : : * PartitionDispatch for a partitioned table and store it in the next
1086 : : * available slot in the proute->partition_dispatch_info array. Also,
1087 : : * record the index into this array in the parent_pd->indexes[] array in
1088 : : * the partidx element so that we can properly retrieve the newly created
1089 : : * PartitionDispatch later.
1090 : : */
1091 : : static PartitionDispatch
1865 1092 : 3658 : ExecInitPartitionDispatchInfo(EState *estate,
1093 : : PartitionTupleRouting *proute, Oid partoid,
1094 : : PartitionDispatch parent_pd, int partidx,
1095 : : ResultRelInfo *rootResultRelInfo)
1096 : : {
1097 : : Relation rel;
1098 : : PartitionDesc partdesc;
1099 : : PartitionDispatch pd;
1100 : : int dispatchidx;
1101 : : MemoryContext oldcxt;
1102 : :
1103 : : /*
1104 : : * For data modification, it is better that executor does not include
1105 : : * partitions being detached, except when running in snapshot-isolation
1106 : : * mode. This means that a read-committed transaction immediately gets a
1107 : : * "no partition for tuple" error when a tuple is inserted into a
1108 : : * partition that's being detached concurrently, but a transaction in
1109 : : * repeatable-read mode can still use such a partition.
1110 : : */
1111 [ + + ]: 3658 : if (estate->es_partition_directory == NULL)
1112 : 3052 : estate->es_partition_directory =
1116 alvherre@alvh.no-ip. 1113 : 3052 : CreatePartitionDirectory(estate->es_query_cxt,
1114 : : !IsolationUsesXactSnapshot());
1115 : :
1976 1116 : 3658 : oldcxt = MemoryContextSwitchTo(proute->memcxt);
1117 : :
1118 : : /*
1119 : : * Only sub-partitioned tables need to be locked here. The root
1120 : : * partitioned table will already have been locked as it's referenced in
1121 : : * the query's rtable.
1122 : : */
1123 [ + + ]: 3658 : if (partoid != RelationGetRelid(proute->partition_root))
1879 rhaas@postgresql.org 1124 : 600 : rel = table_open(partoid, RowExclusiveLock);
1125 : : else
1976 alvherre@alvh.no-ip. 1126 : 3058 : rel = proute->partition_root;
1865 rhaas@postgresql.org 1127 : 3658 : partdesc = PartitionDirectoryLookup(estate->es_partition_directory, rel);
1128 : :
1976 alvherre@alvh.no-ip. 1129 : 3658 : pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
1130 : 3658 : partdesc->nparts * sizeof(int));
1131 : 3658 : pd->reldesc = rel;
1132 : 3658 : pd->key = RelationGetPartitionKey(rel);
1133 : 3658 : pd->keystate = NIL;
1134 : 3658 : pd->partdesc = partdesc;
1135 [ + + ]: 3658 : if (parent_pd != NULL)
1136 : : {
1137 : 600 : TupleDesc tupdesc = RelationGetDescr(rel);
1138 : :
1139 : : /*
1140 : : * For sub-partitioned tables where the column order differs from its
1141 : : * direct parent partitioned table, we must store a tuple table slot
1142 : : * initialized with its tuple descriptor and a tuple conversion map to
1143 : : * convert a tuple from its parent's rowtype to its own. This is to
1144 : : * make sure that we are looking at the correct row using the correct
1145 : : * tuple descriptor when computing its partition key for tuple
1146 : : * routing.
1147 : : */
1579 michael@paquier.xyz 1148 : 600 : pd->tupmap = build_attrmap_by_name_if_req(RelationGetDescr(parent_pd->reldesc),
1149 : : tupdesc,
1150 : : false);
1976 alvherre@alvh.no-ip. 1151 : 600 : pd->tupslot = pd->tupmap ?
1871 andres@anarazel.de 1152 [ + + ]: 600 : MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual) : NULL;
1153 : : }
1154 : : else
1155 : : {
1156 : : /* Not required for the root partitioned table */
1976 alvherre@alvh.no-ip. 1157 : 3058 : pd->tupmap = NULL;
1158 : 3058 : pd->tupslot = NULL;
1159 : : }
1160 : :
1161 : : /*
1162 : : * Initialize with -1 to signify that the corresponding partition's
1163 : : * ResultRelInfo or PartitionDispatch has not been created yet.
1164 : : */
1165 : 3658 : memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);
1166 : :
1167 : : /* Track in PartitionTupleRouting for later use */
1168 : 3658 : dispatchidx = proute->num_dispatch++;
1169 : :
1170 : : /* Allocate or enlarge the array, as needed */
1171 [ + + ]: 3658 : if (proute->num_dispatch >= proute->max_dispatch)
1172 : : {
1173 [ + - ]: 3058 : if (proute->max_dispatch == 0)
1174 : : {
1175 : 3058 : proute->max_dispatch = 4;
1176 : 3058 : proute->partition_dispatch_info = (PartitionDispatch *)
1177 : 3058 : palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
1314 1178 : 3058 : proute->nonleaf_partitions = (ResultRelInfo **)
1179 : 3058 : palloc(sizeof(ResultRelInfo *) * proute->max_dispatch);
1180 : : }
1181 : : else
1182 : : {
1976 alvherre@alvh.no-ip. 1183 :UBC 0 : proute->max_dispatch *= 2;
1184 : 0 : proute->partition_dispatch_info = (PartitionDispatch *)
1185 : 0 : repalloc(proute->partition_dispatch_info,
1186 : 0 : sizeof(PartitionDispatch) * proute->max_dispatch);
1314 1187 : 0 : proute->nonleaf_partitions = (ResultRelInfo **)
1188 : 0 : repalloc(proute->nonleaf_partitions,
1189 : 0 : sizeof(ResultRelInfo *) * proute->max_dispatch);
1190 : : }
1191 : : }
1976 alvherre@alvh.no-ip. 1192 :CBC 3658 : proute->partition_dispatch_info[dispatchidx] = pd;
1193 : :
1194 : : /*
1195 : : * If setting up a PartitionDispatch for a sub-partitioned table, we may
1196 : : * also need a minimally valid ResultRelInfo for checking the partition
1197 : : * constraint later; set that up now.
1198 : : */
1314 1199 [ + + ]: 3658 : if (parent_pd)
1200 : : {
1201 : 600 : ResultRelInfo *rri = makeNode(ResultRelInfo);
1202 : :
1161 heikki.linnakangas@i 1203 : 600 : InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0);
1314 alvherre@alvh.no-ip. 1204 : 600 : proute->nonleaf_partitions[dispatchidx] = rri;
1205 : : }
1206 : : else
1207 : 3058 : proute->nonleaf_partitions[dispatchidx] = NULL;
1208 : :
1209 : : /*
1210 : : * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
1211 : : * install a downlink in the parent to allow quick descent.
1212 : : */
1976 1213 [ + + ]: 3658 : if (parent_pd)
1214 : : {
1215 [ - + ]: 600 : Assert(parent_pd->indexes[partidx] == -1);
1216 : 600 : parent_pd->indexes[partidx] = dispatchidx;
1217 : : }
1218 : :
1219 : 3658 : MemoryContextSwitchTo(oldcxt);
1220 : :
1221 : 3658 : return pd;
1222 : : }
1223 : :
1224 : : /*
1225 : : * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
1226 : : * routing.
1227 : : *
1228 : : * Close all the partitioned tables, leaf partitions, and their indices.
1229 : : */
1230 : : void
2200 rhaas@postgresql.org 1231 : 2697 : ExecCleanupTupleRouting(ModifyTableState *mtstate,
1232 : : PartitionTupleRouting *proute)
1233 : : {
1234 : : int i;
1235 : :
1236 : : /*
1237 : : * Remember, proute->partition_dispatch_info[0] corresponds to the root
1238 : : * partitioned table, which we must not try to close, because it is the
1239 : : * main target table of the query that will be closed by callers such as
1240 : : * ExecEndPlan() or DoCopy(). Also, tupslot is NULL for the root
1241 : : * partitioned table.
1242 : : */
2292 1243 [ + + ]: 3188 : for (i = 1; i < proute->num_dispatch; i++)
1244 : : {
1245 : 491 : PartitionDispatch pd = proute->partition_dispatch_info[i];
1246 : :
1910 andres@anarazel.de 1247 : 491 : table_close(pd->reldesc, NoLock);
1248 : :
1976 alvherre@alvh.no-ip. 1249 [ + + ]: 491 : if (pd->tupslot)
1250 : 224 : ExecDropSingleTupleTableSlot(pd->tupslot);
1251 : : }
1252 : :
2292 rhaas@postgresql.org 1253 [ + + ]: 6372 : for (i = 0; i < proute->num_partitions; i++)
1254 : : {
1255 : 3675 : ResultRelInfo *resultRelInfo = proute->partitions[i];
1256 : :
1257 : : /* Allow any FDWs to shut down */
1826 efujita@postgresql.o 1258 [ + + ]: 3675 : if (resultRelInfo->ri_FdwRoutine != NULL &&
1259 [ + - ]: 34 : resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
1260 : 34 : resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
1261 : : resultRelInfo);
1262 : :
1263 : : /*
1264 : : * Close it if it's not one of the result relations borrowed from the
1265 : : * owning ModifyTableState; those will be closed by ExecEndPlan().
1266 : : */
1104 tgl@sss.pgh.pa.us 1267 [ + + ]: 3675 : if (proute->is_borrowed_rel[i])
1268 : 217 : continue;
1269 : :
2292 rhaas@postgresql.org 1270 : 3458 : ExecCloseIndices(resultRelInfo);
1910 andres@anarazel.de 1271 : 3458 : table_close(resultRelInfo->ri_RelationDesc, NoLock);
1272 : : }
2342 rhaas@postgresql.org 1273 : 2697 : }
1274 : :
1275 : : /* ----------------
1276 : : * FormPartitionKeyDatum
1277 : : * Construct values[] and isnull[] arrays for the partition key
1278 : : * of a tuple.
1279 : : *
1280 : : * pd Partition dispatch object of the partitioned table
1281 : : * slot Heap tuple from which to extract partition key
1282 : : * estate executor state for evaluating any partition key
1283 : : * expressions (must be non-NULL)
1284 : : * values Array of partition key Datums (output area)
1285 : : * isnull Array of is-null indicators (output area)
1286 : : *
1287 : : * the ecxt_scantuple slot of estate's per-tuple expr context must point to
1288 : : * the heap tuple passed in.
1289 : : * ----------------
1290 : : */
1291 : : static void
1292 : 554061 : FormPartitionKeyDatum(PartitionDispatch pd,
1293 : : TupleTableSlot *slot,
1294 : : EState *estate,
1295 : : Datum *values,
1296 : : bool *isnull)
1297 : : {
1298 : : ListCell *partexpr_item;
1299 : : int i;
1300 : :
1301 [ + + + + ]: 554061 : if (pd->key->partexprs != NIL && pd->keystate == NIL)
1302 : : {
1303 : : /* Check caller has set up context correctly */
1304 [ + - + - : 267 : Assert(estate != NULL &&
- + ]
1305 : : GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1306 : :
1307 : : /* First time through, set up expression evaluation state */
1308 : 267 : pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
1309 : : }
1310 : :
1311 : 554061 : partexpr_item = list_head(pd->keystate);
1312 [ + + ]: 1119654 : for (i = 0; i < pd->key->partnatts; i++)
1313 : : {
1314 : 565593 : AttrNumber keycol = pd->key->partattrs[i];
1315 : : Datum datum;
1316 : : bool isNull;
1317 : :
1318 [ + + ]: 565593 : if (keycol != 0)
1319 : : {
1320 : : /* Plain column; get the value directly from the heap tuple */
1321 : 521781 : datum = slot_getattr(slot, keycol, &isNull);
1322 : : }
1323 : : else
1324 : : {
1325 : : /* Expression; need to evaluate it */
1326 [ - + ]: 43812 : if (partexpr_item == NULL)
2342 rhaas@postgresql.org 1327 [ # # ]:UBC 0 : elog(ERROR, "wrong number of partition key expressions");
2342 rhaas@postgresql.org 1328 :CBC 43812 : datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
1329 [ + - ]: 43812 : GetPerTupleExprContext(estate),
1330 : : &isNull);
1735 tgl@sss.pgh.pa.us 1331 : 43812 : partexpr_item = lnext(pd->keystate, partexpr_item);
1332 : : }
2342 rhaas@postgresql.org 1333 : 565593 : values[i] = datum;
1334 : 565593 : isnull[i] = isNull;
1335 : : }
1336 : :
1337 [ - + ]: 554061 : if (partexpr_item != NULL)
2342 rhaas@postgresql.org 1338 [ # # ]:UBC 0 : elog(ERROR, "wrong number of partition key expressions");
2342 rhaas@postgresql.org 1339 :CBC 554061 : }
1340 : :
1341 : : /*
1342 : : * The number of times the same partition must be found in a row before we
1343 : : * switch from a binary search for the given values to just checking if the
1344 : : * values belong to the last found partition. This must be above 0.
1345 : : */
1346 : : #define PARTITION_CACHED_FIND_THRESHOLD 16
1347 : :
1348 : : /*
1349 : : * get_partition_for_tuple
1350 : : * Finds partition of relation which accepts the partition key specified
1351 : : * in values and isnull.
1352 : : *
1353 : : * Calling this function can be quite expensive when LIST and RANGE
1354 : : * partitioned tables have many partitions. This is due to the binary search
1355 : : * that's done to find the correct partition. Many of the use cases for LIST
1356 : : * and RANGE partitioned tables make it likely that the same partition is
1357 : : * found in subsequent ExecFindPartition() calls. This is especially true for
1358 : : * cases such as RANGE partitioned tables on a TIMESTAMP column where the
1359 : : * partition key is the current time. When asked to find a partition for a
1360 : : * RANGE or LIST partitioned table, we record the partition index and datum
1361 : : * offset we've found for the given 'values' in the PartitionDesc (which is
1362 : : * stored in relcache), and if we keep finding the same partition
1363 : : * PARTITION_CACHED_FIND_THRESHOLD times in a row, then we'll enable caching
1364 : : * logic and instead of performing a binary search to find the correct
1365 : : * partition, we'll just double-check that 'values' still belong to the last
1366 : : * found partition, and if so, we'll return that partition index, thus
1367 : : * skipping the need for the binary search. If we fail to match the last
1368 : : * partition when double checking, then we fall back on doing a binary search.
1369 : : * In this case, unless we find 'values' belong to the DEFAULT partition,
1370 : : * we'll reset the number of times we've hit the same partition so that we
1371 : : * don't attempt to use the cache again until we've found that partition at
1372 : : * least PARTITION_CACHED_FIND_THRESHOLD times in a row.
1373 : : *
1374 : : * For cases where the partition changes on each lookup, the additional
1375 : : * work required just amounts to recording the last found partition
1376 : : * and bound offset, then resetting the found counter. This is cheap and does
1377 : : * not appear to cause any meaningful slowdowns for such cases.
1378 : : *
1379 : : * No caching of partitions is done when the last found partition is the
1380 : : * DEFAULT or NULL partition. For the case of the DEFAULT partition, there
1381 : : * is no bound offset storing the matching datum, so we cannot confirm the
1382 : : * indexes match. For the NULL partition, the lookup is so cheap that
1383 : : * there's no sense in caching.
1384 : : *
1385 : : * Return value is the index of the partition (>= 0 and < partdesc->nparts)
1386 : : * if one is found, or -1 if none is found.
1387 : : */
1388 : : static int
738 alvherre@alvh.no-ip. 1389 : 554040 : get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
1390 : : {
621 drowley@postgresql.o 1391 : 554040 : int bound_offset = -1;
2192 alvherre@alvh.no-ip. 1392 : 554040 : int part_index = -1;
738 1393 : 554040 : PartitionKey key = pd->key;
1394 : 554040 : PartitionDesc partdesc = pd->partdesc;
2132 tgl@sss.pgh.pa.us 1395 : 554040 : PartitionBoundInfo boundinfo = partdesc->boundinfo;
1396 : :
1397 : : /*
1398 : : * In the switch statement below, when we perform a cached lookup for
1399 : : * RANGE and LIST partitioned tables, if we find that the last found
1400 : : * partition matches the 'values', we return the partition index right
1401 : : * away. We do this instead of breaking out of the switch as we don't
1402 : : * want to execute the code about the DEFAULT partition or do any updates
1403 : : * for any of the cache-related fields. That would be a waste of effort
1404 : : * as we already know it's not the DEFAULT partition and have no need to
1405 : : * increment the number of times we found the same partition any higher
1406 : : * than PARTITION_CACHED_FIND_THRESHOLD.
1407 : : */
1408 : :
1409 : : /* Route as appropriate based on partitioning strategy. */
2192 alvherre@alvh.no-ip. 1410 [ + + + - ]: 554040 : switch (key->strategy)
1411 : : {
1412 : 105363 : case PARTITION_STRATEGY_HASH:
1413 : : {
1414 : : uint64 rowHash;
1415 : :
1416 : : /* hash partitioning is too cheap to bother caching */
2132 tgl@sss.pgh.pa.us 1417 : 105363 : rowHash = compute_partition_hash_value(key->partnatts,
1418 : : key->partsupfunc,
1850 peter@eisentraut.org 1419 :GIC 105363 : key->partcollation,
1420 : : values, isnull);
1421 : :
1422 : : /*
1423 : : * HASH partitions can't have a DEFAULT partition and we don't
1424 : : * do any caching work for them, so just return the part index
1425 : : */
621 drowley@postgresql.o 1426 :CBC 105363 : return boundinfo->indexes[rowHash % boundinfo->nindexes];
1427 : : }
1428 : :
2192 alvherre@alvh.no-ip. 1429 : 85263 : case PARTITION_STRATEGY_LIST:
1430 [ + + ]: 85263 : if (isnull[0])
1431 : : {
1432 : : /* this is far too cheap to bother doing any caching */
2132 tgl@sss.pgh.pa.us 1433 [ + + ]: 66 : if (partition_bound_accepts_nulls(boundinfo))
1434 : : {
1435 : : /*
1436 : : * When there is a NULL partition we just return that
1437 : : * directly. We don't have a bound_offset so it's not
1438 : : * valid to drop into the code after the switch which
1439 : : * checks and updates the cache fields. We perhaps should
1440 : : * be invalidating the details of the last cached
1441 : : * partition but there's no real need to. Keeping those
1442 : : * fields set gives a chance at matching to the cached
1443 : : * partition on the next lookup.
1444 : : */
621 drowley@postgresql.o 1445 : 51 : return boundinfo->null_index;
1446 : : }
1447 : : }
1448 : : else
1449 : : {
1450 : : bool equal;
1451 : :
1452 [ + + ]: 85197 : if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
1453 : : {
1454 : 11946 : int last_datum_offset = partdesc->last_found_datum_index;
1455 : 11946 : Datum lastDatum = boundinfo->datums[last_datum_offset][0];
1456 : : int32 cmpval;
1457 : :
1458 : : /* does the last found datum index match this datum? */
1459 : 11946 : cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
1460 : 11946 : key->partcollation[0],
1461 : : lastDatum,
1462 : : values[0]));
1463 : :
1464 [ + + ]: 11946 : if (cmpval == 0)
1465 : 11769 : return boundinfo->indexes[last_datum_offset];
1466 : :
1467 : : /* fall-through and do a manual lookup */
1468 : : }
1469 : :
2192 alvherre@alvh.no-ip. 1470 : 73428 : bound_offset = partition_list_bsearch(key->partsupfunc,
1471 : : key->partcollation,
1472 : : boundinfo,
1473 : : values[0], &equal);
1474 [ + + + + ]: 73428 : if (bound_offset >= 0 && equal)
2132 tgl@sss.pgh.pa.us 1475 : 73229 : part_index = boundinfo->indexes[bound_offset];
1476 : : }
2192 alvherre@alvh.no-ip. 1477 : 73443 : break;
1478 : :
1479 : 363414 : case PARTITION_STRATEGY_RANGE:
1480 : : {
1481 : 363414 : bool equal = false,
1482 : 363414 : range_partkey_has_null = false;
1483 : : int i;
1484 : :
1485 : : /*
1486 : : * No range includes NULL, so this will be accepted by the
1487 : : * default partition if there is one, and otherwise rejected.
1488 : : */
1489 [ + + ]: 738150 : for (i = 0; i < key->partnatts; i++)
1490 : : {
1491 [ + + ]: 374763 : if (isnull[i])
1492 : : {
1493 : 27 : range_partkey_has_null = true;
1494 : 27 : break;
1495 : : }
1496 : : }
1497 : :
1498 : : /* NULLs belong in the DEFAULT partition */
621 drowley@postgresql.o 1499 [ + + ]: 363414 : if (range_partkey_has_null)
1500 : 27 : break;
1501 : :
1502 [ + + ]: 363387 : if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
1503 : : {
1504 : 119481 : int last_datum_offset = partdesc->last_found_datum_index;
1505 : 119481 : Datum *lastDatums = boundinfo->datums[last_datum_offset];
1506 : 119481 : PartitionRangeDatumKind *kind = boundinfo->kind[last_datum_offset];
1507 : : int32 cmpval;
1508 : :
1509 : : /* check if the value is >= the lower bound */
1510 : 119481 : cmpval = partition_rbound_datum_cmp(key->partsupfunc,
1511 : : key->partcollation,
1512 : : lastDatums,
1513 : : kind,
1514 : : values,
1515 : 119481 : key->partnatts);
1516 : :
1517 : : /*
1518 : : * If it's equal to the lower bound then no need to check
1519 : : * the upper bound.
1520 : : */
1521 [ + + ]: 119481 : if (cmpval == 0)
1522 : 119386 : return boundinfo->indexes[last_datum_offset + 1];
1523 : :
1524 [ + - + - ]: 116532 : if (cmpval < 0 && last_datum_offset + 1 < boundinfo->ndatums)
1525 : : {
1526 : : /* check if the value is below the upper bound */
1527 : 116532 : lastDatums = boundinfo->datums[last_datum_offset + 1];
1528 : 116532 : kind = boundinfo->kind[last_datum_offset + 1];
1529 : 116532 : cmpval = partition_rbound_datum_cmp(key->partsupfunc,
1530 : : key->partcollation,
1531 : : lastDatums,
1532 : : kind,
1533 : : values,
1534 : 116532 : key->partnatts);
1535 : :
1536 [ + + ]: 116532 : if (cmpval > 0)
1537 : 116437 : return boundinfo->indexes[last_datum_offset + 1];
1538 : : }
1539 : : /* fall-through and do a manual lookup */
1540 : : }
1541 : :
1542 : 244001 : bound_offset = partition_range_datum_bsearch(key->partsupfunc,
1543 : : key->partcollation,
1544 : : boundinfo,
1545 : 244001 : key->partnatts,
1546 : : values,
1547 : : &equal);
1548 : :
1549 : : /*
1550 : : * The bound at bound_offset is less than or equal to the
1551 : : * tuple value, so the bound at offset+1 is the upper bound of
1552 : : * the partition we're looking for, if one actually
1553 : : * exists.
1554 : : */
1555 : 244001 : part_index = boundinfo->indexes[bound_offset + 1];
1556 : : }
2192 alvherre@alvh.no-ip. 1557 : 244001 : break;
1558 : :
2192 alvherre@alvh.no-ip. 1559 :UBC 0 : default:
1560 [ # # ]: 0 : elog(ERROR, "unexpected partition strategy: %d",
1561 : : (int) key->strategy);
1562 : : }
1563 : :
1564 : : /*
1565 : : * part_index < 0 means we failed to find a partition of this parent. Use
1566 : : * the default partition, if there is one.
1567 : : */
2192 alvherre@alvh.no-ip. 1568 [ + + ]:CBC 317471 : if (part_index < 0)
1569 : : {
1570 : : /*
1571 : : * No need to reset the cache fields here. The next set of values
1572 : : * might end up belonging to the cached partition, so leaving the
1573 : : * cache alone improves the chances of a cache hit on the next lookup.
1574 : : */
621 drowley@postgresql.o 1575 : 464 : return boundinfo->default_index;
1576 : : }
1577 : :
1578 : : /* we should only make it here when the code above set bound_offset */
1579 [ - + ]: 317007 : Assert(bound_offset >= 0);
1580 : :
1581 : : /*
1582 : : * Attend to the cache fields. If the bound_offset matches the last
1583 : : * cached bound offset then we've found the same partition as last time,
1584 : : * so bump the count by one. If all goes well, we'll eventually reach
1585 : : * PARTITION_CACHED_FIND_THRESHOLD and try the cache path next time
1586 : : * around. Otherwise, we'll reset the cache count back to 1 to mark that
1587 : : * we've found this partition for the first time.
1588 : : */
1589 [ + + ]: 317007 : if (bound_offset == partdesc->last_found_datum_index)
1590 : 218433 : partdesc->last_found_count++;
1591 : : else
1592 : : {
1593 : 98574 : partdesc->last_found_count = 1;
1594 : 98574 : partdesc->last_found_part_index = part_index;
1595 : 98574 : partdesc->last_found_datum_index = bound_offset;
1596 : : }
1597 : :
2192 alvherre@alvh.no-ip. 1598 : 317007 : return part_index;
1599 : : }
1600 : :
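: : /*
: : * A minimal sketch of the cached-lookup fast path (simplified from the
: : * LIST branch above, for illustration only):
: : *
: : *     if (partdesc->last_found_count >= PARTITION_CACHED_FIND_THRESHOLD)
: : *     {
: : *         int     idx = partdesc->last_found_datum_index;
: : *
: : *         if (DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
: : *                                             key->partcollation[0],
: : *                                             boundinfo->datums[idx][0],
: : *                                             values[0])) == 0)
: : *             return boundinfo->indexes[idx];    // cache hit
: : *     }
: : *     // cache miss: fall through to partition_list_bsearch()
: : */
: :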
1601 : : /*
1602 : : * ExecBuildSlotPartitionKeyDescription
1603 : : *
1604 : : * This works very much like BuildIndexValueDescription() and is currently
1605 : : * used for building error messages when ExecFindPartition() fails to find
1606 : : * partition for a row.
1607 : : */
1608 : : static char *
2342 rhaas@postgresql.org 1609 : 77 : ExecBuildSlotPartitionKeyDescription(Relation rel,
1610 : : Datum *values,
1611 : : bool *isnull,
1612 : : int maxfieldlen)
1613 : : {
1614 : : StringInfoData buf;
1615 : 77 : PartitionKey key = RelationGetPartitionKey(rel);
1616 : 77 : int partnatts = get_partition_natts(key);
1617 : : int i;
1618 : 77 : Oid relid = RelationGetRelid(rel);
1619 : : AclResult aclresult;
1620 : :
1621 [ - + ]: 77 : if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
2342 rhaas@postgresql.org 1622 :UBC 0 : return NULL;
1623 : :
1624 : : /* If the user has table-level access, just go build the description. */
2342 rhaas@postgresql.org 1625 :CBC 77 : aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
1626 [ + + ]: 77 : if (aclresult != ACLCHECK_OK)
1627 : : {
1628 : : /*
1629 : : * Step through the columns of the partition key and make sure the
1630 : : * user has SELECT rights on all of them.
1631 : : */
1632 [ + + ]: 12 : for (i = 0; i < partnatts; i++)
1633 : : {
1634 : 9 : AttrNumber attnum = get_partition_col_attnum(key, i);
1635 : :
1636 : : /*
1637 : : * If this partition key column is an expression, we return no
1638 : : * detail rather than trying to figure out what column(s) the
1639 : : * expression includes and whether the user has SELECT rights on them.
1640 : : */
1641 [ + + + + ]: 15 : if (attnum == InvalidAttrNumber ||
1642 : 6 : pg_attribute_aclcheck(relid, attnum, GetUserId(),
1643 : : ACL_SELECT) != ACLCHECK_OK)
1644 : 6 : return NULL;
1645 : : }
1646 : : }
1647 : :
1648 : 71 : initStringInfo(&buf);
1649 : 71 : appendStringInfo(&buf, "(%s) = (",
1650 : : pg_get_partkeydef_columns(relid, true));
1651 : :
1652 [ + + ]: 169 : for (i = 0; i < partnatts; i++)
1653 : : {
1654 : : char *val;
1655 : : int vallen;
1656 : :
1657 [ + + ]: 98 : if (isnull[i])
1658 : 15 : val = "null";
1659 : : else
1660 : : {
1661 : : Oid foutoid;
1662 : : bool typisvarlena;
1663 : :
1664 : 83 : getTypeOutputInfo(get_partition_col_typid(key, i),
1665 : : &foutoid, &typisvarlena);
1666 : 83 : val = OidOutputFunctionCall(foutoid, values[i]);
1667 : : }
1668 : :
1669 [ + + ]: 98 : if (i > 0)
1670 : 27 : appendStringInfoString(&buf, ", ");
1671 : :
1672 : : /* truncate if needed */
1673 : 98 : vallen = strlen(val);
1674 [ + - ]: 98 : if (vallen <= maxfieldlen)
1727 drowley@postgresql.o 1675 : 98 : appendBinaryStringInfo(&buf, val, vallen);
1676 : : else
1677 : : {
2342 rhaas@postgresql.org 1678 :UBC 0 : vallen = pg_mbcliplen(val, vallen, maxfieldlen);
1679 : 0 : appendBinaryStringInfo(&buf, val, vallen);
1680 : 0 : appendStringInfoString(&buf, "...");
1681 : : }
1682 : : }
1683 : :
2342 rhaas@postgresql.org 1684 :CBC 71 : appendStringInfoChar(&buf, ')');
1685 : :
1686 : 71 : return buf.data;
1687 : : }
1688 : :
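: : /*
: : * For instance, for a table partitioned BY RANGE (a, b), the description
: : * built above has the shape (assumed values, for illustration):
: : *
: : *     (a, b) = (42, null)
: : *
: : * Individual values longer than maxfieldlen are clipped and get "..."
: : * appended, and NULL is returned instead if RLS is enabled or the user
: : * lacks SELECT rights on the key columns.
: : */
: :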
1689 : : /*
1690 : : * adjust_partition_colnos
1691 : : * Adjust the list of UPDATE target column numbers to account for
1692 : : * attribute differences between the parent and the partition.
1693 : : *
1694 : : * Note: mustn't be called if no adjustment is required.
1695 : : */
1696 : : static List *
1070 tgl@sss.pgh.pa.us 1697 : 38 : adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri)
1698 : : {
1699 : 38 : TupleConversionMap *map = ExecGetChildToRootMap(leaf_part_rri);
1700 : :
733 alvherre@alvh.no-ip. 1701 [ - + ]: 38 : Assert(map != NULL);
1702 : :
748 1703 : 38 : return adjust_partition_colnos_using_map(colnos, map->attrMap);
1704 : : }
1705 : :
1706 : : /*
1707 : : * adjust_partition_colnos_using_map
1708 : : * Like adjust_partition_colnos, but uses a caller-supplied map instead
1709 : : * of assuming that the mapping is from the "root" result relation.
1710 : : *
1711 : : * Note: mustn't be called if no adjustment is required.
1712 : : */
1713 : : static List *
1714 : 47 : adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
1715 : : {
1716 : 47 : List *new_colnos = NIL;
1717 : : ListCell *lc;
1718 : :
1719 [ - + ]: 47 : Assert(attrMap != NULL); /* else we shouldn't be here */
1720 : :
1070 tgl@sss.pgh.pa.us 1721 [ + - + + : 116 : foreach(lc, colnos)
+ + ]
1722 : : {
1723 : 69 : AttrNumber parentattrno = lfirst_int(lc);
1724 : :
1725 [ + - ]: 69 : if (parentattrno <= 0 ||
1726 [ + - ]: 69 : parentattrno > attrMap->maplen ||
1727 [ - + ]: 69 : attrMap->attnums[parentattrno - 1] == 0)
1070 tgl@sss.pgh.pa.us 1728 [ # # ]:UBC 0 : elog(ERROR, "unexpected attno %d in target column list",
1729 : : parentattrno);
1070 tgl@sss.pgh.pa.us 1730 :CBC 69 : new_colnos = lappend_int(new_colnos,
1731 : 69 : attrMap->attnums[parentattrno - 1]);
1732 : : }
1733 : :
1734 : 47 : return new_colnos;
1735 : : }
1736 : :
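: : /*
: : * A worked example (hypothetical attribute layout): suppose the parent
: : * has a dropped second column, so its live columns are attnos 1 and 3,
: : * while a later-created partition has them as attnos 1 and 2. Then
: : * attrMap->attnums is {1, 0, 2}, and a parent column list of (1, 3) is
: : * rewritten to the partition's (1, 2). A parent column mapping to 0
: : * should never appear in 'colnos' and triggers the elog() above.
: : */
: :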
1737 : : /*-------------------------------------------------------------------------
1738 : : * Run-Time Partition Pruning Support.
1739 : : *
1740 : : * The following series of functions exist to support the removal of unneeded
1741 : : * subplans for queries against partitioned tables. The supporting functions
1742 : : * here are designed to work with any plan type which supports an arbitrary
1743 : : * number of subplans, e.g. Append, MergeAppend.
1744 : : *
1745 : : * When pruning involves comparison of a partition key to a constant, it's
1746 : : * done by the planner. However, a comparison to a non-constant but
1747 : : * non-volatile (i.e. stable) expression presents an opportunity for run-time
1748 : : * pruning by the executor, allowing irrelevant partitions to be skipped
1749 : : * dynamically.
1750 : : *
1751 : : * We must distinguish expressions containing PARAM_EXEC Params from
1752 : : * expressions that don't contain those. Even though a PARAM_EXEC Param is
1753 : : * considered to be a stable expression, it can change value from one plan
1754 : : * node scan to the next during query execution. Stable comparison
1755 : : * expressions that don't involve such Params allow partition pruning to be
1756 : : * done once during executor startup. Expressions that do involve such Params
1757 : : * require us to prune separately for each scan of the parent plan node.
1758 : : *
1759 : : * Note that pruning away unneeded subplans during executor startup has the
1760 : : * added benefit of not having to initialize the unneeded subplans at all.
1761 : : *
1762 : : *
1763 : : * Functions:
1764 : : *
1765 : : * ExecInitPartitionPruning:
1766 : : * Creates the PartitionPruneState required by ExecFindMatchingSubPlans.
1767 : : * Details stored include how to map the partition index returned by the
1768 : : * partition pruning code into subplan indexes. Also determines the set
1769 : : * of subplans to initialize considering the result of performing initial
1770 : : * pruning steps if any. Maps in PartitionPruneState are updated to
1771 : : * account for initial pruning possibly having eliminated some of the
1772 : : * subplans.
1773 : : *
1774 : : * ExecFindMatchingSubPlans:
1775 : : * Returns indexes of matching subplans after evaluating the expressions
1776 : : * that are safe to evaluate at a given point. This function is first
1777 : : * called during ExecInitPartitionPruning() to find the initially
1778 : : * matching subplans based on performing the initial pruning steps and
1779 : : * then must be called again each time the value of a Param listed in
1780 : : * PartitionPruneState's 'execparamids' changes.
1781 : : *-------------------------------------------------------------------------
1782 : : */
1783 : :
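: : /*
: : * For example (assumed query shapes, for illustration only):
: : *
: : *     -- $1 is a PARAM_EXTERN Param: its value is fixed for the whole
: : *     -- execution, so pruning can happen once at executor startup.
: : *     PREPARE q(int) AS SELECT * FROM parted WHERE partkey = $1;
: : *
: : * By contrast, a PARAM_EXEC Param, such as one supplied by the outer
: : * side of a nestloop, can change between scans of the parent plan node,
: : * so pruning must be repeated each time the Param's value changes.
: : */
: :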
1784 : : /*
1785 : : * ExecInitPartitionPruning
1786 : : * Initialize data structure needed for run-time partition pruning and
1787 : : * do initial pruning if needed
1788 : : *
1789 : : * On return, *initially_valid_subplans is assigned the set of indexes of
1790 : : * child subplans that must be initialized along with the parent plan node.
1791 : : * Initial pruning is performed here if needed and in that case only the
1792 : : * surviving subplans' indexes are added.
1793 : : *
1794 : : * If subplans are indeed pruned, subplan_map arrays contained in the returned
1795 : : * PartitionPruneState are re-sequenced to not count those, though only if the
1796 : : * maps will be needed for subsequent execution pruning passes.
1797 : : */
1798 : : PartitionPruneState *
740 alvherre@alvh.no-ip. 1799 : 324 : ExecInitPartitionPruning(PlanState *planstate,
1800 : : int n_total_subplans,
1801 : : PartitionPruneInfo *pruneinfo,
1802 : : Bitmapset **initially_valid_subplans)
1803 : : {
1804 : : PartitionPruneState *prunestate;
1805 : 324 : EState *estate = planstate->state;
1806 : :
1807 : : /* We may need an expression context to evaluate partition exprs */
1808 : 324 : ExecAssignExprContext(estate, planstate);
1809 : :
1810 : : /* Create the working data structure for pruning */
1811 : 324 : prunestate = CreatePartitionPruneState(planstate, pruneinfo);
1812 : :
1813 : : /*
1814 : : * Perform an initial partition prune pass, if required.
1815 : : */
1816 [ + + ]: 324 : if (prunestate->do_initial_prune)
1817 : 151 : *initially_valid_subplans = ExecFindMatchingSubPlans(prunestate, true);
1818 : : else
1819 : : {
1820 : : /* No pruning, so we'll need to initialize all subplans */
1821 [ - + ]: 173 : Assert(n_total_subplans > 0);
1822 : 173 : *initially_valid_subplans = bms_add_range(NULL, 0,
1823 : : n_total_subplans - 1);
1824 : : }
1825 : :
1826 : : /*
1827 : : * Re-sequence subplan indexes contained in prunestate to account for any
1828 : : * that were removed above due to initial pruning. No need to do this if
1829 : : * no steps were removed.
1830 : : */
1831 [ + + ]: 324 : if (bms_num_members(*initially_valid_subplans) < n_total_subplans)
1832 : : {
1833 : : /*
1834 : : * We can safely skip this when !do_exec_prune, even though that
1835 : : * leaves invalid data in prunestate, because that data won't be
1836 : : * consulted again (cf initial Assert in ExecFindMatchingSubPlans).
1837 : : */
1838 [ + + ]: 151 : if (prunestate->do_exec_prune)
1839 : 24 : PartitionPruneFixSubPlanMap(prunestate,
1840 : : *initially_valid_subplans,
1841 : : n_total_subplans);
1842 : : }
1843 : :
1844 : 324 : return prunestate;
1845 : : }
1846 : :
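: : /*
: : * A minimal caller sketch (hypothetical parent plan node; the names are
: : * illustrative, not verbatim from nodeAppend.c): the parent would
: : * typically do
: : *
: : *     Bitmapset  *validsubplans;
: : *     PartitionPruneState *prunestate;
: : *
: : *     prunestate = ExecInitPartitionPruning(&parentstate->ps,
: : *                                           nsubplans,
: : *                                           pruneinfo,
: : *                                           &validsubplans);
: : *
: : * and then initialize only the subplans whose indexes appear in
: : * validsubplans, consulting prunestate again at rescan time when
: : * prunestate->do_exec_prune is set.
: : */
: :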
1847 : : /*
1848 : : * CreatePartitionPruneState
1849 : : * Build the data structure required for calling ExecFindMatchingSubPlans
1850 : : *
1851 : : * 'planstate' is the parent plan node's execution state.
1852 : : *
1853 : : * 'pruneinfo' is a PartitionPruneInfo as generated by
1854 : : * make_partition_pruneinfo. Here we build a PartitionPruneState containing a
1855 : : * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of
1856 : : * pruneinfo->prune_infos), each of which contains a PartitionedRelPruningData
1857 : : * for each PartitionedRelPruneInfo appearing in that sublist. This two-level
1858 : : * system is needed to keep from confusing the different hierarchies when a
1859 : : * UNION ALL contains multiple partitioned tables as children. The data
1860 : : * stored in each PartitionedRelPruningData can be re-used each time we
1861 : : * re-evaluate which partitions match the pruning steps provided in each
1862 : : * PartitionedRelPruneInfo.
1863 : : */
1864 : : static PartitionPruneState *
1865 : 324 : CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo)
1866 : : {
2019 tgl@sss.pgh.pa.us 1867 : 324 : EState *estate = planstate->state;
1868 : : PartitionPruneState *prunestate;
1869 : : int n_part_hierarchies;
1870 : : ListCell *lc;
1871 : : int i;
740 alvherre@alvh.no-ip. 1872 : 324 : ExprContext *econtext = planstate->ps_ExprContext;
1873 : :
1874 : : /* For data reading, executor always omits detached partitions */
1865 rhaas@postgresql.org 1875 [ + + ]: 324 : if (estate->es_partition_directory == NULL)
1876 : 309 : estate->es_partition_directory =
1088 alvherre@alvh.no-ip. 1877 : 309 : CreatePartitionDirectory(estate->es_query_cxt, false);
1878 : :
740 1879 : 324 : n_part_hierarchies = list_length(pruneinfo->prune_infos);
2083 tgl@sss.pgh.pa.us 1880 [ - + ]: 324 : Assert(n_part_hierarchies > 0);
1881 : :
1882 : : /*
1883 : : * Allocate the data structure
1884 : : */
1885 : : prunestate = (PartitionPruneState *)
1886 : 324 : palloc(offsetof(PartitionPruneState, partprunedata) +
1887 : : sizeof(PartitionPruningData *) * n_part_hierarchies);
1888 : :
1889 : 324 : prunestate->execparamids = NULL;
1890 : : /* other_subplans can change at runtime, so we need our own copy */
740 alvherre@alvh.no-ip. 1891 : 324 : prunestate->other_subplans = bms_copy(pruneinfo->other_subplans);
2135 tgl@sss.pgh.pa.us 1892 : 324 : prunestate->do_initial_prune = false; /* may be set below */
1893 : 324 : prunestate->do_exec_prune = false; /* may be set below */
2083 1894 : 324 : prunestate->num_partprunedata = n_part_hierarchies;
1895 : :
1896 : : /*
1897 : : * Create a short-term memory context which we'll use when making calls to
1898 : : * the partition pruning functions. This avoids possible memory leaks,
1899 : : * since the pruning functions call comparison functions that aren't under
1900 : : * our control.
1901 : : */
2199 alvherre@alvh.no-ip. 1902 : 324 : prunestate->prune_context =
1903 : 324 : AllocSetContextCreate(CurrentMemoryContext,
1904 : : "Partition Prune",
1905 : : ALLOCSET_DEFAULT_SIZES);
1906 : :
1907 : 324 : i = 0;
740 1908 [ + - + + : 660 : foreach(lc, pruneinfo->prune_infos)
+ + ]
1909 : : {
2083 tgl@sss.pgh.pa.us 1910 : 336 : List *partrelpruneinfos = lfirst_node(List, lc);
1911 : 336 : int npartrelpruneinfos = list_length(partrelpruneinfos);
1912 : : PartitionPruningData *prunedata;
1913 : : ListCell *lc2;
1914 : : int j;
1915 : :
1916 : : prunedata = (PartitionPruningData *)
1917 : 336 : palloc(offsetof(PartitionPruningData, partrelprunedata) +
1918 : 336 : npartrelpruneinfos * sizeof(PartitionedRelPruningData));
1919 : 336 : prunestate->partprunedata[i] = prunedata;
1920 : 336 : prunedata->num_partrelprunedata = npartrelpruneinfos;
1921 : :
1922 : 336 : j = 0;
1923 [ + - + + : 1077 : foreach(lc2, partrelpruneinfos)
+ + ]
1924 : : {
1925 : 741 : PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2);
1926 : 741 : PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
1927 : : Relation partrel;
1928 : : PartitionDesc partdesc;
1929 : : PartitionKey partkey;
1930 : :
1931 : : /*
1932 : : * We can rely on the copies of the partitioned table's partition
1933 : : * key and partition descriptor appearing in its relcache entry,
1934 : : * because that entry will be held open and locked for the
1935 : : * duration of this executor run.
1936 : : */
2019 1937 : 741 : partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex);
1938 : 741 : partkey = RelationGetPartitionKey(partrel);
1865 rhaas@postgresql.org 1939 : 741 : partdesc = PartitionDirectoryLookup(estate->es_partition_directory,
1940 : : partrel);
1941 : :
1942 : : /*
1943 : : * Initialize the subplan_map and subpart_map.
1944 : : *
1945 : : * Because we request detached partitions to be included, and
1946 : : * detaching waits for old transactions, it is safe to assume that
1947 : : * no partitions have disappeared since this query was planned.
1948 : : *
1949 : : * However, new partitions may have been added.
1950 : : */
1951 [ - + ]: 741 : Assert(partdesc->nparts >= pinfo->nparts);
1794 tgl@sss.pgh.pa.us 1952 : 741 : pprune->nparts = partdesc->nparts;
1865 rhaas@postgresql.org 1953 : 741 : pprune->subplan_map = palloc(sizeof(int) * partdesc->nparts);
1954 [ + + ]: 741 : if (partdesc->nparts == pinfo->nparts)
1955 : : {
1956 : : /*
1957 : : * There are no new partitions, so this is simple. We can
1958 : : * just point to the subpart_map from the plan, but we must
1959 : : * copy the subplan_map since we may change it later.
1960 : : */
1961 : 740 : pprune->subpart_map = pinfo->subpart_map;
1962 : 740 : memcpy(pprune->subplan_map, pinfo->subplan_map,
1963 : 740 : sizeof(int) * pinfo->nparts);
1964 : :
1965 : : /*
1966 : : * Double-check that the list of unpruned relations has not
1967 : : * changed. (Pruned partitions are not in relid_map[].)
1968 : : */
1969 : : #ifdef USE_ASSERT_CHECKING
1842 tgl@sss.pgh.pa.us 1970 [ + + ]: 2891 : for (int k = 0; k < pinfo->nparts; k++)
1971 : : {
1972 [ + + - + ]: 2151 : Assert(partdesc->oids[k] == pinfo->relid_map[k] ||
1973 : : pinfo->subplan_map[k] == -1);
1974 : : }
1975 : : #endif
1976 : : }
1977 : : else
1978 : : {
1789 1979 : 1 : int pd_idx = 0;
1980 : : int pp_idx;
1981 : :
1982 : : /*
1983 : : * Some new partitions have appeared since plan time, and
1984 : : * those are reflected in our PartitionDesc but were not
1985 : : * present in the one used to construct subplan_map and
1986 : : * subpart_map. So we must construct new and longer arrays
1987 : : * where the partitions that were originally present map to
1988 : : * the same sub-structures, and any added partitions map to
1989 : : * -1, as if the new partitions had been pruned.
1990 : : *
1991 : : * Note: pinfo->relid_map[] may contain InvalidOid entries for
1992 : : * partitions pruned by the planner. We cannot tell exactly
1993 : : * which of the partdesc entries these correspond to, but we
1994 : : * don't have to; just skip over them. The non-pruned
1995 : : * relid_map entries, however, had better be a subset of the
1996 : : * partdesc entries and in the same order.
1997 : : */
1865 rhaas@postgresql.org 1998 : 1 : pprune->subpart_map = palloc(sizeof(int) * partdesc->nparts);
1348 tgl@sss.pgh.pa.us 1999 [ + + ]: 5 : for (pp_idx = 0; pp_idx < partdesc->nparts; pp_idx++)
2000 : : {
2001 : : /* Skip any InvalidOid relid_map entries */
2002 [ + - ]: 5 : while (pd_idx < pinfo->nparts &&
2003 [ + + ]: 5 : !OidIsValid(pinfo->relid_map[pd_idx]))
2004 : 1 : pd_idx++;
2005 : :
2006 [ + - ]: 4 : if (pd_idx < pinfo->nparts &&
2007 [ + + ]: 4 : pinfo->relid_map[pd_idx] == partdesc->oids[pp_idx])
2008 : : {
2009 : : /* match... */
1865 rhaas@postgresql.org 2010 : 2 : pprune->subplan_map[pp_idx] =
2011 : 2 : pinfo->subplan_map[pd_idx];
2012 : 2 : pprune->subpart_map[pp_idx] =
1348 tgl@sss.pgh.pa.us 2013 : 2 : pinfo->subpart_map[pd_idx];
2014 : 2 : pd_idx++;
2015 : : }
2016 : : else
2017 : : {
2018 : : /* this partdesc entry is not in the plan */
2019 : 2 : pprune->subplan_map[pp_idx] = -1;
2020 : 2 : pprune->subpart_map[pp_idx] = -1;
2021 : : }
2022 : : }
2023 : :
2024 : : /*
2025 : : * It might seem that we need to skip any trailing InvalidOid
2026 : : * entries in pinfo->relid_map before checking that we scanned
2027 : : * all of the relid_map. But we will have skipped them above,
2028 : : * because they must correspond to some partdesc->oids
2029 : : * entries; we just couldn't tell which.
2030 : : */
2031 [ - + ]: 1 : if (pd_idx != pinfo->nparts)
1348 tgl@sss.pgh.pa.us 2032 [ # # ]:UBC 0 : elog(ERROR, "could not match partition child tables to plan elements");
2033 : : }
2034 : :
2035 : : /* present_parts is also subject to later modification */
1794 tgl@sss.pgh.pa.us 2036 :CBC 741 : pprune->present_parts = bms_copy(pinfo->present_parts);
2037 : :
2038 : : /*
2039 : : * Initialize pruning contexts as needed. Note that we must skip
2040 : : * execution-time partition pruning in EXPLAIN (GENERIC_PLAN),
2041 : : * since parameter values may be missing.
2042 : : */
2043 : 741 : pprune->initial_pruning_steps = pinfo->initial_pruning_steps;
387 2044 [ + + ]: 741 : if (pinfo->initial_pruning_steps &&
2045 [ + + ]: 205 : !(econtext->ecxt_estate->es_top_eflags & EXEC_FLAG_EXPLAIN_GENERIC))
2046 : : {
740 alvherre@alvh.no-ip. 2047 : 202 : InitPartitionPruneContext(&pprune->initial_context,
2048 : : pinfo->initial_pruning_steps,
2049 : : partdesc, partkey, planstate,
2050 : : econtext);
2051 : : /* Record whether initial pruning is needed at any level */
1794 tgl@sss.pgh.pa.us 2052 : 202 : prunestate->do_initial_prune = true;
2053 : : }
2054 : 741 : pprune->exec_pruning_steps = pinfo->exec_pruning_steps;
387 2055 [ + + ]: 741 : if (pinfo->exec_pruning_steps &&
2056 [ + - ]: 251 : !(econtext->ecxt_estate->es_top_eflags & EXEC_FLAG_EXPLAIN_GENERIC))
2057 : : {
740 alvherre@alvh.no-ip. 2058 : 251 : InitPartitionPruneContext(&pprune->exec_context,
2059 : : pinfo->exec_pruning_steps,
2060 : : partdesc, partkey, planstate,
2061 : : econtext);
2062 : : /* Record whether exec pruning is needed at any level */
1794 tgl@sss.pgh.pa.us 2063 : 251 : prunestate->do_exec_prune = true;
2064 : : }
2065 : :
2066 : : /*
2067 : : * Accumulate the IDs of all PARAM_EXEC Params affecting the
2068 : : * partitioning decisions at this plan node.
2069 : : */
2083 2070 : 1482 : prunestate->execparamids = bms_add_members(prunestate->execparamids,
2071 : 741 : pinfo->execparamids);
2072 : :
2073 : 741 : j++;
2074 : : }
2199 alvherre@alvh.no-ip. 2075 : 336 : i++;
2076 : : }
2077 : :
2078 : 324 : return prunestate;
2079 : : }
2080 : :
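: : /*
: : * To illustrate the two-level structure (hypothetical plan): for
: : *
: : *     SELECT ... FROM parted_a UNION ALL SELECT ... FROM parted_b
: : *
: : * pruneinfo->prune_infos has two sublists, so the state built above has
: : * partprunedata[0] for parted_a's hierarchy and partprunedata[1] for
: : * parted_b's, each holding one PartitionedRelPruningData per partitioned
: : * table in its hierarchy, with the hierarchy's root at index 0.
: : */
: :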
2081 : : /*
2082 : : * Initialize a PartitionPruneContext for the given list of pruning steps.
2083 : : */
2084 : : static void
740 2085 : 453 : InitPartitionPruneContext(PartitionPruneContext *context,
2086 : : List *pruning_steps,
2087 : : PartitionDesc partdesc,
2088 : : PartitionKey partkey,
2089 : : PlanState *planstate,
2090 : : ExprContext *econtext)
2091 : : {
2092 : : int n_steps;
2093 : : int partnatts;
2094 : : ListCell *lc;
2095 : :
1794 tgl@sss.pgh.pa.us 2096 : 453 : n_steps = list_length(pruning_steps);
2097 : :
2098 : 453 : context->strategy = partkey->strategy;
2099 : 453 : context->partnatts = partnatts = partkey->partnatts;
2100 : 453 : context->nparts = partdesc->nparts;
2101 : 453 : context->boundinfo = partdesc->boundinfo;
2102 : 453 : context->partcollation = partkey->partcollation;
2103 : 453 : context->partsupfunc = partkey->partsupfunc;
2104 : :
2105 : : /* We'll look up type-specific support functions as needed */
2106 : 453 : context->stepcmpfuncs = (FmgrInfo *)
2107 : 453 : palloc0(sizeof(FmgrInfo) * n_steps * partnatts);
2108 : :
2109 : 453 : context->ppccontext = CurrentMemoryContext;
2110 : 453 : context->planstate = planstate;
740 alvherre@alvh.no-ip. 2111 : 453 : context->exprcontext = econtext;
2112 : :
2113 : : /* Initialize expression state for each expression we need */
1794 tgl@sss.pgh.pa.us 2114 : 453 : context->exprstates = (ExprState **)
2115 : 453 : palloc0(sizeof(ExprState *) * n_steps * partnatts);
2116 [ + - + + : 1237 : foreach(lc, pruning_steps)
+ + ]
2117 : : {
2118 : 784 : PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc);
184 drowley@postgresql.o 2119 : 784 : ListCell *lc2 = list_head(step->exprs);
2120 : : int keyno;
2121 : :
2122 : : /* not needed for other step kinds */
1794 tgl@sss.pgh.pa.us 2123 [ + + ]: 784 : if (!IsA(step, PartitionPruneStepOp))
2124 : 143 : continue;
2125 : :
2126 [ - + ]: 641 : Assert(list_length(step->exprs) <= partnatts);
2127 : :
184 drowley@postgresql.o 2128 [ + + ]: 1357 : for (keyno = 0; keyno < partnatts; keyno++)
2129 : : {
2130 [ + + ]: 716 : if (bms_is_member(keyno, step->nullkeys))
2131 : 3 : continue;
2132 : :
2133 [ + + ]: 713 : if (lc2 != NULL)
2134 : : {
2135 : 665 : Expr *expr = lfirst(lc2);
2136 : :
2137 : : /* not needed for Consts */
2138 [ + + ]: 665 : if (!IsA(expr, Const))
2139 : : {
2140 : 618 : int stateidx = PruneCxtStateIdx(partnatts,
2141 : : step->step.step_id,
2142 : : keyno);
2143 : :
2144 : : /*
2145 : : * When planstate is NULL, pruning_steps is known not to
2146 : : * contain any expressions that depend on the parent plan.
2147 : : * Information about any available EXTERN parameters must be
2148 : : * passed explicitly in that case, which the caller must
2149 : : * have made available via econtext.
2150 : : */
2151 [ - + ]: 618 : if (planstate == NULL)
184 drowley@postgresql.o 2152 :UBC 0 : context->exprstates[stateidx] =
2153 : 0 : ExecInitExprWithParams(expr,
2154 : : econtext->ecxt_param_list_info);
2155 : : else
184 drowley@postgresql.o 2156 :CBC 618 : context->exprstates[stateidx] =
2157 : 618 : ExecInitExpr(expr, context->planstate);
2158 : : }
2159 : 665 : lc2 = lnext(step->exprs, lc2);
2160 : : }
2161 : : }
2162 : : }
1794 tgl@sss.pgh.pa.us 2163 : 453 : }
2164 : :
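: : /*
: : * The stepcmpfuncs and exprstates arrays allocated above are flattened
: : * [n_steps][partnatts] matrices; PruneCxtStateIdx (from partprune.h)
: : * computes the flat index as
: : *
: : *     stateidx = partnatts * step_id + keyno
: : *
: : * so, e.g., with partnatts = 2, the expression for step_id 3 and
: : * keyno 1 lands in exprstates[7].
: : */
: :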
2165 : : /*
2166 : : * PartitionPruneFixSubPlanMap
2167 : : * Fix mapping of partition indexes to subplan indexes contained in
2168 : : * prunestate by considering the new list of subplans that survived
2169 : : * initial pruning
2170 : : *
2171 : : * Current values of the indexes present in PartitionPruneState count all the
2172 : : * subplans that would be present before initial pruning was done. If initial
2173 : : * pruning got rid of some of the subplans, any subsequent pruning passes will
2174 : : * be looking at a different set of target subplans to choose from than those
2175 : : * in the pre-initial-pruning set, so the maps in PartitionPruneState
2176 : : * containing those indexes must be updated to reflect the new indexes of
2177 : : * subplans in the post-initial-pruning set.
2178 : : */
2179 : : static void
740 alvherre@alvh.no-ip. 2180 : 24 : PartitionPruneFixSubPlanMap(PartitionPruneState *prunestate,
2181 : : Bitmapset *initially_valid_subplans,
2182 : : int n_total_subplans)
2183 : : {
2184 : : int *new_subplan_indexes;
2185 : : Bitmapset *new_other_subplans;
2186 : : int i;
2187 : : int newidx;
2188 : :
2189 : : /*
2190 : : * First we must build a temporary array which maps old subplan indexes to
2191 : : * new ones. For convenience of initialization, we use 1-based indexes in
2192 : : * this array and leave pruned items as 0.
2193 : : */
2194 : 24 : new_subplan_indexes = (int *) palloc0(sizeof(int) * n_total_subplans);
2195 : 24 : newidx = 1;
2196 : 24 : i = -1;
2197 [ + + ]: 93 : while ((i = bms_next_member(initially_valid_subplans, i)) >= 0)
2198 : : {
2199 [ - + ]: 69 : Assert(i < n_total_subplans);
2200 : 69 : new_subplan_indexes[i] = newidx++;
2201 : : }
2202 : :
2203 : : /*
2204 : : * Now we can update each PartitionedRelPruneInfo's subplan_map with new
2205 : : * subplan indexes. We must also recompute its present_parts bitmap.
2206 : : */
2207 [ + + ]: 60 : for (i = 0; i < prunestate->num_partprunedata; i++)
2208 : : {
2209 : 36 : PartitionPruningData *prunedata = prunestate->partprunedata[i];
2210 : : int j;
2211 : :
2212 : : /*
2213 : : * Within each hierarchy, we perform this loop in back-to-front order
2214 : : * so that we determine present_parts for the lowest-level partitioned
2215 : : * tables first. This way we can tell whether a sub-partitioned
2216 : : * table's partitions were entirely pruned so we can exclude it from
2217 : : * the current level's present_parts.
2218 : : */
2219 [ + + ]: 132 : for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
2220 : : {
2221 : 96 : PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
2222 : 96 : int nparts = pprune->nparts;
2223 : : int k;
2224 : :
2225 : : /* We just rebuild present_parts from scratch */
2226 : 96 : bms_free(pprune->present_parts);
2227 : 96 : pprune->present_parts = NULL;
2228 : :
2229 [ + + ]: 354 : for (k = 0; k < nparts; k++)
2230 : : {
2231 : 258 : int oldidx = pprune->subplan_map[k];
2232 : : int subidx;
2233 : :
2234 : : /*
2235 : : * If this partition existed as a subplan then change the old
2236 : : * subplan index to the new subplan index. The new index may
2237 : : * become -1 if the partition was pruned above, or it may just
2238 : : * come earlier in the subplan list due to some subplans being
2239 : : * removed earlier in the list. If it's a subpartition, add
2240 : : * it to present_parts unless it's entirely pruned.
2241 : : */
2242 [ + + ]: 258 : if (oldidx >= 0)
2243 : : {
2244 [ - + ]: 198 : Assert(oldidx < n_total_subplans);
2245 : 198 : pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;
2246 : :
2247 [ + + ]: 198 : if (new_subplan_indexes[oldidx] > 0)
2248 : 57 : pprune->present_parts =
2249 : 57 : bms_add_member(pprune->present_parts, k);
2250 : : }
2251 [ + - ]: 60 : else if ((subidx = pprune->subpart_map[k]) >= 0)
2252 : : {
2253 : : PartitionedRelPruningData *subprune;
2254 : :
2255 : 60 : subprune = &prunedata->partrelprunedata[subidx];
2256 : :
2257 [ + + ]: 60 : if (!bms_is_empty(subprune->present_parts))
2258 : 24 : pprune->present_parts =
2259 : 24 : bms_add_member(pprune->present_parts, k);
2260 : : }
2261 : : }
2262 : : }
2263 : : }
2264 : :
2265 : : /*
2266 : : * We must also recompute the other_subplans set, since indexes in it may
2267 : : * change.
2268 : : */
2269 : 24 : new_other_subplans = NULL;
2270 : 24 : i = -1;
2271 [ + + ]: 36 : while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
2272 : 12 : new_other_subplans = bms_add_member(new_other_subplans,
2273 : 12 : new_subplan_indexes[i] - 1);
2274 : :
2275 : 24 : bms_free(prunestate->other_subplans);
2276 : 24 : prunestate->other_subplans = new_other_subplans;
2277 : :
2278 : 24 : pfree(new_subplan_indexes);
2199 2279 : 24 : }
2280 : :
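: : /*
: : * A worked example (hypothetical): with n_total_subplans = 5 and
: : * initially_valid_subplans = {1, 3}, the temporary 1-based array built
: : * above is
: : *
: : *     new_subplan_indexes = {0, 1, 0, 2, 0}
: : *
: : * so an old subplan_map entry of 3 becomes 2 - 1 = 1, while entries
: : * whose subplans were pruned map through 0 and end up as -1.
: : */
: :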
2281 : : /*
2282 : : * ExecFindMatchingSubPlans
2283 : : * Determine which subplans match the pruning steps detailed in
2284 : : * 'prunestate' for the current comparison expression values.
2285 : : *
2286 : : * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This
2287 : : * differentiates the initial executor-time pruning step from later
2288 : : * runtime pruning.
2289 : : */
2290 : : Bitmapset *
740 2291 : 1874 : ExecFindMatchingSubPlans(PartitionPruneState *prunestate,
2292 : : bool initial_prune)
2293 : : {
2199 2294 : 1874 : Bitmapset *result = NULL;
2295 : : MemoryContext oldcontext;
2296 : : int i;
2297 : :
2298 : : /*
2299 : : * Either we're here on the initial prune done during pruning
2300 : : * initialization, or we're at a point where PARAM_EXEC Params can be
2301 : : * evaluated *and* there are steps in which to do so.
2302 : : */
740 2303 [ + + - + ]: 1874 : Assert(initial_prune || prunestate->do_exec_prune);
2304 : :
2305 : : /*
2306 : : * Switch to a temp context to avoid leaking memory in the executor's
2307 : : * query-lifespan memory context.
2308 : : */
2199 2309 : 1874 : oldcontext = MemoryContextSwitchTo(prunestate->prune_context);
2310 : :
2311 : : /*
2312 : : * For each hierarchy, do the pruning tests, and add nondeletable
2313 : : * subplans' indexes to "result".
2314 : : */
2083 tgl@sss.pgh.pa.us 2315 [ + + ]: 3769 : for (i = 0; i < prunestate->num_partprunedata; i++)
2316 : : {
740 alvherre@alvh.no-ip. 2317 : 1895 : PartitionPruningData *prunedata = prunestate->partprunedata[i];
2318 : : PartitionedRelPruningData *pprune;
2319 : :
2320 : : /*
2321 : : * We pass the zeroth item, belonging to the root table of the
2322 : : * hierarchy, and find_matching_subplans_recurse() takes care of
2323 : : * recursing to other (lower-level) parents as needed.
2324 : : */
2083 tgl@sss.pgh.pa.us 2325 : 1895 : pprune = &prunedata->partrelprunedata[0];
740 alvherre@alvh.no-ip. 2326 : 1895 : find_matching_subplans_recurse(prunedata, pprune, initial_prune,
2327 : : &result);
2328 : :
2329 : : /* Expression eval may have used space in ExprContext too */
1794 tgl@sss.pgh.pa.us 2330 [ + + ]: 1895 : if (pprune->exec_pruning_steps)
740 alvherre@alvh.no-ip. 2331 : 1705 : ResetExprContext(pprune->exec_context.exprcontext);
2332 : : }
2333 : :
2334 : : /* Add in any subplans that partition pruning didn't account for */
1977 tgl@sss.pgh.pa.us 2335 : 1874 : result = bms_add_members(result, prunestate->other_subplans);
2336 : :
2199 alvherre@alvh.no-ip. 2337 : 1874 : MemoryContextSwitchTo(oldcontext);
2338 : :
2339 : : /* Copy result out of the temp context before we reset it */
2340 : 1874 : result = bms_copy(result);
2341 : :
2342 : 1874 : MemoryContextReset(prunestate->prune_context);
2343 : :
2344 : 1874 : return result;
2345 : : }
2346 : :
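: : /*
: : * A rescan-time usage sketch (hypothetical caller code): after initial
: : * pruning, a parent node's ReScan routine might refresh its set of
: : * runnable subplans like this:
: : *
: : *     if (bms_overlap(node->ps.chgParam, prunestate->execparamids))
: : *         validsubplans = ExecFindMatchingSubPlans(prunestate, false);
: : */
: :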
2347 : : /*
2348 : : * find_matching_subplans_recurse
2349 : : * Recursive worker function for ExecFindMatchingSubPlans
2350 : : *
2351 : : * Adds valid (non-prunable) subplan IDs to *validsubplans
2352 : : */
2353 : : static void
2083 tgl@sss.pgh.pa.us 2354 : 2102 : find_matching_subplans_recurse(PartitionPruningData *prunedata,
2355 : : PartitionedRelPruningData *pprune,
2356 : : bool initial_prune,
2357 : : Bitmapset **validsubplans)
2358 : : {
2359 : : Bitmapset *partset;
2360 : : int i;
2361 : :
2362 : : /* Guard against stack overflow due to overly deep partition hierarchy. */
2199 alvherre@alvh.no-ip. 2363 : 2102 : check_stack_depth();
2364 : :
2365 : : /*
2366 : : * Prune as appropriate, if we have pruning steps matching the current
2367 : : * execution context. Otherwise just include all partitions at this
2368 : : * level.
2369 : : */
1794 tgl@sss.pgh.pa.us 2370 [ + + + + ]: 2102 : if (initial_prune && pprune->initial_pruning_steps)
2371 : 193 : partset = get_matching_partitions(&pprune->initial_context,
2372 : : pprune->initial_pruning_steps);
2373 [ + + + + ]: 1909 : else if (!initial_prune && pprune->exec_pruning_steps)
2374 : 1738 : partset = get_matching_partitions(&pprune->exec_context,
2375 : : pprune->exec_pruning_steps);
2376 : : else
2199 alvherre@alvh.no-ip. 2377 : 171 : partset = pprune->present_parts;
2378 : :
2379 : : /* Translate partset into subplan indexes */
2380 : 2102 : i = -1;
2381 [ + + ]: 2944 : while ((i = bms_next_member(partset, i)) >= 0)
2382 : : {
2135 tgl@sss.pgh.pa.us 2383 [ + + ]: 842 : if (pprune->subplan_map[i] >= 0)
2199 alvherre@alvh.no-ip. 2384 : 634 : *validsubplans = bms_add_member(*validsubplans,
2135 tgl@sss.pgh.pa.us 2385 : 634 : pprune->subplan_map[i]);
2386 : : else
2387 : : {
2199 alvherre@alvh.no-ip. 2388 : 208 : int partidx = pprune->subpart_map[i];
2389 : :
2135 tgl@sss.pgh.pa.us 2390 [ + + ]: 208 : if (partidx >= 0)
2083 2391 : 207 : find_matching_subplans_recurse(prunedata,
2392 : : &prunedata->partrelprunedata[partidx],
2393 : : initial_prune, validsubplans);
2394 : : else
2395 : : {
2396 : : /*
2397 : : * We get here if the planner already pruned all the sub-
2398 : : * partitions for this partition. Silently ignore this
2399 : : * partition in this case. The end result is the same: we
2400 : : * would have pruned all partitions just the same, but we
2401 : : * don't have any pruning steps to execute to verify this.
2402 : : */
2403 : : }
2404 : : }
2405 : : }
2199 alvherre@alvh.no-ip. 2406 : 2102 : }
|