LCOV - differential code coverage report
Current view: top level - src/backend/partitioning - partdesc.c (source / functions) Coverage Total Hit UIC UBC GIC GNC CBC EUB ECB
Current: Differential Code Coverage HEAD vs 15 Lines: 89.4 % 123 110 1 12 39 3 68 1 42
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 6 6 4 1 1 4
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * partdesc.c
       4                 :  *      Support routines for manipulating partition descriptors
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  * IDENTIFICATION
      10                 :  *        src/backend/partitioning/partdesc.c
      11                 :  *
      12                 :  *-------------------------------------------------------------------------
      13                 :  */
      14                 : 
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include "access/genam.h"
      18                 : #include "access/htup_details.h"
      19                 : #include "access/table.h"
      20                 : #include "catalog/partition.h"
      21                 : #include "catalog/pg_inherits.h"
      22                 : #include "partitioning/partbounds.h"
      23                 : #include "partitioning/partdesc.h"
      24                 : #include "storage/bufmgr.h"
      25                 : #include "storage/sinval.h"
      26                 : #include "utils/builtins.h"
      27                 : #include "utils/fmgroids.h"
      28                 : #include "utils/hsearch.h"
      29                 : #include "utils/inval.h"
      30                 : #include "utils/lsyscache.h"
      31                 : #include "utils/memutils.h"
      32                 : #include "utils/partcache.h"
      33                 : #include "utils/rel.h"
      34                 : #include "utils/syscache.h"
      35                 : 
      36                 : typedef struct PartitionDirectoryData
      37                 : {
      38                 :     MemoryContext pdir_mcxt;
      39                 :     HTAB       *pdir_hash;
      40                 :     bool        omit_detached;
      41                 : }           PartitionDirectoryData;
      42                 : 
      43                 : typedef struct PartitionDirectoryEntry
      44                 : {
      45                 :     Oid         reloid;
      46                 :     Relation    rel;
      47                 :     PartitionDesc pd;
      48                 : } PartitionDirectoryEntry;
      49                 : 
      50                 : static PartitionDesc RelationBuildPartitionDesc(Relation rel,
      51                 :                                                 bool omit_detached);
      52                 : 
      53                 : 
      54                 : /*
      55                 :  * RelationGetPartitionDesc -- get partition descriptor, if relation is partitioned
      56                 :  *
      57                 :  * We keep two partdescs in relcache: rd_partdesc includes all partitions
      58                 :  * (even those being concurrently marked detached), while rd_partdesc_nodetached
      59                 :  * omits (some of) those.  We store the pg_inherits.xmin value for the latter,
      60                 :  * to determine whether it can be validly reused in each case, since that
      61                 :  * depends on the active snapshot.
      62                 :  *
      63                 :  * Note: we arrange for partition descriptors to not get freed until the
      64                 :  * relcache entry's refcount goes to zero (see hacks in RelationClose,
      65                 :  * RelationClearRelation, and RelationBuildPartitionDesc).  Therefore, even
      66                 :  * though we hand back a direct pointer into the relcache entry, it's safe
      67                 :  * for callers to continue to use that pointer as long as (a) they hold the
      68                 :  * relation open, and (b) they hold a relation lock strong enough to ensure
      69                 :  * that the data doesn't become stale.
      70                 :  */
      71                 : PartitionDesc
      72 CBC       27537 : RelationGetPartitionDesc(Relation rel, bool omit_detached)
      73                 : {
      74           27537 :     Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
      75                 : 
      76                 :     /*
      77                 :      * If relcache has a partition descriptor, use that.  However, we can only
      78                 :      * do so when we are asked to include all partitions including detached;
      79                 :      * and also when we know that there are no detached partitions.
      80                 :      *
      81                 :      * If there is no active snapshot, detached partitions aren't omitted
      82                 :      * either, so we can use the cached descriptor too in that case.
      83                 :      */
      84           27537 :     if (likely(rel->rd_partdesc &&
      85                 :                (!rel->rd_partdesc->detached_exist || !omit_detached ||
      86                 :                 !ActiveSnapshotSet())))
      87           17702 :         return rel->rd_partdesc;
      88                 : 
      89                 :     /*
      90                 :      * If we're asked to omit detached partitions, we may be able to use a
      91                 :      * cached descriptor too.  We determine that based on the pg_inherits.xmin
      92                 :      * that was saved alongside that descriptor: if the xmin that was not in
      93                 :      * progress for that active snapshot is also not in progress for the
      94                 :      * current active snapshot, then we can use it.  Otherwise build one from
      95                 :      * scratch.
      96                 :      */
      97            9835 :     if (omit_detached &&
      98            9557 :         rel->rd_partdesc_nodetached &&
      99               7 :         ActiveSnapshotSet())
     100                 :     {
     101                 :         Snapshot    activesnap;
     102                 : 
     103               7 :         Assert(TransactionIdIsValid(rel->rd_partdesc_nodetached_xmin));
     104               7 :         activesnap = GetActiveSnapshot();
     105                 : 
     106               7 :         if (!XidInMVCCSnapshot(rel->rd_partdesc_nodetached_xmin, activesnap))
     107               7 :             return rel->rd_partdesc_nodetached;
     108                 :     }
     109                 : 
     110            9828 :     return RelationBuildPartitionDesc(rel, omit_detached);
     111                 : }
     112                 : 
     113                 : /*
     114                 :  * RelationBuildPartitionDesc
     115                 :  *      Form rel's partition descriptor, and store in relcache entry
     116                 :  *
     117                 :  * Partition descriptor is a complex structure; to avoid complicated logic to
     118                 :  * free individual elements whenever the relcache entry is flushed, we give it
     119                 :  * its own memory context, a child of CacheMemoryContext, which can easily be
     120                 :  * deleted on its own.  To avoid leaking memory in that context in case of an
     121                 :  * error partway through this function, the context is initially created as a
     122                 :  * child of CurTransactionContext and only re-parented to CacheMemoryContext
     123                 :  * at the end, when no further errors are possible.  Also, we don't make this
     124                 :  * context the current context except in very brief code sections, out of fear
     125                 :  * that some of our callees allocate memory on their own which would be leaked
     126                 :  * permanently.
     127                 :  *
     128                 :  * As a special case, a partition descriptor that was built omitting a
     129                 :  * partition being detached is cached separately from the regular one: it is
     130                 :  * stored in rd_partdesc_nodetached, in its own memory context rd_pddcxt,
     131                 :  * together with the pg_inherits.xmin of the omitted partition, so that a
     132                 :  * later caller can check whether it is valid for its own active snapshot.
     133                 :  */
     134                 : static PartitionDesc
     135            9828 : RelationBuildPartitionDesc(Relation rel, bool omit_detached)
     136                 : {
     137                 :     PartitionDesc partdesc;
     138            9828 :     PartitionBoundInfo boundinfo = NULL;
     139                 :     List       *inhoids;
     140            9828 :     PartitionBoundSpec **boundspecs = NULL;
     141            9828 :     Oid        *oids = NULL;
     142            9828 :     bool       *is_leaf = NULL;
     143                 :     bool        detached_exist;
     144                 :     bool        is_omit;
     145                 :     TransactionId detached_xmin;
     146                 :     ListCell   *cell;
     147                 :     int         i,
     148                 :                 nparts;
     149            9828 :     PartitionKey key = RelationGetPartitionKey(rel);
     150                 :     MemoryContext new_pdcxt;
     151                 :     MemoryContext oldcxt;
     152                 :     int        *mapping;
     153                 : 
     154                 :     /*
     155                 :      * Get partition oids from pg_inherits.  This uses a single snapshot to
     156                 :      * fetch the list of children, so while more children may be getting added
     157                 :      * concurrently, whatever this function returns will be accurate as of
     158                 :      * some well-defined point in time.
     159                 :      */
     160            9828 :     detached_exist = false;
     161            9828 :     detached_xmin = InvalidTransactionId;
     162            9828 :     inhoids = find_inheritance_children_extended(RelationGetRelid(rel),
     163                 :                                                  omit_detached, NoLock,
     164                 :                                                  &detached_exist,
     165                 :                                                  &detached_xmin);
     166                 : 
     167            9828 :     nparts = list_length(inhoids);
     168                 : 
     169                 :     /* Allocate working arrays for OIDs, leaf flags, and boundspecs. */
     170            9828 :     if (nparts > 0)
     171                 :     {
     172            7269 :         oids = (Oid *) palloc(nparts * sizeof(Oid));
     173            7269 :         is_leaf = (bool *) palloc(nparts * sizeof(bool));
     174            7269 :         boundspecs = palloc(nparts * sizeof(PartitionBoundSpec *));
     175                 :     }
     176                 : 
     177                 :     /* Collect bound spec nodes for each partition. */
     178            9828 :     i = 0;
     179           24377 :     foreach(cell, inhoids)
     180                 :     {
     181           14549 :         Oid         inhrelid = lfirst_oid(cell);
     182                 :         HeapTuple   tuple;
     183           14549 :         PartitionBoundSpec *boundspec = NULL;
     184                 : 
     185                 :         /* Try fetching the tuple from the catcache, for speed. */
     186           14549 :         tuple = SearchSysCache1(RELOID, inhrelid);
     187           14549 :         if (HeapTupleIsValid(tuple))
     188                 :         {
     189                 :             Datum       datum;
     190                 :             bool        isnull;
     191                 : 
     192           14549 :             datum = SysCacheGetAttr(RELOID, tuple,
     193                 :                                     Anum_pg_class_relpartbound,
     194                 :                                     &isnull);
     195           14549 :             if (!isnull)
     196           14549 :                 boundspec = stringToNode(TextDatumGetCString(datum));
     197           14549 :             ReleaseSysCache(tuple);
     198                 :         }
     199                 : 
     200                 :         /*
     201                 :          * The system cache may be out of date; if so, we may find no pg_class
     202                 :          * tuple or an old one where relpartbound is NULL.  In that case, try
     203                 :          * the table directly.  We can't just AcceptInvalidationMessages() and
     204                 :          * retry the system cache lookup because it's possible that a
     205                 :          * concurrent ATTACH PARTITION operation has removed itself from the
     206                 :          * ProcArray but not yet added invalidation messages to the shared
     207                 :          * queue; InvalidateSystemCaches() would work, but seems excessive.
     208                 :          *
     209                 :          * Note that this algorithm assumes that PartitionBoundSpec we manage
     210                 :          * to fetch is the right one -- so this is only good enough for
     211                 :          * concurrent ATTACH PARTITION, not concurrent DETACH PARTITION or
     212                 :          * some hypothetical operation that changes the partition bounds.
     213                 :          */
     214           14549 :         if (boundspec == NULL)
     215                 :         {
     216                 :             Relation    pg_class;
     217                 :             SysScanDesc scan;
     218                 :             ScanKeyData key[1];
     219                 :             Datum       datum;
     220                 :             bool        isnull;
     221                 : 
     222 UBC           0 :             pg_class = table_open(RelationRelationId, AccessShareLock);
     223               0 :             ScanKeyInit(&key[0],
     224                 :                         Anum_pg_class_oid,
     225                 :                         BTEqualStrategyNumber, F_OIDEQ,
     226                 :                         ObjectIdGetDatum(inhrelid));
     227               0 :             scan = systable_beginscan(pg_class, ClassOidIndexId, true,
     228                 :                                       NULL, 1, key);
     229               0 :             tuple = systable_getnext(scan);
     230               0 :             datum = heap_getattr(tuple, Anum_pg_class_relpartbound,
     231                 :                                  RelationGetDescr(pg_class), &isnull);
     232               0 :             if (!isnull)
     233               0 :                 boundspec = stringToNode(TextDatumGetCString(datum));
     234               0 :             systable_endscan(scan);
     235               0 :             table_close(pg_class, AccessShareLock);
     236                 :         }
     237                 : 
     238                 :         /* Sanity checks. */
     239 CBC       14549 :         if (!boundspec)
     240 UBC           0 :             elog(ERROR, "missing relpartbound for relation %u", inhrelid);
     241 CBC       14549 :         if (!IsA(boundspec, PartitionBoundSpec))
     242 UBC           0 :             elog(ERROR, "invalid relpartbound for relation %u", inhrelid);
     243                 : 
     244                 :         /*
     245                 :          * If the PartitionBoundSpec says this is the default partition, its
     246                 :          * OID should match pg_partitioned_table.partdefid; if not, the
     247                 :          * catalog is corrupt.
     248                 :          */
     249 CBC       14549 :         if (boundspec->is_default)
     250                 :         {
     251                 :             Oid         partdefid;
     252                 : 
     253             800 :             partdefid = get_default_partition_oid(RelationGetRelid(rel));
     254             800 :             if (partdefid != inhrelid)
     255 UBC           0 :                 elog(ERROR, "expected partdefid %u, but got %u",
     256                 :                      inhrelid, partdefid);
     257                 :         }
     258                 : 
     259                 :         /* Save results. */
     260 CBC       14549 :         oids[i] = inhrelid;
     261           14549 :         is_leaf[i] = (get_rel_relkind(inhrelid) != RELKIND_PARTITIONED_TABLE);
     262           14549 :         boundspecs[i] = boundspec;
     263           14549 :         ++i;
     264                 :     }
     265                 : 
     266                 :     /*
     267                 :      * Create PartitionBoundInfo and mapping, working in the caller's context.
     268                 :      * This could fail, but we haven't done any damage if so.
     269                 :      */
     270            9828 :     if (nparts > 0)
     271            7269 :         boundinfo = partition_bounds_create(boundspecs, nparts, key, &mapping);
     272                 : 
     273                 :     /*
     274                 :      * Now build the actual relcache partition descriptor, copying all the
     275                 :      * data into a new, small context.  As per above comment, we don't make
     276                 :      * this a long-lived context until it's finished.
     277                 :      */
     278            9828 :     new_pdcxt = AllocSetContextCreate(CurTransactionContext,
     279                 :                                       "partition descriptor",
     280                 :                                       ALLOCSET_SMALL_SIZES);
     281            9828 :     MemoryContextCopyAndSetIdentifier(new_pdcxt,
     282                 :                                       RelationGetRelationName(rel));
     283                 : 
     284                 :     partdesc = (PartitionDescData *)
     285            9828 :         MemoryContextAllocZero(new_pdcxt, sizeof(PartitionDescData));
     286            9828 :     partdesc->nparts = nparts;
     287            9828 :     partdesc->detached_exist = detached_exist;
     288                 :     /* If there are no partitions, the rest of the partdesc can stay zero */
     289            9828 :     if (nparts > 0)
     290                 :     {
     291            7269 :         oldcxt = MemoryContextSwitchTo(new_pdcxt);
     292            7269 :         partdesc->boundinfo = partition_bounds_copy(boundinfo, key);
     293                 : 
     294                 :         /* Initialize caching fields for speeding up ExecFindPartition */
     295 GNC        7269 :         partdesc->last_found_datum_index = -1;
     296            7269 :         partdesc->last_found_part_index = -1;
     297            7269 :         partdesc->last_found_count = 0;
     298                 : 
     299 GIC        7269 :         partdesc->oids = (Oid *) palloc(nparts * sizeof(Oid));
     300            7269 :         partdesc->is_leaf = (bool *) palloc(nparts * sizeof(bool));
     301 ECB             : 
     302                 :         /*
     303                 :          * Assign OIDs from the original array into mapped indexes of the
     304                 :          * result array.  The order of OIDs in the former is defined by the
     305                 :          * catalog scan that retrieved them, whereas that in the latter is
     306                 :          * defined by canonicalized representation of the partition bounds.
     307                 :          * Also save leaf-ness of each partition.
     308                 :          */
     309 GIC       21818 :         for (i = 0; i < nparts; i++)
     310                 :         {
     311           14549 :             int         index = mapping[i];
     312                 : 
     313           14549 :             partdesc->oids[index] = oids[i];
     314           14549 :             partdesc->is_leaf[index] = is_leaf[i];
     315 ECB             :         }
     316 GIC        7269 :         MemoryContextSwitchTo(oldcxt);
     317 ECB             :     }
     318                 : 
     319                 :     /*
     320                 :      * Are we working with the partdesc that omits the detached partition, or
     321                 :      * the one that includes it?
     322                 :      *
     323                 :      * Note that if a partition was found by the catalog's scan to have been
     324                 :      * detached, but the pg_inherits tuple saying so was not visible to the
     325                 :      * active snapshot (find_inheritance_children_extended will not have set
     326                 :      * detached_xmin in that case), we consider there to be no "omittable"
     327                 :      * detached partitions.
     328                 :      */
     329 GIC        9873 :     is_omit = omit_detached && detached_exist && ActiveSnapshotSet() &&
     330              45 :         TransactionIdIsValid(detached_xmin);
     331                 : 
     332                 :     /*
     333                 :      * We have a fully valid partdesc.  Reparent it so that it has the right
     334                 :      * lifespan.
     335 ECB             :      */
     336 CBC        9828 :     MemoryContextSetParent(new_pdcxt, CacheMemoryContext);
     337                 : 
     338                 :     /*
     339                 :      * Store it into relcache.
     340                 :      *
     341                 :      * But first, a kluge: if there's an old context for this type of
     342 ECB             :      * descriptor, it contains an old partition descriptor that may still be
     343                 :      * referenced somewhere.  Preserve it, while not leaking it, by
     344                 :      * reattaching it as a child context of the new one.  Eventually it will
     345                 :      * get dropped by either RelationClose or RelationClearRelation. (We keep
     346                 :      * the regular partdesc in rd_pdcxt, and the partdesc-excluding-
     347                 :      * detached-partitions in rd_pddcxt.)
     348                 :      */
     349 GIC        9828 :     if (is_omit)
     350                 :     {
     351              33 :         if (rel->rd_pddcxt != NULL)
     352 UIC           0 :             MemoryContextSetParent(rel->rd_pddcxt, new_pdcxt);
     353 GIC          33 :         rel->rd_pddcxt = new_pdcxt;
     354              33 :         rel->rd_partdesc_nodetached = partdesc;
     355 ECB             : 
     356                 :         /*
     357                 :          * For partdescs built excluding detached partitions, which we save
     358 EUB             :          * separately, we also record the pg_inherits.xmin of the detached
     359 ECB             :          * partition that was omitted; this informs a future potential user of
     360                 :          * such a cached partdesc to only use it after cross-checking that the
     361                 :          * xmin is indeed visible to the snapshot it is going to be working
     362                 :          * with.
     363                 :          */
     364 GIC          33 :         Assert(TransactionIdIsValid(detached_xmin));
     365              33 :         rel->rd_partdesc_nodetached_xmin = detached_xmin;
     366                 :     }
     367                 :     else
     368                 :     {
     369            9795 :         if (rel->rd_pdcxt != NULL)
     370 CBC        2050 :             MemoryContextSetParent(rel->rd_pdcxt, new_pdcxt);
     371            9795 :         rel->rd_pdcxt = new_pdcxt;
     372 GIC        9795 :         rel->rd_partdesc = partdesc;
     373                 :     }
     374                 : 
     375 CBC        9828 :     return partdesc;
     376 ECB             : }
     377                 : 
     378                 : /*
     379                 :  * CreatePartitionDirectory
     380                 :  *      Create a new partition directory object.
     381                 :  */
     382                 : PartitionDirectory
     383 GIC        8436 : CreatePartitionDirectory(MemoryContext mcxt, bool omit_detached)
     384                 : {
     385            8436 :     MemoryContext oldcontext = MemoryContextSwitchTo(mcxt);
     386                 :     PartitionDirectory pdir;
     387                 :     HASHCTL     ctl;
     388                 : 
     389 CBC        8436 :     pdir = palloc(sizeof(PartitionDirectoryData));
     390 GIC        8436 :     pdir->pdir_mcxt = mcxt;
     391 ECB             : 
     392 GIC        8436 :     ctl.keysize = sizeof(Oid);
     393            8436 :     ctl.entrysize = sizeof(PartitionDirectoryEntry);
     394            8436 :     ctl.hcxt = mcxt;
     395 ECB             : 
     396 CBC        8436 :     pdir->pdir_hash = hash_create("partition directory", 256, &ctl,
     397                 :                                   HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
     398            8436 :     pdir->omit_detached = omit_detached;
     399 ECB             : 
     400 CBC        8436 :     MemoryContextSwitchTo(oldcontext);
     401 GIC        8436 :     return pdir;
     402 ECB             : }
     403                 : 
     404                 : /*
     405                 :  * PartitionDirectoryLookup
     406                 :  *      Look up the partition descriptor for a relation in the directory.
     407                 :  *
     408                 :  * The purpose of this function is to ensure that we get the same
     409                 :  * PartitionDesc for each relation every time we look it up.  In the
     410                 :  * face of concurrent DDL, different PartitionDescs may be constructed with
     411                 :  * different views of the catalog state, but any single particular OID
     412                 :  * will always get the same PartitionDesc for as long as the same
     413                 :  * PartitionDirectory is used.
     414                 :  */
     415                 : PartitionDesc
     416 GIC       19246 : PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel)
     417                 : {
     418                 :     PartitionDirectoryEntry *pde;
     419           19246 :     Oid         relid = RelationGetRelid(rel);
     420                 :     bool        found;
     421                 : 
     422 CBC       19246 :     pde = hash_search(pdir->pdir_hash, &relid, HASH_ENTER, &found);
     423 GIC       19246 :     if (!found)
     424                 :     {
     425 ECB             :         /*
     426                 :          * We must keep a reference count on the relation so that the
     427                 :          * PartitionDesc to which we are pointing can't get destroyed.
     428                 :          */
     429 CBC       11628 :         RelationIncrementReferenceCount(rel);
     430 GIC       11628 :         pde->rel = rel;
     431           11628 :         pde->pd = RelationGetPartitionDesc(rel, pdir->omit_detached);
     432           11628 :         Assert(pde->pd != NULL);
     433                 :     }
     434           19246 :     return pde->pd;
     435 ECB             : }
     436                 : 
     437                 : /*
     438                 :  * DestroyPartitionDirectory
     439                 :  *      Destroy a partition directory.
     440                 :  *
     441                 :  * Release the reference counts we're holding.
     442                 :  */
     443                 : void
     444 GIC        8096 : DestroyPartitionDirectory(PartitionDirectory pdir)
     445                 : {
     446                 :     HASH_SEQ_STATUS status;
     447                 :     PartitionDirectoryEntry *pde;
     448                 : 
     449            8096 :     hash_seq_init(&status, pdir->pdir_hash);
     450 CBC       19278 :     while ((pde = hash_seq_search(&status)) != NULL)
     451 GIC       11182 :         RelationDecrementReferenceCount(pde->rel);
     452            8096 : }
     453                 : 
     454                 : /*
     455 ECB             :  * get_default_oid_from_partdesc
     456                 :  *
     457                 :  * Given a partition descriptor, return the OID of the default partition, if
     458                 :  * one exists; else, return InvalidOid.
     459                 :  */
     460                 : Oid
     461 GIC        9203 : get_default_oid_from_partdesc(PartitionDesc partdesc)
     462                 : {
     463            9203 :     if (partdesc && partdesc->boundinfo &&
     464            5527 :         partition_bound_has_default(partdesc->boundinfo))
     465             629 :         return partdesc->oids[partdesc->boundinfo->default_index];
     466                 : 
     467 CBC        8574 :     return InvalidOid;
     468                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a