LCOV - differential code coverage report
Current view: top level - src/backend/commands - cluster.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 90.6 % 457 414 10 5 34 3 6 276 30 102 32 284 2 14
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 13 13 12 1 13
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * cluster.c
       4                 :  *    CLUSTER a table on an index.  This is now also used for VACUUM FULL.
       5                 :  *
       6                 :  * There is hardly anything left of Paul Brown's original implementation...
       7                 :  *
       8                 :  *
       9                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      10                 :  * Portions Copyright (c) 1994-5, Regents of the University of California
      11                 :  *
      12                 :  *
      13                 :  * IDENTIFICATION
      14                 :  *    src/backend/commands/cluster.c
      15                 :  *
      16                 :  *-------------------------------------------------------------------------
      17                 :  */
      18                 : #include "postgres.h"
      19                 : 
      20                 : #include "access/amapi.h"
      21                 : #include "access/heapam.h"
      22                 : #include "access/multixact.h"
      23                 : #include "access/relscan.h"
      24                 : #include "access/tableam.h"
      25                 : #include "access/toast_internals.h"
      26                 : #include "access/transam.h"
      27                 : #include "access/xact.h"
      28                 : #include "access/xlog.h"
      29                 : #include "catalog/catalog.h"
      30                 : #include "catalog/dependency.h"
      31                 : #include "catalog/heap.h"
      32                 : #include "catalog/index.h"
      33                 : #include "catalog/namespace.h"
      34                 : #include "catalog/objectaccess.h"
      35                 : #include "catalog/partition.h"
      36                 : #include "catalog/pg_am.h"
      37                 : #include "catalog/pg_database.h"
      38                 : #include "catalog/pg_inherits.h"
      39                 : #include "catalog/toasting.h"
      40                 : #include "commands/cluster.h"
      41                 : #include "commands/defrem.h"
      42                 : #include "commands/progress.h"
      43                 : #include "commands/tablecmds.h"
      44                 : #include "commands/vacuum.h"
      45                 : #include "miscadmin.h"
      46                 : #include "optimizer/optimizer.h"
      47                 : #include "pgstat.h"
      48                 : #include "storage/bufmgr.h"
      49                 : #include "storage/lmgr.h"
      50                 : #include "storage/predicate.h"
      51                 : #include "utils/acl.h"
      52                 : #include "utils/fmgroids.h"
      53                 : #include "utils/guc.h"
      54                 : #include "utils/inval.h"
      55                 : #include "utils/lsyscache.h"
      56                 : #include "utils/memutils.h"
      57                 : #include "utils/pg_rusage.h"
      58                 : #include "utils/relmapper.h"
      59                 : #include "utils/snapmgr.h"
      60                 : #include "utils/syscache.h"
      61                 : #include "utils/tuplesort.h"
      62                 : 
      63                 : /*
      64                 :  * This struct is used to pass around the information on tables to be
      65                 :  * clustered. We need this so we can make a list of them when invoked without
      66                 :  * a specific table/index pair.
                         :  *
                         :  * cluster_multiple_rels() walks a List of these and hands each entry's
                         :  * (tableOid, indexOid) pair to cluster_rel().
      67                 :  */
      68                 : typedef struct
      69                 : {
      70                 :     Oid         tableOid;       /* table to be clustered */
      71                 :     Oid         indexOid;       /* index to cluster that table on */
      72                 : } RelToCluster;
      73                 : 
      74                 : 
      75                 : static void cluster_multiple_rels(List *rtcs, ClusterParams *params);
      76                 : static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
      77                 : static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
      78                 :                             bool verbose, bool *pSwapToastByContent,
      79                 :                             TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
      80                 : static List *get_tables_to_cluster(MemoryContext cluster_context);
      81                 : static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
      82                 :                                                Oid indexOid);
      83                 : static bool cluster_is_permitted_for_relation(Oid relid, Oid userid);
      84                 : 
      85                 : 
      86                 : /*---------------------------------------------------------------------------
      87                 :  * This cluster code allows for clustering multiple tables at once. Because
      88                 :  * of this, we cannot just run everything on a single transaction, or we
      89                 :  * would be forced to acquire exclusive locks on all the tables being
      90                 :  * clustered, simultaneously --- very likely leading to deadlock.
      91                 :  *
      92                 :  * To solve this we follow a similar strategy to VACUUM code,
      93                 :  * clustering each relation in a separate transaction. For this to work,
      94                 :  * we need to:
      95                 :  *  - provide a separate memory context so that we can pass information in
      96                 :  *    a way that survives across transactions
      97                 :  *  - start a new transaction every time a new relation is clustered
      98                 :  *  - check for validity of the information on to-be-clustered relations,
      99                 :  *    as someone might have deleted a relation behind our back, or
     100                 :  *    clustered one on a different index
     101                 :  *  - end the transaction
     102                 :  *
     103                 :  * The single-relation case does not have any such overhead.
     104                 :  *
     105                 :  * We also allow a relation to be specified without index.  In that case,
     106                 :  * the indisclustered bit will be looked up, and an ERROR will be thrown
     107                 :  * if there is no index with the bit set.
                         :  *
                         :  * Arguments:
                         :  *  - pstate: used only to report the error position of an unrecognized
                         :  *    option
                         :  *  - stmt: supplies the option list (params), the optional target
                         :  *    relation and the optional index name
                         :  *  - isTopLevel: handed to PreventInTransactionBlock() on the multi-table
                         :  *    path
                         :  *
                         :  * Control flow, in brief:
                         :  *  - a named table that is not a partitioned table is passed straight to
                         :  *    cluster_rel() and we return, without any transaction commands here
                         :  *  - a partitioned table, or no table name at all, takes the multi-table
                         :  *    path: a list of RelToCluster entries is built in a private memory
                         :  *    context and processed by cluster_multiple_rels(), with
                         :  *    CLUOPT_RECHECK set so that cluster_rel() re-validates each entry
     108                 :  *---------------------------------------------------------------------------
     109                 :  */
     110                 : void
     111 GIC         106 : cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
     112                 : {
     113                 :     ListCell   *lc;
     114 CBC         106 :     ClusterParams params = {0};
     115 GIC         106 :     bool        verbose = false;
     116             106 :     Relation    rel = NULL;
     117 CBC         106 :     Oid         indexOid = InvalidOid;
     118 ECB             :     MemoryContext cluster_context;
     119                 :     List       *rtcs;
     120                 : 
     121                 :     /* Parse option list; "verbose" is the only option recognized here */
     122 GIC         112 :     foreach(lc, stmt->params)
     123                 :     {
     124               6 :         DefElem    *opt = (DefElem *) lfirst(lc);
     125 ECB             : 
     126 GIC           6 :         if (strcmp(opt->defname, "verbose") == 0)
     127 CBC           6 :             verbose = defGetBoolean(opt);
     128                 :         else
     129 LBC           0 :             ereport(ERROR,
     130 ECB             :                     (errcode(ERRCODE_SYNTAX_ERROR),
     131                 :                      errmsg("unrecognized CLUSTER option \"%s\"",
     132 EUB             :                             opt->defname),
     133                 :                      parser_errposition(pstate, opt->location)));
     134                 :     }
     135                 : 
     136 GIC         106 :     params.options = (verbose ? CLUOPT_VERBOSE : 0);
     137                 : 
     138             106 :     if (stmt->relation != NULL)
     139 ECB             :     {
     140                 :         /* This is the single-relation case. */
     141                 :         Oid         tableOid;
     142                 : 
     143                 :         /*
     144                 :          * Find, lock, and check permissions on the table.  We obtain
     145                 :          * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
     146                 :          * single-transaction case.
     147                 :          */
     148 GIC          94 :         tableOid = RangeVarGetRelidExtended(stmt->relation,
     149                 :                                             AccessExclusiveLock,
     150                 :                                             0,
     151                 :                                             RangeVarCallbackMaintainsTable,
     152                 :                                             NULL);
     153              91 :         rel = table_open(tableOid, NoLock);
     154                 : 
     155                 :         /*
     156                 :          * Reject clustering a remote temp table ... their local buffer
     157 ECB             :          * manager is not going to cope.
     158                 :          */
     159 GIC          91 :         if (RELATION_IS_OTHER_TEMP(rel))
     160 UIC           0 :             ereport(ERROR,
     161                 :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     162                 :                      errmsg("cannot cluster temporary tables of other sessions")));
     163 ECB             : 
     164 GBC          91 :         if (stmt->indexname == NULL)
     165                 :         {
     166                 :             ListCell   *index;
     167                 : 
     168 ECB             :             /*
                         :              * We need to find the index that has indisclustered set.  indexOid
                         :              * is reset to InvalidOid after every non-matching index, so it is
                         :              * valid after the loop only if a match was found.
                         :              */
     169 GIC          21 :             foreach(index, RelationGetIndexList(rel))
     170                 :             {
     171              15 :                 indexOid = lfirst_oid(index);
     172              15 :                 if (get_index_isclustered(indexOid))
     173 CBC           9 :                     break;
     174 GIC           6 :                 indexOid = InvalidOid;
     175 ECB             :             }
     176                 : 
     177 CBC          15 :             if (!OidIsValid(indexOid))
     178               6 :                 ereport(ERROR,
     179                 :                         (errcode(ERRCODE_UNDEFINED_OBJECT),
     180                 :                          errmsg("there is no previously clustered index for table \"%s\"",
     181 ECB             :                                 stmt->relation->relname)));
     182                 :         }
     183                 :         else
     184                 :         {
     185                 :             /*
     186                 :              * The index is expected to be in the same namespace as the
     187                 :              * relation.
     188                 :              */
     189 GIC          76 :             indexOid = get_relname_relid(stmt->indexname,
     190              76 :                                          rel->rd_rel->relnamespace);
     191              76 :             if (!OidIsValid(indexOid))
     192 UIC           0 :                 ereport(ERROR,
     193 ECB             :                         (errcode(ERRCODE_UNDEFINED_OBJECT),
     194                 :                          errmsg("index \"%s\" for table \"%s\" does not exist",
     195                 :                                 stmt->indexname, stmt->relation->relname)));
     196 EUB             :         }
     197                 : 
     198 GIC          85 :         if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
     199                 :         {
     200                 :             /* close relation, keep lock till commit */
     201              72 :             table_close(rel, NoLock);
     202 ECB             : 
     203                 :             /* Do the job. */
     204 GIC          72 :             cluster_rel(tableOid, indexOid, &params);
     205 ECB             : 
     206 GIC          72 :             return;
     207                 :         }
     208 ECB             :     }
     209                 : 
     210                 :     /*
     211                 :      * By here, we know we are in a multi-table situation.  In order to avoid
     212                 :      * holding locks for too long, we want to process each table in its own
     213                 :      * transaction.  This forces us to disallow running inside a user
     214                 :      * transaction block.
     215                 :      */
     216 GIC          25 :     PreventInTransactionBlock(isTopLevel, "CLUSTER");
     217                 : 
     218                 :     /* Also, we need a memory context to hold our list of relations */
     219              25 :     cluster_context = AllocSetContextCreate(PortalContext,
     220 ECB             :                                             "Cluster",
     221                 :                                             ALLOCSET_DEFAULT_SIZES);
     222                 : 
     223                 :     /*
     224                 :      * Either we're processing a partitioned table, or we were not given any
     225                 :      * table name at all.  In either case, obtain a list of relations to
     226                 :      * process.
     227                 :      *
     228                 :      * In the former case, an index name must have been given, so we don't
     229                 :      * need to recheck its "indisclustered" bit, but we have to check that it
     230                 :      * is an index that we can cluster on.  In the latter case, we set the
     231                 :      * option bit to have indisclustered verified.
     232                 :      *
     233                 :      * Rechecking the relation itself is necessary here in all cases.
     234                 :      */
     235 GIC          25 :     params.options |= CLUOPT_RECHECK;
     236              25 :     if (rel != NULL)
     237                 :     {
     238              13 :         Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
     239 CBC          13 :         check_index_is_clusterable(rel, indexOid, AccessShareLock);
     240              10 :         rtcs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
     241                 : 
     242 ECB             :         /* close relation, releasing lock on parent table */
     243 CBC          10 :         table_close(rel, AccessExclusiveLock);
     244 ECB             :     }
     245                 :     else
     246                 :     {
     247 CBC          12 :         rtcs = get_tables_to_cluster(cluster_context);
     248 GIC          12 :         params.options |= CLUOPT_RECHECK_ISCLUSTERED;
     249                 :     }
     250                 : 
     251 ECB             :     /* Do the job. */
     252 CBC          22 :     cluster_multiple_rels(rtcs, &params);
     253                 : 
     254                 :     /* Start a new transaction for the cleanup work; cluster_multiple_rels() leaves none open. */
     255 GIC          22 :     StartTransactionCommand();
     256 ECB             : 
     257                 :     /* Clean up working storage */
     258 GIC          22 :     MemoryContextDelete(cluster_context);
     259 ECB             : }
     260                 : 
     261                 : /*
     262                 :  * Given a list of relations to cluster, process each of them in a separate
     263                 :  * transaction.
     264                 :  *
                         :  * rtcs is a list of RelToCluster pointers; params is passed unchanged to
                         :  * cluster_rel() for every entry.
                         :  *
     265                 :  * We expect to be in a transaction at start, but there isn't one when we
     266                 :  * return.
     267                 :  */
     268                 : static void
     269 GIC          22 : cluster_multiple_rels(List *rtcs, ClusterParams *params)
     270                 : {
     271                 :     ListCell   *lc;
     272                 : 
     273 ECB             :     /* Pop the active snapshot and commit to get out of starting transaction */
     274 GIC          22 :     PopActiveSnapshot();
     275              22 :     CommitTransactionCommand();
     276                 : 
     277                 :     /* Cluster the tables, each in a separate transaction */
     278 CBC          48 :     foreach(lc, rtcs)
     279 ECB             :     {
     280 GIC          26 :         RelToCluster *rtc = (RelToCluster *) lfirst(lc);
     281                 : 
     282 ECB             :         /* Start a new transaction for each relation. */
     283 GIC          26 :         StartTransactionCommand();
     284 ECB             : 
     285                 :         /* functions in indexes may want a snapshot set */
     286 GIC          26 :         PushActiveSnapshot(GetTransactionSnapshot());
     287 ECB             : 
     288                 :         /* Do the job. */
     289 GIC          26 :         cluster_rel(rtc->tableOid, rtc->indexOid, params);
     290 ECB             : 
                         :         /* Undo the snapshot push above, then end this relation's transaction */
     291 GIC          26 :         PopActiveSnapshot();
     292              26 :         CommitTransactionCommand();
     293 ECB             :     }
     294 GIC          22 : }
     295 ECB             : 
     296                 : /*
     297                 :  * cluster_rel
     298                 :  *
     299                 :  * This clusters the table by creating a new, clustered table and
     300                 :  * swapping the relfilenumbers of the new table and the old table, so
     301                 :  * the OID of the original table is preserved.  Thus we do not lose
     302                 :  * GRANT, inheritance nor references to this table (this was a bug
     303                 :  * in releases through 7.3).
     304                 :  *
     305                 :  * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
     306                 :  * the new table, it's better to create the indexes afterwards than to fill
     307                 :  * them incrementally while we load the table.
     308                 :  *
     309                 :  * If indexOid is InvalidOid, the table will be rewritten in physical order
     310                 :  * instead of index order.  This is the new implementation of VACUUM FULL,
     311                 :  * and error messages should refer to the operation as VACUUM not CLUSTER.
                         :  *
                         :  * params->options is consulted for CLUOPT_VERBOSE (passed down to
                         :  * rebuild_relation), CLUOPT_RECHECK (re-validate the relation and index
                         :  * once the table is locked) and CLUOPT_RECHECK_ISCLUSTERED (additionally
                         :  * require that the index still has indisclustered set).
                         :  *
                         :  * If try_relation_open() finds no table we just end progress reporting and
                         :  * return.  Every later silent-skip path closes the relation itself and
                         :  * jumps to "out", where the GUC nest level, the userid/security context and
                         :  * progress reporting are restored in one place.
     312                 :  */
     313                 : void
     314 GIC         262 : cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
     315                 : {
     316                 :     Relation    OldHeap;
     317                 :     Oid         save_userid;
     318 ECB             :     int         save_sec_context;
     319                 :     int         save_nestlevel;
     320 GIC         262 :     bool        verbose = ((params->options & CLUOPT_VERBOSE) != 0);
     321             262 :     bool        recheck = ((params->options & CLUOPT_RECHECK) != 0);
     322                 : 
     323                 :     /* Check for user-requested abort. */
     324 CBC         262 :     CHECK_FOR_INTERRUPTS();
     325 ECB             : 
     326 GIC         262 :     pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid);
     327             262 :     if (OidIsValid(indexOid))
     328 CBC          98 :         pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
     329                 :                                      PROGRESS_CLUSTER_COMMAND_CLUSTER);
     330 ECB             :     else
     331 CBC         164 :         pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
     332 ECB             :                                      PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);
     333                 : 
     334                 :     /*
     335                 :      * We grab exclusive access to the target rel and index for the duration
     336                 :      * of the transaction.  (This is redundant for the single-transaction
     337                 :      * case, since cluster() already did it.)  The index lock is taken inside
     338                 :      * check_index_is_clusterable.
     339                 :      */
     340 GIC         262 :     OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
     341                 : 
     342                 :     /* If the table has gone away, we can skip processing it */
     343             262 :     if (!OldHeap)
     344 ECB             :     {
     345 UIC           0 :         pgstat_progress_end_command();
     346               0 :         return;
     347 ECB             :     }
     348                 : 
     349 EUB             :     /*
     350                 :      * Switch to the table owner's userid, so that any index functions are run
     351                 :      * as that user.  Also lock down security-restricted operations and
     352                 :      * arrange to make GUC variable changes local to this command.
     353                 :      */
     354 GIC         262 :     GetUserIdAndSecContext(&save_userid, &save_sec_context);
     355             262 :     SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
     356                 :                            save_sec_context | SECURITY_RESTRICTED_OPERATION);
     357             262 :     save_nestlevel = NewGUCNestLevel();
     358 ECB             : 
     359                 :     /*
     360                 :      * Since we may open a new transaction for each relation, we have to check
     361                 :      * that the relation still is what we think it is.
     362                 :      *
     363                 :      * If this is a single-transaction CLUSTER, we can skip these tests. We
     364                 :      * *must* skip the one on indisclustered since it would reject an attempt
     365                 :      * to cluster a not-previously-clustered index.
     366                 :      */
     367 GIC         262 :     if (recheck)
     368                 :     {
     369                 :         /* Check that the user still has privileges for the relation (tested against the saved, original userid) */
     370 GNC          26 :         if (!cluster_is_permitted_for_relation(tableOid, save_userid))
     371 ECB             :         {
     372 UIC           0 :             relation_close(OldHeap, AccessExclusiveLock);
     373               0 :             goto out;
     374 ECB             :         }
     375                 : 
     376 EUB             :         /*
     377                 :          * Silently skip a temp table for a remote session.  Only doing this
     378                 :          * check in the "recheck" case is appropriate (which currently means
     379                 :          * somebody is executing a database-wide CLUSTER or on a partitioned
     380                 :          * table), because there is another check in cluster() which will stop
     381                 :          * any attempt to cluster remote temp tables by name.  There is
     382                 :          * another check further down in cluster_rel which is redundant, but we
     383                 :          * leave it for extra safety.
     384                 :          */
     385 GIC          26 :         if (RELATION_IS_OTHER_TEMP(OldHeap))
     386                 :         {
     387 UIC           0 :             relation_close(OldHeap, AccessExclusiveLock);
     388               0 :             goto out;
     389 ECB             :         }
     390                 : 
     391 GBC          26 :         if (OidIsValid(indexOid))
     392 EUB             :         {
     393                 :             /*
     394                 :              * Check that the index still exists
     395 ECB             :              */
     396 GIC          26 :             if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
     397                 :             {
     398 UIC           0 :                 relation_close(OldHeap, AccessExclusiveLock);
     399               0 :                 goto out;
     400 ECB             :             }
     401                 : 
     402 EUB             :             /*
     403                 :              * Check that the index is still the one with indisclustered set,
     404                 :              * if needed.
     405                 :              */
     406 GIC          26 :             if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
     407               3 :                 !get_index_isclustered(indexOid))
     408                 :             {
     409 UIC           0 :                 relation_close(OldHeap, AccessExclusiveLock);
     410 LBC           0 :                 goto out;
     411 ECB             :             }
     412                 :         }
     413 EUB             :     }
     414                 : 
     415                 :     /*
     416                 :      * We allow VACUUM FULL, but not CLUSTER, on shared catalogs.  CLUSTER
     417                 :      * would work in most respects, but the index would only get marked as
     418                 :      * indisclustered in the current database, leading to unexpected behavior
     419                 :      * if CLUSTER were later invoked in another database.
     420                 :      */
     421 GIC         262 :     if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
     422 UIC           0 :         ereport(ERROR,
     423                 :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     424                 :                  errmsg("cannot cluster a shared catalog")));
     425 ECB             : 
     426 EUB             :     /*
     427                 :      * Don't process temp tables of other backends ... their local buffer
     428                 :      * manager is not going to cope.  The message names CLUSTER or VACUUM
                         :      * depending on whether an index was given.
     429                 :      */
     430 GIC         262 :     if (RELATION_IS_OTHER_TEMP(OldHeap))
     431                 :     {
     432 UIC           0 :         if (OidIsValid(indexOid))
     433               0 :             ereport(ERROR,
     434 ECB             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     435                 :                      errmsg("cannot cluster temporary tables of other sessions")));
     436 EUB             :         else
     437 UBC           0 :             ereport(ERROR,
     438                 :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     439                 :                      errmsg("cannot vacuum temporary tables of other sessions")));
     440                 :     }
     441 EUB             : 
     442                 :     /*
     443                 :      * Also check for active uses of the relation in the current transaction,
     444                 :      * including open scans and pending AFTER trigger events.
     445                 :      */
     446 GIC         262 :     CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
     447                 : 
     448                 :     /* Check heap and index are valid to cluster on */
     449             262 :     if (OidIsValid(indexOid))
     450 CBC          98 :         check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
     451                 : 
     452                 :     /*
     453 ECB             :      * Quietly ignore the request if this is a materialized view which has not
     454                 :      * been populated from its query. No harm is done because there is no data
     455                 :      * to deal with, and we don't want to throw an error if this is part of a
     456                 :      * multi-relation request -- for example, CLUSTER was run on the entire
     457                 :      * database.
     458                 :      */
     459 GIC         262 :     if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
     460 UIC           0 :         !RelationIsPopulated(OldHeap))
     461                 :     {
     462               0 :         relation_close(OldHeap, AccessExclusiveLock);
     463 LBC           0 :         goto out;
     464 EUB             :     }
     465                 : 
     466 GBC         262 :     Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
     467 EUB             :            OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
     468                 :            OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
     469                 : 
     470 ECB             :     /*
     471                 :      * All predicate locks on the tuples or pages are about to be made
     472                 :      * invalid, because we move tuples around.  Promote them to relation
     473                 :      * locks.  Predicate locks on indexes will be promoted when they are
     474                 :      * reindexed.
     475                 :      */
     476 GIC         262 :     TransferPredicateLocksToHeapRelation(OldHeap);
     477                 : 
     478                 :     /* rebuild_relation does all the dirty work */
     479             262 :     rebuild_relation(OldHeap, indexOid, verbose);
     480 ECB             : 
     481                 :     /* NB: rebuild_relation does table_close() on OldHeap */
     482                 : 
     483 CBC         259 : out:
     484                 :     /* Roll back any GUC changes executed by index functions */
     485 GIC         259 :     AtEOXact_GUC(false, save_nestlevel);
     486                 : 
     487 ECB             :     /* Restore userid and security context */
     488 GIC         259 :     SetUserIdAndSecContext(save_userid, save_sec_context);
     489 ECB             : 
     490 GIC         259 :     pgstat_progress_end_command();
     491                 : }
     492 ECB             : 
     493                 : /*
     494                 :  * Verify that the specified heap and index are valid to cluster on
     495                 :  *
     496                 :  * Side effect: obtains lock on the index.  The caller may
     497                 :  * in some cases already have AccessExclusiveLock on the table, but
     498                 :  * not in all cases so we can't rely on the table-level lock for
     499                 :  * protection here.
     500                 :  */
     501                 : void
     502 GIC         143 : check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
     503                 : {
     504                 :     Relation    OldIndex;
     505                 : 
     506 CBC         143 :     OldIndex = index_open(indexOid, lockmode);
     507                 : 
     508                 :     /*
     509                 :      * Check that index is in fact an index on the given relation
     510 ECB             :      */
     511 GIC         143 :     if (OldIndex->rd_index == NULL ||
     512             143 :         OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
     513 UIC           0 :         ereport(ERROR,
     514                 :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     515 ECB             :                  errmsg("\"%s\" is not an index for table \"%s\"",
     516                 :                         RelationGetRelationName(OldIndex),
     517 EUB             :                         RelationGetRelationName(OldHeap))));
     518                 : 
     519                 :     /* Index AM must allow clustering */
     520 GIC         143 :     if (!OldIndex->rd_indam->amclusterable)
     521 UIC           0 :         ereport(ERROR,
     522                 :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     523                 :                  errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
     524 ECB             :                         RelationGetRelationName(OldIndex))));
     525 EUB             : 
     526                 :     /*
     527                 :      * Disallow clustering on incomplete indexes (those that might not index
     528                 :      * every row of the relation).  We could relax this by making a separate
     529                 :      * seqscan pass over the table to copy the missing rows, but that seems
     530                 :      * expensive and tedious.
     531                 :      */
     532 GIC         143 :     if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
     533 UIC           0 :         ereport(ERROR,
     534                 :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     535                 :                  errmsg("cannot cluster on partial index \"%s\"",
     536 ECB             :                         RelationGetRelationName(OldIndex))));
     537 EUB             : 
     538                 :     /*
     539                 :      * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
     540                 :      * it might well not contain entries for every heap row, or might not even
     541                 :      * be internally consistent.  (But note that we don't check indcheckxmin;
     542                 :      * the worst consequence of following broken HOT chains would be that we
     543                 :      * might put recently-dead tuples out-of-order in the new table, and there
     544                 :      * is little harm in that.)
     545                 :      */
     546 GIC         143 :     if (!OldIndex->rd_index->indisvalid)
     547               3 :         ereport(ERROR,
     548                 :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     549                 :                  errmsg("cannot cluster on invalid index \"%s\"",
     550 ECB             :                         RelationGetRelationName(OldIndex))));
     551                 : 
     552                 :     /* Drop relcache refcnt on OldIndex, but keep lock */
     553 GIC         140 :     index_close(OldIndex, NoLock);
     554             140 : }
     555                 : 
     556                 : /*
     557 ECB             :  * mark_index_clustered: mark the specified index as the one clustered on
     558                 :  *
     559                 :  * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
     560                 :  */
     561                 : void
     562 GIC         139 : mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
     563                 : {
     564                 :     HeapTuple   indexTuple;
     565                 :     Form_pg_index indexForm;
     566 ECB             :     Relation    pg_index;
     567                 :     ListCell   *index;
     568                 : 
     569                 :     /* Disallow applying to a partitioned table */
     570 GIC         139 :     if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
     571               6 :         ereport(ERROR,
     572                 :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     573                 :                  errmsg("cannot mark index clustered in partitioned table")));
     574 ECB             : 
     575                 :     /*
     576                 :      * If the index is already marked clustered, no need to do anything.
     577                 :      */
     578 GIC         133 :     if (OidIsValid(indexOid))
     579                 :     {
     580             127 :         if (get_index_isclustered(indexOid))
     581              18 :             return;
     582 ECB             :     }
     583                 : 
     584                 :     /*
     585                 :      * Check each index of the relation and set/clear the bit as needed.
     586                 :      */
     587 GIC         115 :     pg_index = table_open(IndexRelationId, RowExclusiveLock);
     588                 : 
     589             336 :     foreach(index, RelationGetIndexList(rel))
     590                 :     {
     591 CBC         221 :         Oid         thisIndexOid = lfirst_oid(index);
     592                 : 
     593             221 :         indexTuple = SearchSysCacheCopy1(INDEXRELID,
     594                 :                                          ObjectIdGetDatum(thisIndexOid));
     595             221 :         if (!HeapTupleIsValid(indexTuple))
     596 UIC           0 :             elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
     597 CBC         221 :         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
     598                 : 
     599 ECB             :         /*
     600 EUB             :          * Unset the bit if set.  We know it's wrong because we checked this
     601 ECB             :          * earlier.
     602                 :          */
     603 GIC         221 :         if (indexForm->indisclustered)
     604                 :         {
     605              15 :             indexForm->indisclustered = false;
     606              15 :             CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
     607 ECB             :         }
     608 GIC         206 :         else if (thisIndexOid == indexOid)
     609 ECB             :         {
     610                 :             /* this was checked earlier, but let's be real sure */
     611 GIC         109 :             if (!indexForm->indisvalid)
     612 LBC           0 :                 elog(ERROR, "cannot cluster on invalid index %u", indexOid);
     613 GIC         109 :             indexForm->indisclustered = true;
     614             109 :             CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
     615 ECB             :         }
     616 EUB             : 
     617 CBC         221 :         InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
     618 ECB             :                                      InvalidOid, is_internal);
     619                 : 
     620 GIC         221 :         heap_freetuple(indexTuple);
     621 ECB             :     }
     622                 : 
     623 GIC         115 :     table_close(pg_index, RowExclusiveLock);
     624 ECB             : }
     625                 : 
     626                 : /*
     627                 :  * rebuild_relation: rebuild an existing relation in index or physical order
     628                 :  *
     629                 :  * OldHeap: table to rebuild --- must be opened and exclusive-locked!
     630                 :  * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
     631                 :  *
     632                 :  * NB: this routine closes OldHeap at the right time; caller should not.
     633                 :  */
     634                 : static void
     635 GIC         262 : rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
     636                 : {
     637             262 :     Oid         tableOid = RelationGetRelid(OldHeap);
     638             262 :     Oid         accessMethod = OldHeap->rd_rel->relam;
     639 CBC         262 :     Oid         tableSpace = OldHeap->rd_rel->reltablespace;
     640                 :     Oid         OIDNewHeap;
     641 ECB             :     char        relpersistence;
     642                 :     bool        is_system_catalog;
     643                 :     bool        swap_toast_by_content;
     644                 :     TransactionId frozenXid;
     645                 :     MultiXactId cutoffMulti;
     646                 : 
     647 GIC         262 :     if (OidIsValid(indexOid))
     648                 :         /* Mark the correct index as clustered */
     649              98 :         mark_index_clustered(OldHeap, indexOid, true);
     650                 : 
     651 ECB             :     /* Remember info about rel before closing OldHeap */
     652 GIC         262 :     relpersistence = OldHeap->rd_rel->relpersistence;
     653 CBC         262 :     is_system_catalog = IsSystemRelation(OldHeap);
     654                 : 
     655                 :     /* Close relcache entry, but keep lock until transaction commit */
     656             262 :     table_close(OldHeap, NoLock);
     657 ECB             : 
     658                 :     /* Create the transient table that will receive the re-ordered data */
     659 GIC         262 :     OIDNewHeap = make_new_heap(tableOid, tableSpace,
     660 ECB             :                                accessMethod,
     661                 :                                relpersistence,
     662                 :                                AccessExclusiveLock);
     663                 : 
     664                 :     /* Copy the heap data into the new table in the desired order */
     665 GIC         262 :     copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
     666                 :                     &swap_toast_by_content, &frozenXid, &cutoffMulti);
     667                 : 
     668                 :     /*
     669 ECB             :      * Swap the physical files of the target and transient tables, then
     670                 :      * rebuild the target's indexes and throw away the transient table.
     671                 :      */
     672 GIC         262 :     finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
     673                 :                      swap_toast_by_content, false, true,
     674                 :                      frozenXid, cutoffMulti,
     675                 :                      relpersistence);
     676 CBC         259 : }
     677                 : 
     678                 : 
     679                 : /*
     680 ECB             :  * Create the transient table that will be filled with new data during
     681                 :  * CLUSTER, ALTER TABLE, and similar operations.  The transient table
     682                 :  * duplicates the logical structure of the OldHeap; but will have the
     683                 :  * specified physical storage properties NewTableSpace, NewAccessMethod, and
     684                 :  * relpersistence.
     685                 :  *
     686                 :  * After this, the caller should load the new heap with transferred/modified
     687                 :  * data, then call finish_heap_swap to complete the operation.
     688                 :  */
     689                 : Oid
     690 GIC         744 : make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
     691                 :               char relpersistence, LOCKMODE lockmode)
     692                 : {
     693                 :     TupleDesc   OldHeapDesc;
     694 ECB             :     char        NewHeapName[NAMEDATALEN];
     695                 :     Oid         OIDNewHeap;
     696                 :     Oid         toastid;
     697                 :     Relation    OldHeap;
     698                 :     HeapTuple   tuple;
     699                 :     Datum       reloptions;
     700                 :     bool        isNull;
     701                 :     Oid         namespaceid;
     702                 : 
     703 GIC         744 :     OldHeap = table_open(OIDOldHeap, lockmode);
     704             744 :     OldHeapDesc = RelationGetDescr(OldHeap);
     705                 : 
     706                 :     /*
     707 ECB             :      * Note that the NewHeap will not receive any of the defaults or
     708                 :      * constraints associated with the OldHeap; we don't need 'em, and there's
     709                 :      * no reason to spend cycles inserting them into the catalogs only to
     710                 :      * delete them.
     711                 :      */
     712                 : 
     713                 :     /*
     714                 :      * But we do want to use reloptions of the old heap for new heap.
     715                 :      */
     716 GIC         744 :     tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
     717             744 :     if (!HeapTupleIsValid(tuple))
     718 UIC           0 :         elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
     719 GIC         744 :     reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
     720 ECB             :                                  &isNull);
     721 CBC         744 :     if (isNull)
     722 GBC         725 :         reloptions = (Datum) 0;
     723 ECB             : 
     724 GIC         744 :     if (relpersistence == RELPERSISTENCE_TEMP)
     725 CBC          70 :         namespaceid = LookupCreationNamespace("pg_temp");
     726 ECB             :     else
     727 GIC         674 :         namespaceid = RelationGetNamespace(OldHeap);
     728 ECB             : 
     729                 :     /*
     730                 :      * Create the new heap, using a temporary name in the same namespace as
     731                 :      * the existing table.  NOTE: there is some risk of collision with user
     732                 :      * relnames.  Working around this seems more trouble than it's worth; in
     733                 :      * particular, we can't create the new heap in a different namespace from
     734                 :      * the old, or we will have problems with the TEMP status of temp tables.
     735                 :      *
     736                 :      * Note: the new heap is not a shared relation, even if we are rebuilding
     737                 :      * a shared rel.  However, we do make the new heap mapped if the source is
     738                 :      * mapped.  This simplifies swap_relation_files, and is absolutely
     739                 :      * necessary for rebuilding pg_class, for reasons explained there.
     740                 :      */
     741 GIC         744 :     snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
     742                 : 
     743             744 :     OIDNewHeap = heap_create_with_catalog(NewHeapName,
     744                 :                                           namespaceid,
     745 ECB             :                                           NewTableSpace,
     746                 :                                           InvalidOid,
     747                 :                                           InvalidOid,
     748                 :                                           InvalidOid,
     749 GIC         744 :                                           OldHeap->rd_rel->relowner,
     750                 :                                           NewAccessMethod,
     751                 :                                           OldHeapDesc,
     752                 :                                           NIL,
     753 ECB             :                                           RELKIND_RELATION,
     754                 :                                           relpersistence,
     755                 :                                           false,
     756 GIC         744 :                                           RelationIsMapped(OldHeap),
     757                 :                                           ONCOMMIT_NOOP,
     758                 :                                           reloptions,
     759                 :                                           false,
     760 ECB             :                                           true,
     761                 :                                           true,
     762                 :                                           OIDOldHeap,
     763                 :                                           NULL);
     764 GIC         744 :     Assert(OIDNewHeap != InvalidOid);
     765                 : 
     766             744 :     ReleaseSysCache(tuple);
     767                 : 
     768 ECB             :     /*
     769                 :      * Advance command counter so that the newly-created relation's catalog
     770                 :      * tuples will be visible to table_open.
     771                 :      */
     772 GIC         744 :     CommandCounterIncrement();
     773                 : 
     774                 :     /*
     775                 :      * If necessary, create a TOAST table for the new relation.
     776 ECB             :      *
     777                 :      * If the relation doesn't have a TOAST table already, we can't need one
     778                 :      * for the new relation.  The other way around is possible though: if some
     779                 :      * wide columns have been dropped, NewHeapCreateToastTable can decide that
     780                 :      * no TOAST table is needed for the new table.
     781                 :      *
     782                 :      * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
     783                 :      * that the TOAST table will be visible for insertion.
     784                 :      */
     785 GIC         744 :     toastid = OldHeap->rd_rel->reltoastrelid;
     786             744 :     if (OidIsValid(toastid))
     787                 :     {
     788                 :         /* keep the existing toast table's reloptions, if any */
     789 CBC         294 :         tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
     790             294 :         if (!HeapTupleIsValid(tuple))
     791 UIC           0 :             elog(ERROR, "cache lookup failed for relation %u", toastid);
     792 GIC         294 :         reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
     793 ECB             :                                      &isNull);
     794 CBC         294 :         if (isNull)
     795 GBC         294 :             reloptions = (Datum) 0;
     796 ECB             : 
     797 GIC         294 :         NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
     798 ECB             : 
     799 CBC         294 :         ReleaseSysCache(tuple);
     800                 :     }
     801 ECB             : 
     802 GIC         744 :     table_close(OldHeap, NoLock);
     803 ECB             : 
     804 GIC         744 :     return OIDNewHeap;
     805                 : }
     806 ECB             : 
     807                 : /*
     808                 :  * Do the physical copying of table data.
     809                 :  *
     810                 :  * There are three output parameters:
     811                 :  * *pSwapToastByContent is set true if toast tables must be swapped by content.
     812                 :  * *pFreezeXid receives the TransactionId used as freeze cutoff point.
     813                 :  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
     814                 :  */
     815                 : static void
     816 GIC         262 : copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
     817                 :                 bool *pSwapToastByContent, TransactionId *pFreezeXid,
     818                 :                 MultiXactId *pCutoffMulti)
     819                 : {
     820 ECB             :     Relation    NewHeap,
     821                 :                 OldHeap,
     822                 :                 OldIndex;
     823                 :     Relation    relRelation;
     824                 :     HeapTuple   reltup;
     825                 :     Form_pg_class relform;
     826                 :     TupleDesc   oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
     827                 :     TupleDesc   newTupDesc PG_USED_FOR_ASSERTS_ONLY;
     828                 :     VacuumParams params;
     829                 :     struct VacuumCutoffs cutoffs;
     830                 :     bool        use_sort;
     831 GIC         262 :     double      num_tuples = 0,
     832             262 :                 tups_vacuumed = 0,
     833 CBC         262 :                 tups_recently_dead = 0;
     834 ECB             :     BlockNumber num_pages;
     835 CBC         262 :     int         elevel = verbose ? INFO : DEBUG2;
     836                 :     PGRUsage    ru0;
     837 ECB             :     char       *nspname;
     838                 : 
     839 GIC         262 :     pg_rusage_init(&ru0);
     840                 : 
     841 ECB             :     /*
     842                 :      * Open the relations we need.
     843                 :      */
     844 GIC         262 :     NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
     845             262 :     OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
     846 CBC         262 :     if (OidIsValid(OIDOldIndex))
     847              98 :         OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
     848 ECB             :     else
     849 CBC         164 :         OldIndex = NULL;
     850                 : 
     851 ECB             :     /* Store a copy of the namespace name for logging purposes */
     852 GIC         262 :     nspname = get_namespace_name(RelationGetNamespace(OldHeap));
     853                 : 
     854 ECB             :     /*
     855                 :      * Their tuple descriptors should be exactly alike, but here we only need
     856                 :      * assume that they have the same number of columns.
     857                 :      */
     858 GIC         262 :     oldTupDesc = RelationGetDescr(OldHeap);
     859             262 :     newTupDesc = RelationGetDescr(NewHeap);
     860 CBC         262 :     Assert(newTupDesc->natts == oldTupDesc->natts);
     861 ECB             : 
     862                 :     /*
     863                 :      * If the OldHeap has a toast table, get lock on the toast table to keep
     864                 :      * it from being vacuumed.  This is needed because autovacuum processes
     865                 :      * toast tables independently of their main tables, with no lock on the
     866                 :      * latter.  If an autovacuum were to start on the toast table after we
     867                 :      * compute our OldestXmin below, it would use a later OldestXmin, and then
     868                 :      * possibly remove as DEAD toast tuples belonging to main tuples we think
     869                 :      * are only RECENTLY_DEAD.  Then we'd fail while trying to copy those
     870                 :      * tuples.
     871                 :      *
     872                 :      * We don't need to open the toast relation here, just lock it.  The lock
     873                 :      * will be held till end of transaction.
     874                 :      */
     875 GIC         262 :     if (OldHeap->rd_rel->reltoastrelid)
     876              87 :         LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
     877 ECB             : 
     878                 :     /*
     879                 :      * If both tables have TOAST tables, perform toast swap by content.  It is
     880                 :      * possible that the old table has a toast table but the new one doesn't,
     881                 :      * if toastable columns have been dropped.  In that case we have to do
     882                 :      * swap by links.  This is okay because swap by content is only essential
     883                 :      * for system catalogs, and we don't support schema changes for them.
     884                 :      */
     885 GIC         262 :     if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
     886                 :     {
     887 CBC          87 :         *pSwapToastByContent = true;
     888                 : 
     889 ECB             :         /*
     890                 :          * When doing swap by content, any toast pointers written into NewHeap
     891                 :          * must use the old toast table's OID, because that's where the toast
     892                 :          * data will eventually be found.  Set this up by setting rd_toastoid.
     893                 :          * This also tells toast_save_datum() to preserve the toast value
     894                 :          * OIDs, which we want so as not to invalidate toast pointers in
     895                 :          * system catalog caches, and to avoid making multiple copies of a
     896                 :          * single toast value.
     897                 :          *
     898                 :          * Note that we must hold NewHeap open until we are done writing data,
     899                 :          * since the relcache will not guarantee to remember this setting once
     900                 :          * the relation is closed.  Also, this technique depends on the fact
     901                 :          * that no one will try to read from the NewHeap until after we've
     902                 :          * finished writing it and swapping the rels --- otherwise they could
     903                 :          * follow the toast pointers to the wrong place.  (It would actually
     904                 :          * work for values copied over from the old toast table, but not for
     905                 :          * any values that we toast which were previously not toasted.)
     906                 :          */
     907 GIC          87 :         NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
     908                 :     }
     909 ECB             :     else
     910 GIC         175 :         *pSwapToastByContent = false;
     911                 : 
     912 ECB             :     /*
     913                 :      * Compute xids used to freeze and weed out dead tuples and multixacts.
     914                 :      * Since we're going to rewrite the whole table anyway, there's no reason
     915                 :      * not to be aggressive about this.
     916                 :      */
     917 GNC         262 :     memset(&params, 0, sizeof(VacuumParams));
     918             262 :     vacuum_get_cutoffs(OldHeap, &params, &cutoffs);
     919 ECB             : 
     920                 :     /*
     921                 :      * FreezeXid will become the table's new relfrozenxid, and that mustn't go
     922                 :      * backwards, so take the max.
     923                 :      */
     924 GIC         524 :     if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) &&
     925 GNC         262 :         TransactionIdPrecedes(cutoffs.FreezeLimit,
     926             262 :                               OldHeap->rd_rel->relfrozenxid))
     927              52 :         cutoffs.FreezeLimit = OldHeap->rd_rel->relfrozenxid;
     928 ECB             : 
     929                 :     /*
     930                 :      * MultiXactCutoff, similarly, shouldn't go backwards either.
     931                 :      */
     932 GIC         524 :     if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) &&
     933 GNC         262 :         MultiXactIdPrecedes(cutoffs.MultiXactCutoff,
     934             262 :                             OldHeap->rd_rel->relminmxid))
     935 UNC           0 :         cutoffs.MultiXactCutoff = OldHeap->rd_rel->relminmxid;
     936 ECB             : 
     937                 :     /*
     938                 :      * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
     939 EUB             :      * the OldHeap.  We know how to use a sort to duplicate the ordering of a
     940                 :      * btree index, and will use seqscan-and-sort for that case if the planner
     941                 :      * tells us it's cheaper.  Otherwise, always indexscan if an index is
     942                 :      * provided, else plain seqscan.
     943                 :      */
     944 GIC         262 :     if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
     945              98 :         use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
     946                 :     else
     947             164 :         use_sort = false;
     948 ECB             : 
     949                 :     /* Log what we're doing */
     950 GIC         262 :     if (OldIndex != NULL && !use_sort)
     951 CBC          48 :         ereport(elevel,
     952                 :                 (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
     953                 :                         nspname,
     954 ECB             :                         RelationGetRelationName(OldHeap),
     955                 :                         RelationGetRelationName(OldIndex))));
     956 GIC         214 :     else if (use_sort)
     957              50 :         ereport(elevel,
     958                 :                 (errmsg("clustering \"%s.%s\" using sequential scan and sort",
     959                 :                         nspname,
     960 ECB             :                         RelationGetRelationName(OldHeap))));
     961                 :     else
     962 GIC         164 :         ereport(elevel,
     963                 :                 (errmsg("vacuuming \"%s.%s\"",
     964                 :                         nspname,
     965                 :                         RelationGetRelationName(OldHeap))));
     966 ECB             : 
     967                 :     /*
     968                 :      * Hand off the actual copying to AM specific function, the generic code
     969                 :      * cannot know how to deal with visibility across AMs. Note that this
     970                 :      * routine is allowed to set FreezeXid / MultiXactCutoff to different
     971                 :      * values (e.g. because the AM doesn't use freezing).
     972                 :      */
     973 GIC         262 :     table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
     974                 :                                     cutoffs.OldestXmin, &cutoffs.FreezeLimit,
     975                 :                                     &cutoffs.MultiXactCutoff,
     976                 :                                     &num_tuples, &tups_vacuumed,
     977                 :                                     &tups_recently_dead);
     978 ECB             : 
     979                 :     /* return selected values to caller, get set as relfrozenxid/minmxid */
     980 GNC         262 :     *pFreezeXid = cutoffs.FreezeLimit;
     981             262 :     *pCutoffMulti = cutoffs.MultiXactCutoff;
     982                 : 
     983                 :     /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
     984 GIC         262 :     NewHeap->rd_toastoid = InvalidOid;
     985 ECB             : 
     986 CBC         262 :     num_pages = RelationGetNumberOfBlocks(NewHeap);
     987                 : 
     988                 :     /* Log what we did */
     989             262 :     ereport(elevel,
     990                 :             (errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
     991 ECB             :                     nspname,
     992                 :                     RelationGetRelationName(OldHeap),
     993                 :                     tups_vacuumed, num_tuples,
     994                 :                     RelationGetNumberOfBlocks(OldHeap)),
     995                 :              errdetail("%.0f dead row versions cannot be removed yet.\n"
     996                 :                        "%s.",
     997                 :                        tups_recently_dead,
     998                 :                        pg_rusage_show(&ru0))));
     999                 : 
    1000 GIC         262 :     if (OldIndex != NULL)
    1001              98 :         index_close(OldIndex, NoLock);
    1002             262 :     table_close(OldHeap, NoLock);
    1003             262 :     table_close(NewHeap, NoLock);
    1004                 : 
    1005 ECB             :     /* Update pg_class to reflect the correct values of pages and tuples. */
    1006 CBC         262 :     relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1007 ECB             : 
    1008 CBC         262 :     reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
    1009 GIC         262 :     if (!HeapTupleIsValid(reltup))
    1010 UIC           0 :         elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
    1011 CBC         262 :     relform = (Form_pg_class) GETSTRUCT(reltup);
    1012                 : 
    1013             262 :     relform->relpages = num_pages;
    1014             262 :     relform->reltuples = num_tuples;
    1015 EUB             : 
    1016 ECB             :     /* Don't update the stats for pg_class.  See swap_relation_files. */
    1017 GIC         262 :     if (OIDOldHeap != RelationRelationId)
    1018 CBC         250 :         CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
    1019 ECB             :     else
    1020 GIC          12 :         CacheInvalidateRelcacheByTuple(reltup);
    1021                 : 
    1022 ECB             :     /* Clean up. */
    1023 CBC         262 :     heap_freetuple(reltup);
    1024 GIC         262 :     table_close(relRelation, RowExclusiveLock);
    1025 ECB             : 
    1026                 :     /* Make the update visible */
    1027 GIC         262 :     CommandCounterIncrement();
    1028 CBC         262 : }
    1029 ECB             : 
    1030                 : /*
    1031                 :  * Swap the physical files of two given relations.
    1032                 :  *
    1033                 :  * We swap the physical identity (reltablespace, relfilenumber) while keeping
    1034                 :  * the same logical identities of the two relations.  relpersistence is also
    1035                 :  * swapped, which is critical since it determines where buffers live for each
    1036                 :  * relation.
    1037                 :  *
    1038                 :  * We can swap associated TOAST data in either of two ways: recursively swap
    1039                 :  * the physical content of the toast tables (and their indexes), or swap the
    1040                 :  * TOAST links in the given relations' pg_class entries.  The former is needed
    1041                 :  * to manage rewrites of shared catalogs (where we cannot change the pg_class
    1042                 :  * links) while the latter is the only way to handle cases in which a toast
    1043                 :  * table is added or removed altogether.
    1044                 :  *
    1045                 :  * Additionally, the first relation is marked with relfrozenxid set to
    1046                 :  * frozenXid and relminmxid set to cutoffMulti.  It seems a bit ugly to have
    1047                 :  * this here, but the caller would have to do it anyway, so having it here
    1048                 :  * saves a heap_update.  Note: in the swap-toast-links case, we assume the
    1049                 :  * toast table's relfrozenxid need not change: the new toast table should
    1050                 :  * already have relfrozenxid set to RecentXmin, which is good enough.
    1051                 :  *
    1052                 :  * Lastly, if r2 and its toast table and toast index (if any) are mapped,
    1053                 :  * their OIDs are emitted into mapped_tables[].  This is hacky but beats
    1054                 :  * having to look the information up again later in finish_heap_swap.
    1055                 :  */
    1056                 : static void
    1057 GIC         858 : swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
    1058                 :                     bool swap_toast_by_content,
    1059                 :                     bool is_internal,
    1060                 :                     TransactionId frozenXid,
    1061                 :                     MultiXactId cutoffMulti,
    1062 ECB             :                     Oid *mapped_tables)
    1063                 : {
    1064                 :     Relation    relRelation;
    1065                 :     HeapTuple   reltup1,
    1066                 :                 reltup2;
    1067                 :     Form_pg_class relform1,
    1068                 :                 relform2;
    1069                 :     RelFileNumber relfilenumber1,
    1070                 :                 relfilenumber2;
    1071                 :     RelFileNumber swaptemp;
    1072                 :     char        swptmpchr;
    1073                 : 
    1074                 :     /* We need writable copies of both pg_class tuples. */
    1075 GIC         858 :     relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1076                 : 
    1077             858 :     reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
    1078             858 :     if (!HeapTupleIsValid(reltup1))
    1079 UIC           0 :         elog(ERROR, "cache lookup failed for relation %u", r1);
    1080 CBC         858 :     relform1 = (Form_pg_class) GETSTRUCT(reltup1);
    1081                 : 
    1082             858 :     reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
    1083             858 :     if (!HeapTupleIsValid(reltup2))
    1084 UBC           0 :         elog(ERROR, "cache lookup failed for relation %u", r2);
    1085 CBC         858 :     relform2 = (Form_pg_class) GETSTRUCT(reltup2);
    1086                 : 
    1087 GNC         858 :     relfilenumber1 = relform1->relfilenode;
    1088             858 :     relfilenumber2 = relform2->relfilenode;
    1089 EUB             : 
    1090 GNC         858 :     if (RelFileNumberIsValid(relfilenumber1) &&
    1091                 :         RelFileNumberIsValid(relfilenumber2))
    1092                 :     {
    1093 ECB             :         /*
    1094                 :          * Normal non-mapped relations: swap relfilenumbers, reltablespaces,
    1095                 :          * relpersistence
    1096                 :          */
    1097 GIC         780 :         Assert(!target_is_pg_class);
    1098                 : 
    1099             780 :         swaptemp = relform1->relfilenode;
    1100             780 :         relform1->relfilenode = relform2->relfilenode;
    1101             780 :         relform2->relfilenode = swaptemp;
    1102                 : 
    1103 CBC         780 :         swaptemp = relform1->reltablespace;
    1104 GIC         780 :         relform1->reltablespace = relform2->reltablespace;
    1105 CBC         780 :         relform2->reltablespace = swaptemp;
    1106 ECB             : 
    1107 CBC         780 :         swaptemp = relform1->relam;
    1108 GIC         780 :         relform1->relam = relform2->relam;
    1109 CBC         780 :         relform2->relam = swaptemp;
    1110 ECB             : 
    1111 CBC         780 :         swptmpchr = relform1->relpersistence;
    1112 GIC         780 :         relform1->relpersistence = relform2->relpersistence;
    1113 CBC         780 :         relform2->relpersistence = swptmpchr;
    1114 ECB             : 
    1115                 :         /* Also swap toast links, if we're swapping by links */
    1116 GIC         780 :         if (!swap_toast_by_content)
    1117 ECB             :         {
    1118 CBC         582 :             swaptemp = relform1->reltoastrelid;
    1119             582 :             relform1->reltoastrelid = relform2->reltoastrelid;
    1120 GIC         582 :             relform2->reltoastrelid = swaptemp;
    1121                 :         }
    1122 ECB             :     }
    1123                 :     else
    1124                 :     {
    1125                 :         /*
    1126                 :          * Mapped-relation case.  Here we have to swap the relation mappings
    1127                 :          * instead of modifying the pg_class columns.  Both must be mapped.
    1128                 :          */
    1129 GNC          78 :         if (RelFileNumberIsValid(relfilenumber1) ||
    1130                 :             RelFileNumberIsValid(relfilenumber2))
    1131 UIC           0 :             elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
    1132                 :                  NameStr(relform1->relname));
    1133                 : 
    1134                 :         /*
    1135                 :          * We can't change the tablespace nor persistence of a mapped rel, and
    1136 ECB             :          * we can't handle toast link swapping for one either, because we must
    1137                 :          * not apply any critical changes to its pg_class row.  These cases
    1138 EUB             :          * should be prevented by upstream permissions tests, so these checks
    1139                 :          * are non-user-facing emergency backstops.
    1140                 :          */
    1141 GIC          78 :         if (relform1->reltablespace != relform2->reltablespace)
    1142 UIC           0 :             elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
    1143                 :                  NameStr(relform1->relname));
    1144 GIC          78 :         if (relform1->relpersistence != relform2->relpersistence)
    1145 UIC           0 :             elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
    1146                 :                  NameStr(relform1->relname));
    1147 GIC          78 :         if (relform1->relam != relform2->relam)
    1148 LBC           0 :             elog(ERROR, "cannot change access method of mapped relation \"%s\"",
    1149 EUB             :                  NameStr(relform1->relname));
    1150 GIC          78 :         if (!swap_toast_by_content &&
    1151 CBC          15 :             (relform1->reltoastrelid || relform2->reltoastrelid))
    1152 UBC           0 :             elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
    1153                 :                  NameStr(relform1->relname));
    1154 ECB             : 
    1155 EUB             :         /*
    1156                 :          * Fetch the mappings --- shouldn't fail, but be paranoid
    1157 ECB             :          */
    1158 GNC          78 :         relfilenumber1 = RelationMapOidToFilenumber(r1, relform1->relisshared);
    1159              78 :         if (!RelFileNumberIsValid(relfilenumber1))
    1160 UIC           0 :             elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
    1161                 :                  NameStr(relform1->relname), r1);
    1162 GNC          78 :         relfilenumber2 = RelationMapOidToFilenumber(r2, relform2->relisshared);
    1163              78 :         if (!RelFileNumberIsValid(relfilenumber2))
    1164 UIC           0 :             elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
    1165 ECB             :                  NameStr(relform2->relname), r2);
    1166                 : 
    1167 EUB             :         /*
    1168                 :          * Send replacement mappings to relmapper.  Note these won't actually
    1169 ECB             :          * take effect until CommandCounterIncrement.
    1170                 :          */
    1171 GNC          78 :         RelationMapUpdateMap(r1, relfilenumber2, relform1->relisshared, false);
    1172              78 :         RelationMapUpdateMap(r2, relfilenumber1, relform2->relisshared, false);
    1173                 : 
    1174                 :         /* Pass OIDs of mapped r2 tables back to caller */
    1175 GIC          78 :         *mapped_tables++ = r2;
    1176                 :     }
    1177                 : 
    1178 ECB             :     /*
    1179                 :      * Recognize that rel1's relfilenumber (swapped from rel2) is new in this
    1180                 :      * subtransaction. The rel2 storage (swapped from rel1) may or may not be
    1181                 :      * new.
    1182                 :      */
    1183                 :     {
    1184                 :         Relation    rel1,
    1185                 :                     rel2;
    1186                 : 
    1187 GIC         858 :         rel1 = relation_open(r1, NoLock);
    1188             858 :         rel2 = relation_open(r2, NoLock);
    1189             858 :         rel2->rd_createSubid = rel1->rd_createSubid;
    1190 GNC         858 :         rel2->rd_newRelfilelocatorSubid = rel1->rd_newRelfilelocatorSubid;
    1191             858 :         rel2->rd_firstRelfilelocatorSubid = rel1->rd_firstRelfilelocatorSubid;
    1192             858 :         RelationAssumeNewRelfilelocator(rel1);
    1193 GIC         858 :         relation_close(rel1, NoLock);
    1194 CBC         858 :         relation_close(rel2, NoLock);
    1195 ECB             :     }
    1196                 : 
    1197                 :     /*
    1198                 :      * In the case of a shared catalog, these next few steps will only affect
    1199                 :      * our own database's pg_class row; but that's okay, because they are all
    1200                 :      * noncritical updates.  That's also an important fact for the case of a
    1201                 :      * mapped catalog, because it's possible that we'll commit the map change
    1202                 :      * and then fail to commit the pg_class update.
    1203                 :      */
    1204                 : 
    1205                 :     /* set rel1's frozen Xid and minimum MultiXid */
    1206 GIC         858 :     if (relform1->relkind != RELKIND_INDEX)
    1207                 :     {
    1208             771 :         Assert(!TransactionIdIsValid(frozenXid) ||
    1209                 :                TransactionIdIsNormal(frozenXid));
    1210             771 :         relform1->relfrozenxid = frozenXid;
    1211             771 :         relform1->relminmxid = cutoffMulti;
    1212                 :     }
    1213 ECB             : 
    1214                 :     /* swap size statistics too, since new rel has freshly-updated stats */
    1215                 :     {
    1216                 :         int32       swap_pages;
    1217                 :         float4      swap_tuples;
    1218                 :         int32       swap_allvisible;
    1219                 : 
    1220 GIC         858 :         swap_pages = relform1->relpages;
    1221             858 :         relform1->relpages = relform2->relpages;
    1222             858 :         relform2->relpages = swap_pages;
    1223                 : 
    1224             858 :         swap_tuples = relform1->reltuples;
    1225             858 :         relform1->reltuples = relform2->reltuples;
    1226             858 :         relform2->reltuples = swap_tuples;
    1227 ECB             : 
    1228 CBC         858 :         swap_allvisible = relform1->relallvisible;
    1229             858 :         relform1->relallvisible = relform2->relallvisible;
    1230 GIC         858 :         relform2->relallvisible = swap_allvisible;
    1231 ECB             :     }
    1232                 : 
    1233                 :     /*
    1234                 :      * Update the tuples in pg_class --- unless the target relation of the
    1235                 :      * swap is pg_class itself.  In that case, there is zero point in making
    1236                 :      * changes because we'd be updating the old data that we're about to throw
    1237                 :      * away.  Because the real work being done here for a mapped relation is
    1238                 :      * just to change the relation map settings, it's all right to not update
    1239                 :      * the pg_class rows in this case. The most important changes will instead
    1240                 :      * be performed later, in finish_heap_swap() itself.
    1241                 :      */
    1242 GIC         858 :     if (!target_is_pg_class)
    1243                 :     {
    1244                 :         CatalogIndexState indstate;
    1245                 : 
    1246             846 :         indstate = CatalogOpenIndexes(relRelation);
    1247             846 :         CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
    1248                 :                                    indstate);
    1249 CBC         846 :         CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
    1250                 :                                    indstate);
    1251 GIC         846 :         CatalogCloseIndexes(indstate);
    1252                 :     }
    1253 ECB             :     else
    1254                 :     {
    1255                 :         /* no update ... but we do still need relcache inval */
    1256 CBC          12 :         CacheInvalidateRelcacheByTuple(reltup1);
    1257 GIC          12 :         CacheInvalidateRelcacheByTuple(reltup2);
    1258 ECB             :     }
    1259                 : 
    1260                 :     /*
    1261                 :      * Post alter hook for modified relations. The change to r2 is always
    1262                 :      * internal, but r1 depends on the invocation context.
    1263                 :      */
    1264 CBC         858 :     InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
    1265                 :                                  InvalidOid, is_internal);
    1266 GIC         858 :     InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
    1267                 :                                  InvalidOid, true);
    1268                 : 
    1269                 :     /*
    1270                 :      * If we have toast tables associated with the relations being swapped,
    1271 ECB             :      * deal with them too.
    1272                 :      */
    1273 CBC         858 :     if (relform1->reltoastrelid || relform2->reltoastrelid)
    1274                 :     {
    1275 GIC         273 :         if (swap_toast_by_content)
    1276                 :         {
    1277              87 :             if (relform1->reltoastrelid && relform2->reltoastrelid)
    1278                 :             {
    1279                 :                 /* Recursively swap the contents of the toast tables */
    1280 CBC          87 :                 swap_relation_files(relform1->reltoastrelid,
    1281                 :                                     relform2->reltoastrelid,
    1282 ECB             :                                     target_is_pg_class,
    1283                 :                                     swap_toast_by_content,
    1284                 :                                     is_internal,
    1285                 :                                     frozenXid,
    1286                 :                                     cutoffMulti,
    1287                 :                                     mapped_tables);
    1288                 :             }
    1289                 :             else
    1290                 :             {
    1291                 :                 /* caller messed up */
    1292 UIC           0 :                 elog(ERROR, "cannot swap toast files by content when there's only one");
    1293                 :             }
    1294                 :         }
    1295                 :         else
    1296                 :         {
    1297                 :             /*
    1298                 :              * We swapped the ownership links, so we need to change dependency
    1299 EUB             :              * data to match.
    1300                 :              *
    1301                 :              * NOTE: it is possible that only one table has a toast table.
    1302                 :              *
    1303                 :              * NOTE: at present, a TOAST table's only dependency is the one on
    1304                 :              * its owning table.  If more are ever created, we'd need to use
    1305                 :              * something more selective than deleteDependencyRecordsFor() to
    1306                 :              * get rid of just the link we want.
    1307                 :              */
    1308                 :             ObjectAddress baseobject,
    1309                 :                         toastobject;
    1310                 :             long        count;
    1311                 : 
    1312                 :             /*
    1313                 :              * We disallow this case for system catalogs, to avoid the
    1314                 :              * possibility that the catalog we're rebuilding is one of the
    1315                 :              * ones the dependency changes would change.  It's too late to be
    1316                 :              * making any data changes to the target catalog.
    1317                 :              */
    1318 GIC         186 :             if (IsSystemClass(r1, relform1))
    1319 UIC           0 :                 elog(ERROR, "cannot swap toast files by links for system catalogs");
    1320                 : 
    1321                 :             /* Delete old dependencies */
    1322 GIC         186 :             if (relform1->reltoastrelid)
    1323                 :             {
    1324             170 :                 count = deleteDependencyRecordsFor(RelationRelationId,
    1325 ECB             :                                                    relform1->reltoastrelid,
    1326 EUB             :                                                    false);
    1327 GIC         170 :                 if (count != 1)
    1328 UIC           0 :                     elog(ERROR, "expected one dependency record for TOAST table, found %ld",
    1329 ECB             :                          count);
    1330                 :             }
    1331 CBC         186 :             if (relform2->reltoastrelid)
    1332                 :             {
    1333 GIC         186 :                 count = deleteDependencyRecordsFor(RelationRelationId,
    1334 ECB             :                                                    relform2->reltoastrelid,
    1335 EUB             :                                                    false);
    1336 GIC         186 :                 if (count != 1)
    1337 UIC           0 :                     elog(ERROR, "expected one dependency record for TOAST table, found %ld",
    1338 ECB             :                          count);
    1339                 :             }
    1340                 : 
    1341                 :             /* Register new dependencies */
    1342 GIC         186 :             baseobject.classId = RelationRelationId;
    1343 CBC         186 :             baseobject.objectSubId = 0;
    1344 GBC         186 :             toastobject.classId = RelationRelationId;
    1345 GIC         186 :             toastobject.objectSubId = 0;
    1346                 : 
    1347             186 :             if (relform1->reltoastrelid)
    1348                 :             {
    1349 CBC         170 :                 baseobject.objectId = r1;
    1350             170 :                 toastobject.objectId = relform1->reltoastrelid;
    1351             170 :                 recordDependencyOn(&toastobject, &baseobject,
    1352 ECB             :                                    DEPENDENCY_INTERNAL);
    1353                 :             }
    1354                 : 
    1355 GIC         186 :             if (relform2->reltoastrelid)
    1356 ECB             :             {
    1357 CBC         186 :                 baseobject.objectId = r2;
    1358             186 :                 toastobject.objectId = relform2->reltoastrelid;
    1359 GIC         186 :                 recordDependencyOn(&toastobject, &baseobject,
    1360                 :                                    DEPENDENCY_INTERNAL);
    1361                 :             }
    1362 ECB             :         }
    1363                 :     }
    1364                 : 
    1365                 :     /*
    1366                 :      * If we're swapping two toast tables by content, do the same for their
    1367                 :      * valid index. The swap can actually be safely done only if the relations
    1368                 :      * have indexes.
    1369                 :      */
    1370 GIC         858 :     if (swap_toast_by_content &&
    1371             261 :         relform1->relkind == RELKIND_TOASTVALUE &&
    1372              87 :         relform2->relkind == RELKIND_TOASTVALUE)
    1373                 :     {
    1374                 :         Oid         toastIndex1,
    1375                 :                     toastIndex2;
    1376                 : 
    1377 ECB             :         /* Get valid index for each relation */
    1378 CBC          87 :         toastIndex1 = toast_get_valid_index(r1,
    1379 ECB             :                                             AccessExclusiveLock);
    1380 GIC          87 :         toastIndex2 = toast_get_valid_index(r2,
    1381                 :                                             AccessExclusiveLock);
    1382                 : 
    1383              87 :         swap_relation_files(toastIndex1,
    1384                 :                             toastIndex2,
    1385 ECB             :                             target_is_pg_class,
    1386                 :                             swap_toast_by_content,
    1387                 :                             is_internal,
    1388                 :                             InvalidTransactionId,
    1389                 :                             InvalidMultiXactId,
    1390                 :                             mapped_tables);
    1391                 :     }
    1392                 : 
    1393                 :     /* Clean up. */
    1394 GIC         858 :     heap_freetuple(reltup1);
    1395             858 :     heap_freetuple(reltup2);
    1396                 : 
    1397             858 :     table_close(relRelation, RowExclusiveLock);
    1398                 : 
    1399                 :     /*
    1400                 :      * Close both relcache entries' smgr links.  We need this kluge because
    1401 ECB             :      * both links will be invalidated during upcoming CommandCounterIncrement.
    1402                 :      * Whichever of the rels is the second to be cleared will have a dangling
    1403                 :      * reference to the other's smgr entry.  Rather than trying to avoid this
    1404                 :      * by ordering operations just so, it's easiest to close the links first.
    1405                 :      * (Fortunately, since one of the entries is local in our transaction,
    1406                 :      * it's sufficient to clear out our own relcache this way; the problem
    1407                 :      * cannot arise for other backends when they see our update on the
    1408                 :      * non-transient relation.)
    1409                 :      *
    1410                 :      * Caution: the placement of this step interacts with the decision to
    1411                 :      * handle toast rels by recursion.  When we are trying to rebuild pg_class
    1412                 :      * itself, the smgr close on pg_class must happen after all accesses in
    1413                 :      * this function.
    1414                 :      */
    1415 GIC         858 :     RelationCloseSmgrByOid(r1);
    1416             858 :     RelationCloseSmgrByOid(r2);
    1417             858 : }
    1418                 : 
    1419                 : /*
    1420                 :  * Remove the transient table that was built by make_new_heap, and finish
    1421                 :  * cleaning up (including rebuilding all indexes on the old heap).
    1422 ECB             :  */
    1423                 : void
    1424 CBC         684 : finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
    1425                 :                  bool is_system_catalog,
    1426                 :                  bool swap_toast_by_content,
    1427                 :                  bool check_constraints,
    1428                 :                  bool is_internal,
    1429                 :                  TransactionId frozenXid,
    1430                 :                  MultiXactId cutoffMulti,
    1431 ECB             :                  char newrelpersistence)
    1432                 : {
    1433                 :     ObjectAddress object;
    1434                 :     Oid         mapped_tables[4];
    1435                 :     int         reindex_flags;
    1436 GIC         684 :     ReindexParams reindex_params = {0};
    1437                 :     int         i;
    1438                 : 
    1439                 :     /* Report that we are now swapping relation files */
    1440             684 :     pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
    1441                 :                                  PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES);
    1442                 : 
    1443 ECB             :     /* Zero out possible results from swap_relation_files */
    1444 GIC         684 :     memset(mapped_tables, 0, sizeof(mapped_tables));
    1445                 : 
    1446                 :     /*
    1447 ECB             :      * Swap the contents of the heap relations (including any toast tables).
    1448                 :      * Also set old heap's relfrozenxid to frozenXid.
    1449                 :      */
    1450 GIC         684 :     swap_relation_files(OIDOldHeap, OIDNewHeap,
    1451 ECB             :                         (OIDOldHeap == RelationRelationId),
    1452                 :                         swap_toast_by_content, is_internal,
    1453                 :                         frozenXid, cutoffMulti, mapped_tables);
    1454                 : 
    1455                 :     /*
    1456                 :      * If it's a system catalog, queue a sinval message to flush all catcaches
    1457                 :      * on the catalog when we reach CommandCounterIncrement.
    1458                 :      */
    1459 GIC         684 :     if (is_system_catalog)
    1460             100 :         CacheInvalidateCatalog(OIDOldHeap);
    1461                 : 
    1462                 :     /*
    1463                 :      * Rebuild each index on the relation (but not the toast table, which is
    1464                 :      * all-new at this point).  It is important to do this before the DROP
    1465                 :      * step because if we are processing a system catalog that will be used
    1466 ECB             :      * during DROP, we want to have its indexes available.  There is no
    1467                 :      * advantage to the other order anyway because this is all transactional,
    1468                 :      * so no chance to reclaim disk space before commit.  We do not need a
    1469                 :      * final CommandCounterIncrement() because reindex_relation does it.
    1470                 :      *
    1471                 :      * Note: because index_build is called via reindex_relation, it will never
    1472                 :      * set indcheckxmin true for the indexes.  This is OK even though in some
    1473                 :      * sense we are building new indexes rather than rebuilding existing ones,
    1474                 :      * because the new heap won't contain any HOT chains at all, let alone
    1475                 :      * broken ones, so it can't be necessary to set indcheckxmin.
    1476                 :      */
    1477 GIC         684 :     reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
    1478             684 :     if (check_constraints)
    1479             422 :         reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
    1480                 : 
    1481                 :     /*
    1482                 :      * Ensure that the indexes have the same persistence as the parent
    1483                 :      * relation.
    1484 ECB             :      */
    1485 CBC         684 :     if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
    1486              10 :         reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
    1487 GIC         674 :     else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
    1488             637 :         reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
    1489                 : 
    1490                 :     /* Report that we are now reindexing relations */
    1491             684 :     pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
    1492 ECB             :                                  PROGRESS_CLUSTER_PHASE_REBUILD_INDEX);
    1493                 : 
    1494 CBC         684 :     reindex_relation(OIDOldHeap, reindex_flags, &reindex_params);
    1495 ECB             : 
    1496                 :     /* Report that we are now doing clean up */
    1497 GIC         675 :     pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
    1498 ECB             :                                  PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP);
    1499                 : 
    1500                 :     /*
    1501                 :      * If the relation being rebuilt is pg_class, swap_relation_files()
    1502                 :      * couldn't update pg_class's own pg_class entry (check comments in
    1503                 :      * swap_relation_files()), thus relfrozenxid was not updated. That's
    1504                 :      * annoying because a potential reason for doing a VACUUM FULL is an
    1505                 :      * imminent or actual anti-wraparound shutdown.  So, now that we can
    1506                 :      * access the new relation using its indices, update relfrozenxid.
    1507                 :      * pg_class doesn't have a toast relation, so we don't need to update the
    1508                 :      * corresponding toast relation. Note that there's little point moving all
    1509                 :      * relfrozenxid updates here since swap_relation_files() needs to write to
    1510                 :      * pg_class for non-mapped relations anyway.
    1511                 :      */
    1512 GIC         675 :     if (OIDOldHeap == RelationRelationId)
    1513                 :     {
    1514                 :         Relation    relRelation;
    1515                 :         HeapTuple   reltup;
    1516                 :         Form_pg_class relform;
    1517                 : 
    1518              12 :         relRelation = table_open(RelationRelationId, RowExclusiveLock);
    1519 ECB             : 
    1520 GIC          12 :         reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
    1521              12 :         if (!HeapTupleIsValid(reltup))
    1522 UIC           0 :             elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
    1523 GIC          12 :         relform = (Form_pg_class) GETSTRUCT(reltup);
    1524                 : 
    1525 CBC          12 :         relform->relfrozenxid = frozenXid;
    1526 GIC          12 :         relform->relminmxid = cutoffMulti;
    1527 ECB             : 
    1528 CBC          12 :         CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
    1529 EUB             : 
    1530 CBC          12 :         table_close(relRelation, RowExclusiveLock);
    1531                 :     }
    1532 ECB             : 
    1533                 :     /* Destroy new heap with old filenumber */
    1534 GIC         675 :     object.classId = RelationRelationId;
    1535 CBC         675 :     object.objectId = OIDNewHeap;
    1536 GIC         675 :     object.objectSubId = 0;
    1537 ECB             : 
    1538                 :     /*
    1539                 :      * The new relation is local to our transaction and we know nothing
    1540                 :      * depends on it, so DROP_RESTRICT should be OK.
    1541                 :      */
    1542 CBC         675 :     performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
    1543 ECB             : 
    1544                 :     /* performDeletion does CommandCounterIncrement at end */
    1545                 : 
    1546                 :     /*
    1547                 :      * Now we must remove any relation mapping entries that we set up for the
    1548                 :      * transient table, as well as its toast table and toast index if any. If
    1549                 :      * we fail to do this before commit, the relmapper will complain about new
    1550                 :      * permanent map entries being added post-bootstrap.
    1551                 :      */
    1552 GIC         753 :     for (i = 0; OidIsValid(mapped_tables[i]); i++)
    1553              78 :         RelationMapRemoveMapping(mapped_tables[i]);
    1554                 : 
    1555                 :     /*
    1556                 :      * At this point, everything is kosher except that, if we did toast swap
    1557                 :      * by links, the toast table's name corresponds to the transient table.
    1558                 :      * The name is irrelevant to the backend because it's referenced by OID,
    1559 ECB             :      * but users looking at the catalogs could be confused.  Rename it to
    1560                 :      * prevent this problem.
    1561                 :      *
    1562                 :      * Note no lock required on the relation, because we already hold an
    1563                 :      * exclusive lock on it.
    1564                 :      */
    1565 GIC         675 :     if (!swap_toast_by_content)
    1566                 :     {
    1567                 :         Relation    newrel;
    1568                 : 
    1569             588 :         newrel = table_open(OIDOldHeap, NoLock);
    1570             588 :         if (OidIsValid(newrel->rd_rel->reltoastrelid))
    1571                 :         {
    1572 ECB             :             Oid         toastidx;
    1573                 :             char        NewToastName[NAMEDATALEN];
    1574                 : 
    1575                 :             /* Get the associated valid index to be renamed */
    1576 CBC         170 :             toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
    1577 ECB             :                                              NoLock);
    1578                 : 
    1579                 :             /* rename the toast table ... */
    1580 GIC         170 :             snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
    1581                 :                      OIDOldHeap);
    1582             170 :             RenameRelationInternal(newrel->rd_rel->reltoastrelid,
    1583 ECB             :                                    NewToastName, true, false);
    1584                 : 
    1585                 :             /* ... and its valid index too. */
    1586 GIC         170 :             snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
    1587 ECB             :                      OIDOldHeap);
    1588                 : 
    1589 CBC         170 :             RenameRelationInternal(toastidx,
    1590                 :                                    NewToastName, true, true);
    1591                 : 
    1592                 :             /*
    1593 ECB             :              * Reset the relrewrite for the toast. The command-counter
    1594                 :              * increment is required here as we are about to update the tuple
    1595                 :              * that is updated as part of RenameRelationInternal.
    1596                 :              */
    1597 GIC         170 :             CommandCounterIncrement();
    1598             170 :             ResetRelRewrite(newrel->rd_rel->reltoastrelid);
    1599                 :         }
    1600             588 :         relation_close(newrel, NoLock);
    1601                 :     }
    1602                 : 
    1603                 :     /* if it's not a catalog table, clear any missing attribute settings */
    1604 CBC         675 :     if (!is_system_catalog)
    1605 ECB             :     {
    1606                 :         Relation    newrel;
    1607                 : 
    1608 GIC         575 :         newrel = table_open(OIDOldHeap, NoLock);
    1609             575 :         RelationClearMissing(newrel);
    1610             575 :         relation_close(newrel, NoLock);
    1611 ECB             :     }
    1612 GIC         675 : }
    1613                 : 
    1614                 : 
    1615 ECB             : /*
    1616                 :  * Get a list of tables that the current user has privileges on and
    1617                 :  * have indisclustered set.  Return the list in a List * of RelToCluster
    1618                 :  * (stored in the specified memory context), each one giving the tableOid
    1619                 :  * and the indexOid on which the table is already clustered.
                         :  *
                         :  * The candidates come from a scan of the catalog opened through
                         :  * IndexRelationId, keyed on indisclustered = true.  Entries for which
                         :  * cluster_is_permitted_for_relation() returns false are skipped; that
                         :  * function is the one that emits the WARNING.  Only the RelToCluster
                         :  * structs and the list itself are built while cluster_context is the
                         :  * current memory context.  The result is NIL when no entry qualifies.
    1620                 :  */
    1621                 : static List *
    1622 GIC          12 : get_tables_to_cluster(MemoryContext cluster_context)
    1623                 : {
    1624                 :     Relation    indRelation;
    1625                 :     TableScanDesc scan;
    1626                 :     ScanKeyData entry;
    1627                 :     HeapTuple   indexTuple;
    1628                 :     Form_pg_index index;
    1629 ECB             :     MemoryContext old_context;
    1630 GIC          12 :     List       *rtcs = NIL;
    1631                 : 
    1632                 :     /*
    1633                 :      * Get all indexes that have indisclustered set and that the current user
    1634                 :      * has the appropriate privileges for.
                         :      *
                         :      * The scan key below only filters on indisclustered; the privilege
                         :      * check is done per tuple inside the loop.
    1635                 :      */
    1636              12 :     indRelation = table_open(IndexRelationId, AccessShareLock);
    1637 CBC          12 :     ScanKeyInit(&entry,
    1638                 :                 Anum_pg_index_indisclustered,
    1639                 :                 BTEqualStrategyNumber, F_BOOLEQ,
    1640                 :                 BoolGetDatum(true));
    1641 GIC          12 :     scan = table_beginscan_catalog(indRelation, 1, &entry);    /* single scan key: entry */
    1642              21 :     while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1643 ECB             :     {
    1644                 :         RelToCluster *rtc;
    1645                 : 
    1646 GIC           9 :         index = (Form_pg_index) GETSTRUCT(indexTuple);
    1647                 : 
    1648 GNC           9 :         if (!cluster_is_permitted_for_relation(index->indrelid, GetUserId()))
    1649 CBC           6 :             continue;           /* skipped; the check above already warned */
    1650                 : 
    1651                 :         /* Use a permanent memory context for the result list */
    1652 GIC           3 :         old_context = MemoryContextSwitchTo(cluster_context);
    1653 ECB             : 
    1654 GIC           3 :         rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
    1655 CBC           3 :         rtc->tableOid = index->indrelid;
    1656               3 :         rtc->indexOid = index->indexrelid;
    1657 GIC           3 :         rtcs = lappend(rtcs, rtc);
    1658                 : 
    1659 CBC           3 :         MemoryContextSwitchTo(old_context);    /* restore the context that was current before */
    1660                 :     }
    1661              12 :     table_endscan(scan);
    1662 ECB             : 
    1663 CBC          12 :     relation_close(indRelation, AccessShareLock);
    1664 ECB             : 
    1665 GIC          12 :     return rtcs;
    1666 ECB             : }
    1667                 : 
    1668                 : /*
    1669                 :  * Given an index on a partitioned table, return a list of RelToCluster for
    1670                 :  * all the leaf child tables/indexes.
    1671                 :  *
    1672                 :  * Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
    1673                 :  * on the table containing the index.
                         :  *
                         :  * Each returned entry pairs one inheritor of indexOid whose relkind is
                         :  * RELKIND_INDEX with the table that IndexGetRelation() reports for it.
                         :  * The inheritors are looked up with NoLock.  The RelToCluster structs and
                         :  * the list are built while cluster_context is the current memory context;
                         :  * the result is NIL when no inheritor passes the relkind filter.
    1674                 :  */
    1675                 : static List *
    1676 GIC          10 : get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
    1677                 : {
    1678                 :     List       *inhoids;
    1679                 :     ListCell   *lc;
    1680              10 :     List       *rtcs = NIL;
    1681                 :     MemoryContext old_context;
    1682                 : 
    1683 ECB             :     /* Do not lock the children until they're processed */
    1684 GIC          10 :     inhoids = find_all_inheritors(indexOid, NoLock, NULL);
    1685                 : 
    1686              52 :     foreach(lc, inhoids)
    1687 ECB             :     {
    1688 GIC          42 :         Oid         indexrelid = lfirst_oid(lc);
    1689              42 :         Oid         relid = IndexGetRelation(indexrelid, false);   /* looked up for every inheritor, even ones skipped below */
    1690                 :         RelToCluster *rtc;
    1691 ECB             : 
    1692                 :         /* consider only leaf indexes */
    1693 CBC          42 :         if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
    1694 GIC          19 :             continue;           /* relkind is not RELKIND_INDEX, so not a leaf */
    1695 ECB             : 
    1696                 :         /*
    1697                 :          * We already checked that the user has privileges to CLUSTER the
    1698                 :          * partitioned table when we locked it earlier, so there's no need to
    1699                 :          * check the privileges again here.
    1700                 :          */
    1701                 : 
    1702                 :         /* Use a permanent memory context for the result list */
    1703 GIC          23 :         old_context = MemoryContextSwitchTo(cluster_context);
    1704                 : 
    1705              23 :         rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
    1706              23 :         rtc->tableOid = relid;
    1707              23 :         rtc->indexOid = indexrelid;
    1708              23 :         rtcs = lappend(rtcs, rtc);
    1709                 : 
    1710 CBC          23 :         MemoryContextSwitchTo(old_context);    /* restore the context that was current before */
    1711                 :     }
    1712 ECB             : 
    1713 CBC          10 :     return rtcs;
    1714 ECB             : }
    1715                 : 
    1716                 : /*
    1717                 :  * Return whether userid has privileges to CLUSTER relid.  If not, this
    1718                 :  * function emits a WARNING.
                         :  *
                         :  * The user is permitted when pg_class_aclcheck() returns ACLCHECK_OK for
                         :  * ACL_MAINTAIN on relid, or, only if that fails, when
                         :  * has_partition_ancestor_privs() returns true for ACL_MAINTAIN (judging by
                         :  * its name, a check against partition ancestors -- confirm against its
                         :  * definition).  The WARNING names the relation via get_rel_name(relid),
                         :  * and the function then returns false rather than raising an error, so
                         :  * callers can skip the relation and carry on.
    1719                 :  */
    1720                 : static bool
    1721 GNC          35 : cluster_is_permitted_for_relation(Oid relid, Oid userid)
    1722                 : {
    1723              52 :     if (pg_class_aclcheck(relid, userid, ACL_MAINTAIN) == ACLCHECK_OK ||
    1724              17 :         has_partition_ancestor_privs(relid, userid, ACL_MAINTAIN))
    1725              29 :         return true;
    1726                 : 
    1727               6 :     ereport(WARNING,
    1728                 :             (errmsg("permission denied to cluster \"%s\", skipping it",
    1729                 :                     get_rel_name(relid))));
    1730               6 :     return false;
    1731                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a