Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * cluster.c
4 : * CLUSTER a table on an index. This is now also used for VACUUM FULL.
5 : *
6 : * There is hardly anything left of Paul Brown's original implementation...
7 : *
8 : *
9 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
10 : * Portions Copyright (c) 1994-5, Regents of the University of California
11 : *
12 : *
13 : * IDENTIFICATION
14 : * src/backend/commands/cluster.c
15 : *
16 : *-------------------------------------------------------------------------
17 : */
18 : #include "postgres.h"
19 :
20 : #include "access/amapi.h"
21 : #include "access/heapam.h"
22 : #include "access/multixact.h"
23 : #include "access/relscan.h"
24 : #include "access/tableam.h"
25 : #include "access/toast_internals.h"
26 : #include "access/transam.h"
27 : #include "access/xact.h"
28 : #include "access/xlog.h"
29 : #include "catalog/catalog.h"
30 : #include "catalog/dependency.h"
31 : #include "catalog/heap.h"
32 : #include "catalog/index.h"
33 : #include "catalog/namespace.h"
34 : #include "catalog/objectaccess.h"
35 : #include "catalog/partition.h"
36 : #include "catalog/pg_am.h"
37 : #include "catalog/pg_database.h"
38 : #include "catalog/pg_inherits.h"
39 : #include "catalog/toasting.h"
40 : #include "commands/cluster.h"
41 : #include "commands/defrem.h"
42 : #include "commands/progress.h"
43 : #include "commands/tablecmds.h"
44 : #include "commands/vacuum.h"
45 : #include "miscadmin.h"
46 : #include "optimizer/optimizer.h"
47 : #include "pgstat.h"
48 : #include "storage/bufmgr.h"
49 : #include "storage/lmgr.h"
50 : #include "storage/predicate.h"
51 : #include "utils/acl.h"
52 : #include "utils/fmgroids.h"
53 : #include "utils/guc.h"
54 : #include "utils/inval.h"
55 : #include "utils/lsyscache.h"
56 : #include "utils/memutils.h"
57 : #include "utils/pg_rusage.h"
58 : #include "utils/relmapper.h"
59 : #include "utils/snapmgr.h"
60 : #include "utils/syscache.h"
61 : #include "utils/tuplesort.h"
62 :
63 : /*
64 : * This struct is used to pass around the information on tables to be
65 : * clustered. We need this so we can make a list of them when invoked without
66 : * a specific table/index pair.
67 : */
68 : typedef struct
69 : {
70 : Oid tableOid;
71 : Oid indexOid;
72 : } RelToCluster;
73 :
74 :
75 : static void cluster_multiple_rels(List *rtcs, ClusterParams *params);
76 : static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
77 : static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
78 : bool verbose, bool *pSwapToastByContent,
79 : TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
80 : static List *get_tables_to_cluster(MemoryContext cluster_context);
81 : static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
82 : Oid indexOid);
83 : static bool cluster_is_permitted_for_relation(Oid relid, Oid userid);
84 :
85 :
86 : /*---------------------------------------------------------------------------
87 : * This cluster code allows for clustering multiple tables at once. Because
88 : * of this, we cannot just run everything on a single transaction, or we
89 : * would be forced to acquire exclusive locks on all the tables being
90 : * clustered, simultaneously --- very likely leading to deadlock.
91 : *
92 : * To solve this we follow a similar strategy to VACUUM code,
93 : * clustering each relation in a separate transaction. For this to work,
94 : * we need to:
95 : * - provide a separate memory context so that we can pass information in
96 : * a way that survives across transactions
97 : * - start a new transaction every time a new relation is clustered
98 : * - check for validity of the information on to-be-clustered relations,
99 : * as someone might have deleted a relation behind our back, or
100 : * clustered one on a different index
101 : * - end the transaction
102 : *
103 : * The single-relation case does not have any such overhead.
104 : *
105 : * We also allow a relation to be specified without index. In that case,
106 : * the indisclustered bit will be looked up, and an ERROR will be thrown
107 : * if there is no index with the bit set.
108 : *---------------------------------------------------------------------------
109 : */
110 : void
857 michael 111 GIC 106 : cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel)
112 : {
113 : ListCell *lc;
811 michael 114 CBC 106 : ClusterParams params = {0};
857 michael 115 GIC 106 : bool verbose = false;
372 alvherre 116 106 : Relation rel = NULL;
372 alvherre 117 CBC 106 : Oid indexOid = InvalidOid;
372 alvherre 118 ECB : MemoryContext cluster_context;
119 : List *rtcs;
857 michael 120 :
121 : /* Parse option list */
857 michael 122 GIC 112 : foreach(lc, stmt->params)
123 : {
124 6 : DefElem *opt = (DefElem *) lfirst(lc);
857 michael 125 ECB :
857 michael 126 GIC 6 : if (strcmp(opt->defname, "verbose") == 0)
857 michael 127 CBC 6 : verbose = defGetBoolean(opt);
128 : else
857 michael 129 LBC 0 : ereport(ERROR,
857 michael 130 ECB : (errcode(ERRCODE_SYNTAX_ERROR),
131 : errmsg("unrecognized CLUSTER option \"%s\"",
857 michael 132 EUB : opt->defname),
133 : parser_errposition(pstate, opt->location)));
134 : }
135 :
811 michael 136 GIC 106 : params.options = (verbose ? CLUOPT_VERBOSE : 0);
137 :
7405 tgl 138 106 : if (stmt->relation != NULL)
7405 tgl 139 ECB : {
140 : /* This is the single-relation case. */
372 alvherre 141 : Oid tableOid;
142 :
143 : /*
144 : * Find, lock, and check permissions on the table. We obtain
145 : * AccessExclusiveLock right away to avoid lock-upgrade hazard in the
146 : * single-transaction case.
147 : */
4127 rhaas 148 GIC 94 : tableOid = RangeVarGetRelidExtended(stmt->relation,
149 : AccessExclusiveLock,
150 : 0,
151 : RangeVarCallbackMaintainsTable,
152 : NULL);
1539 andres 153 91 : rel = table_open(tableOid, NoLock);
154 :
155 : /*
156 : * Reject clustering a remote temp table ... their local buffer
5624 bruce 157 ECB : * manager is not going to cope.
158 : */
5122 tgl 159 GIC 91 : if (RELATION_IS_OTHER_TEMP(rel))
5690 alvherre 160 UIC 0 : ereport(ERROR,
161 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
162 : errmsg("cannot cluster temporary tables of other sessions")));
5690 alvherre 163 ECB :
7405 tgl 164 GBC 91 : if (stmt->indexname == NULL)
165 : {
166 : ListCell *index;
167 :
7405 tgl 168 ECB : /* We need to find the index that has indisclustered set. */
7188 bruce 169 GIC 21 : foreach(index, RelationGetIndexList(rel))
170 : {
6892 neilc 171 15 : indexOid = lfirst_oid(index);
1098 michael 172 15 : if (get_index_isclustered(indexOid))
7405 tgl 173 CBC 9 : break;
7405 tgl 174 GIC 6 : indexOid = InvalidOid;
7405 tgl 175 ECB : }
176 :
7405 tgl 177 CBC 15 : if (!OidIsValid(indexOid))
7203 178 6 : ereport(ERROR,
179 : (errcode(ERRCODE_UNDEFINED_OBJECT),
180 : errmsg("there is no previously clustered index for table \"%s\"",
7203 tgl 181 ECB : stmt->relation->relname)));
7405 182 : }
183 : else
184 : {
185 : /*
186 : * The index is expected to be in the same namespace as the
187 : * relation.
188 : */
7405 tgl 189 GIC 76 : indexOid = get_relname_relid(stmt->indexname,
190 76 : rel->rd_rel->relnamespace);
191 76 : if (!OidIsValid(indexOid))
7203 tgl 192 UIC 0 : ereport(ERROR,
7203 tgl 193 ECB : (errcode(ERRCODE_UNDEFINED_OBJECT),
2118 194 : errmsg("index \"%s\" for table \"%s\" does not exist",
195 : stmt->indexname, stmt->relation->relname)));
7405 tgl 196 EUB : }
197 :
372 alvherre 198 GIC 85 : if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
199 : {
200 : /* close relation, keep lock till commit */
201 72 : table_close(rel, NoLock);
7405 tgl 202 ECB :
203 : /* Do the job. */
372 alvherre 204 GIC 72 : cluster_rel(tableOid, indexOid, ¶ms);
372 alvherre 205 ECB :
372 alvherre 206 GIC 72 : return;
207 : }
372 alvherre 208 ECB : }
209 :
210 : /*
211 : * By here, we know we are in a multi-table situation. In order to avoid
212 : * holding locks for too long, we want to process each table in its own
213 : * transaction. This forces us to disallow running inside a user
214 : * transaction block.
215 : */
372 alvherre 216 GIC 25 : PreventInTransactionBlock(isTopLevel, "CLUSTER");
217 :
218 : /* Also, we need a memory context to hold our list of relations */
219 25 : cluster_context = AllocSetContextCreate(PortalContext,
372 alvherre 220 ECB : "Cluster",
221 : ALLOCSET_DEFAULT_SIZES);
222 :
223 : /*
224 : * Either we're processing a partitioned table, or we were not given any
225 : * table name at all. In either case, obtain a list of relations to
226 : * process.
227 : *
228 : * In the former case, an index name must have been given, so we don't
229 : * need to recheck its "indisclustered" bit, but we have to check that it
230 : * is an index that we can cluster on. In the latter case, we set the
231 : * option bit to have indisclustered verified.
232 : *
233 : * Rechecking the relation itself is necessary here in all cases.
234 : */
372 alvherre 235 GIC 25 : params.options |= CLUOPT_RECHECK;
236 25 : if (rel != NULL)
237 : {
238 13 : Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
361 michael 239 CBC 13 : check_index_is_clusterable(rel, indexOid, AccessShareLock);
372 alvherre 240 10 : rtcs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
241 :
372 alvherre 242 ECB : /* close relation, releasing lock on parent table */
372 alvherre 243 CBC 10 : table_close(rel, AccessExclusiveLock);
7405 tgl 244 ECB : }
245 : else
246 : {
372 alvherre 247 CBC 12 : rtcs = get_tables_to_cluster(cluster_context);
372 alvherre 248 GIC 12 : params.options |= CLUOPT_RECHECK_ISCLUSTERED;
249 : }
250 :
372 alvherre 251 ECB : /* Do the job. */
372 alvherre 252 CBC 22 : cluster_multiple_rels(rtcs, ¶ms);
253 :
254 : /* Start a new transaction for the cleanup work. */
372 alvherre 255 GIC 22 : StartTransactionCommand();
7405 tgl 256 ECB :
257 : /* Clean up working storage */
372 alvherre 258 GIC 22 : MemoryContextDelete(cluster_context);
372 alvherre 259 ECB : }
260 :
261 : /*
262 : * Given a list of relations to cluster, process each of them in a separate
263 : * transaction.
264 : *
265 : * We expect to be in a transaction at start, but there isn't one when we
266 : * return.
267 : */
268 : static void
372 alvherre 269 GIC 22 : cluster_multiple_rels(List *rtcs, ClusterParams *params)
270 : {
271 : ListCell *lc;
272 :
372 alvherre 273 ECB : /* Commit to get out of starting transaction */
372 alvherre 274 GIC 22 : PopActiveSnapshot();
275 22 : CommitTransactionCommand();
276 :
277 : /* Cluster the tables, each in a separate transaction */
372 alvherre 278 CBC 48 : foreach(lc, rtcs)
372 alvherre 279 ECB : {
372 alvherre 280 GIC 26 : RelToCluster *rtc = (RelToCluster *) lfirst(lc);
281 :
372 alvherre 282 ECB : /* Start a new transaction for each relation. */
7270 tgl 283 GIC 26 : StartTransactionCommand();
7547 bruce 284 ECB :
285 : /* functions in indexes may want a snapshot set */
372 alvherre 286 GIC 26 : PushActiveSnapshot(GetTransactionSnapshot());
372 alvherre 287 ECB :
288 : /* Do the job. */
372 alvherre 289 GIC 26 : cluster_rel(rtc->tableOid, rtc->indexOid, params);
372 alvherre 290 ECB :
372 alvherre 291 GIC 26 : PopActiveSnapshot();
292 26 : CommitTransactionCommand();
7405 tgl 293 ECB : }
7405 tgl 294 GIC 22 : }
9364 bruce 295 ECB :
9770 scrappy 296 : /*
297 : * cluster_rel
298 : *
299 : * This clusters the table by creating a new, clustered table and
300 : * swapping the relfilenumbers of the new table and the old table, so
301 : * the OID of the original table is preserved. Thus we do not lose
302 : * GRANT, inheritance nor references to this table (this was a bug
303 : * in releases through 7.3).
304 : *
305 : * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
306 : * the new table, it's better to create the indexes afterwards than to fill
307 : * them incrementally while we load the table.
308 : *
309 : * If indexOid is InvalidOid, the table will be rewritten in physical order
310 : * instead of index order. This is the new implementation of VACUUM FULL,
311 : * and error messages should refer to the operation as VACUUM not CLUSTER.
312 : */
313 : void
811 michael 314 GIC 262 : cluster_rel(Oid tableOid, Oid indexOid, ClusterParams *params)
315 : {
316 : Relation OldHeap;
317 : Oid save_userid;
335 noah 318 ECB : int save_sec_context;
319 : int save_nestlevel;
811 michael 320 GIC 262 : bool verbose = ((params->options & CLUOPT_VERBOSE) != 0);
321 262 : bool recheck = ((params->options & CLUOPT_RECHECK) != 0);
322 :
323 : /* Check for user-requested abort. */
7450 bruce 324 CBC 262 : CHECK_FOR_INTERRUPTS();
7450 bruce 325 ECB :
1476 rhaas 326 GIC 262 : pgstat_progress_start_command(PROGRESS_COMMAND_CLUSTER, tableOid);
327 262 : if (OidIsValid(indexOid))
1476 rhaas 328 CBC 98 : pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
329 : PROGRESS_CLUSTER_COMMAND_CLUSTER);
1476 rhaas 330 ECB : else
1476 rhaas 331 CBC 164 : pgstat_progress_update_param(PROGRESS_CLUSTER_COMMAND,
1476 rhaas 332 ECB : PROGRESS_CLUSTER_COMMAND_VACUUM_FULL);
333 :
334 : /*
6078 tgl 335 : * We grab exclusive access to the target rel and index for the duration
336 : * of the transaction. (This is redundant for the single-transaction
337 : * case, since cluster() already did it.) The index lock is taken inside
338 : * check_index_is_clusterable.
339 : */
4841 itagaki.takahiro 340 GIC 262 : OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
341 :
342 : /* If the table has gone away, we can skip processing it */
6078 tgl 343 262 : if (!OldHeap)
1476 rhaas 344 ECB : {
1476 rhaas 345 UIC 0 : pgstat_progress_end_command();
6078 tgl 346 0 : return;
1476 rhaas 347 ECB : }
348 :
335 noah 349 EUB : /*
350 : * Switch to the table owner's userid, so that any index functions are run
351 : * as that user. Also lock down security-restricted operations and
352 : * arrange to make GUC variable changes local to this command.
353 : */
335 noah 354 GIC 262 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
355 262 : SetUserIdAndSecContext(OldHeap->rd_rel->relowner,
356 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
357 262 : save_nestlevel = NewGUCNestLevel();
335 noah 358 ECB :
7405 tgl 359 : /*
360 : * Since we may open a new transaction for each relation, we have to check
6385 bruce 361 : * that the relation still is what we think it is.
362 : *
363 : * If this is a single-transaction CLUSTER, we can skip these tests. We
364 : * *must* skip the one on indisclustered since it would reject an attempt
365 : * to cluster a not-previously-clustered index.
366 : */
7405 tgl 367 GIC 262 : if (recheck)
368 : {
369 : /* Check that the user still has privileges for the relation */
86 jdavis 370 GNC 26 : if (!cluster_is_permitted_for_relation(tableOid, save_userid))
6078 tgl 371 ECB : {
6078 tgl 372 UIC 0 : relation_close(OldHeap, AccessExclusiveLock);
335 noah 373 0 : goto out;
6078 tgl 374 ECB : }
375 :
5690 alvherre 376 EUB : /*
377 : * Silently skip a temp table for a remote session. Only doing this
378 : * check in the "recheck" case is appropriate (which currently means
379 : * somebody is executing a database-wide CLUSTER or on a partitioned
380 : * table), because there is another check in cluster() which will stop
381 : * any attempt to cluster remote temp tables by name. There is
382 : * another check in cluster_rel which is redundant, but we leave it
383 : * for extra safety.
384 : */
5122 tgl 385 GIC 26 : if (RELATION_IS_OTHER_TEMP(OldHeap))
386 : {
5690 alvherre 387 UIC 0 : relation_close(OldHeap, AccessExclusiveLock);
335 noah 388 0 : goto out;
5690 alvherre 389 ECB : }
390 :
4841 itagaki.takahiro 391 GBC 26 : if (OidIsValid(indexOid))
6078 tgl 392 EUB : {
393 : /*
394 : * Check that the index still exists
4841 itagaki.takahiro 395 ECB : */
4802 rhaas 396 GIC 26 : if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
397 : {
4841 itagaki.takahiro 398 UIC 0 : relation_close(OldHeap, AccessExclusiveLock);
335 noah 399 0 : goto out;
4841 itagaki.takahiro 400 ECB : }
401 :
4841 itagaki.takahiro 402 EUB : /*
372 alvherre 403 : * Check that the index is still the one with indisclustered set,
404 : * if needed.
405 : */
372 alvherre 406 GIC 26 : if ((params->options & CLUOPT_RECHECK_ISCLUSTERED) != 0 &&
407 3 : !get_index_isclustered(indexOid))
408 : {
4841 itagaki.takahiro 409 UIC 0 : relation_close(OldHeap, AccessExclusiveLock);
335 noah 410 LBC 0 : goto out;
4841 itagaki.takahiro 411 ECB : }
412 : }
7450 bruce 413 EUB : }
414 :
415 : /*
416 : * We allow VACUUM FULL, but not CLUSTER, on shared catalogs. CLUSTER
417 : * would work in most respects, but the index would only get marked as
418 : * indisclustered in the current database, leading to unexpected behavior
419 : * if CLUSTER were later invoked in another database.
420 : */
4809 tgl 421 GIC 262 : if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
4809 tgl 422 UIC 0 : ereport(ERROR,
423 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
424 : errmsg("cannot cluster a shared catalog")));
4809 tgl 425 ECB :
4809 tgl 426 EUB : /*
427 : * Don't process temp tables of other backends ... their local buffer
428 : * manager is not going to cope.
429 : */
4809 tgl 430 GIC 262 : if (RELATION_IS_OTHER_TEMP(OldHeap))
431 : {
4809 tgl 432 UIC 0 : if (OidIsValid(indexOid))
433 0 : ereport(ERROR,
4809 tgl 434 ECB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
435 : errmsg("cannot cluster temporary tables of other sessions")));
4809 tgl 436 EUB : else
4809 tgl 437 UBC 0 : ereport(ERROR,
438 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
439 : errmsg("cannot vacuum temporary tables of other sessions")));
440 : }
4809 tgl 441 EUB :
442 : /*
443 : * Also check for active uses of the relation in the current transaction,
444 : * including open scans and pending AFTER trigger events.
445 : */
4809 tgl 446 GIC 262 : CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
447 :
448 : /* Check heap and index are valid to cluster on */
449 262 : if (OidIsValid(indexOid))
361 michael 450 CBC 98 : check_index_is_clusterable(OldHeap, indexOid, AccessExclusiveLock);
451 :
452 : /*
3652 kgrittn 453 ECB : * Quietly ignore the request if this is a materialized view which has not
454 : * been populated from its query. No harm is done because there is no data
455 : * to deal with, and we don't want to throw an error if this is part of a
456 : * multi-relation request -- for example, CLUSTER was run on the entire
457 : * database.
458 : */
3689 kgrittn 459 GIC 262 : if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
3625 tgl 460 UIC 0 : !RelationIsPopulated(OldHeap))
461 : {
3689 kgrittn 462 0 : relation_close(OldHeap, AccessExclusiveLock);
335 noah 463 LBC 0 : goto out;
3689 kgrittn 464 EUB : }
465 :
372 alvherre 466 GBC 262 : Assert(OldHeap->rd_rel->relkind == RELKIND_RELATION ||
372 alvherre 467 EUB : OldHeap->rd_rel->relkind == RELKIND_MATVIEW ||
468 : OldHeap->rd_rel->relkind == RELKIND_TOASTVALUE);
469 :
4323 heikki.linnakangas 470 ECB : /*
471 : * All predicate locks on the tuples or pages are about to be made
472 : * invalid, because we move tuples around. Promote them to relation
473 : * locks. Predicate locks on indexes will be promoted when they are
474 : * reindexed.
475 : */
4323 heikki.linnakangas 476 GIC 262 : TransferPredicateLocksToHeapRelation(OldHeap);
477 :
478 : /* rebuild_relation does all the dirty work */
893 andres 479 262 : rebuild_relation(OldHeap, indexOid, verbose);
6912 tgl 480 ECB :
481 : /* NB: rebuild_relation does table_close() on OldHeap */
482 :
335 noah 483 CBC 259 : out:
484 : /* Roll back any GUC changes executed by index functions */
335 noah 485 GIC 259 : AtEOXact_GUC(false, save_nestlevel);
486 :
335 noah 487 ECB : /* Restore userid and security context */
335 noah 488 GIC 259 : SetUserIdAndSecContext(save_userid, save_sec_context);
335 noah 489 ECB :
1476 rhaas 490 GIC 259 : pgstat_progress_end_command();
491 : }
6912 tgl 492 ECB :
493 : /*
4841 itagaki.takahiro 494 : * Verify that the specified heap and index are valid to cluster on
495 : *
496 : * Side effect: obtains lock on the index. The caller may
497 : * in some cases already have AccessExclusiveLock on the table, but
498 : * not in all cases so we can't rely on the table-level lock for
499 : * protection here.
500 : */
501 : void
361 michael 502 GIC 143 : check_index_is_clusterable(Relation OldHeap, Oid indexOid, LOCKMODE lockmode)
503 : {
504 : Relation OldIndex;
505 :
4637 simon 506 CBC 143 : OldIndex = index_open(indexOid, lockmode);
507 :
508 : /*
509 : * Check that index is in fact an index on the given relation
8368 tgl 510 ECB : */
7546 tgl 511 GIC 143 : if (OldIndex->rd_index == NULL ||
6912 512 143 : OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
7203 tgl 513 UIC 0 : ereport(ERROR,
514 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
7203 tgl 515 ECB : errmsg("\"%s\" is not an index for table \"%s\"",
516 : RelationGetRelationName(OldIndex),
7203 tgl 517 EUB : RelationGetRelationName(OldHeap))));
518 :
519 : /* Index AM must allow clustering */
1539 andres 520 GIC 143 : if (!OldIndex->rd_indam->amclusterable)
4809 tgl 521 UIC 0 : ereport(ERROR,
522 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
523 : errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
4809 tgl 524 ECB : RelationGetRelationName(OldIndex))));
4809 tgl 525 EUB :
526 : /*
527 : * Disallow clustering on incomplete indexes (those that might not index
528 : * every row of the relation). We could relax this by making a separate
529 : * seqscan pass over the table to copy the missing rows, but that seems
530 : * expensive and tedious.
531 : */
1838 andrew 532 GIC 143 : if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred, NULL))
7203 tgl 533 UIC 0 : ereport(ERROR,
534 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
535 : errmsg("cannot cluster on partial index \"%s\"",
6543 bruce 536 ECB : RelationGetRelationName(OldIndex))));
6385 bruce 537 EUB :
538 : /*
539 : * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
540 : * it might well not contain entries for every heap row, or might not even
541 : * be internally consistent. (But note that we don't check indcheckxmin;
542 : * the worst consequence of following broken HOT chains would be that we
543 : * might put recently-dead tuples out-of-order in the new table, and there
544 : * is little harm in that.)
545 : */
1564 peter_e 546 GIC 143 : if (!OldIndex->rd_index->indisvalid)
5671 tgl 547 3 : ereport(ERROR,
548 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
549 : errmsg("cannot cluster on invalid index \"%s\"",
5671 tgl 550 ECB : RelationGetRelationName(OldIndex))));
551 :
552 : /* Drop relcache refcnt on OldIndex, but keep lock */
6096 tgl 553 GIC 140 : index_close(OldIndex, NoLock);
7442 bruce 554 140 : }
555 :
556 : /*
6910 tgl 557 ECB : * mark_index_clustered: mark the specified index as the one clustered on
7405 558 : *
559 : * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
560 : */
561 : void
3675 rhaas 562 GIC 139 : mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
563 : {
564 : HeapTuple indexTuple;
565 : Form_pg_index indexForm;
6910 tgl 566 ECB : Relation pg_index;
567 : ListCell *index;
568 :
569 : /* Disallow applying to a partitioned table */
1900 alvherre 570 GIC 139 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
571 6 : ereport(ERROR,
572 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
573 : errmsg("cannot mark index clustered in partitioned table")));
1900 alvherre 574 ECB :
6910 tgl 575 : /*
576 : * If the index is already marked clustered, no need to do anything.
577 : */
6910 tgl 578 GIC 133 : if (OidIsValid(indexOid))
579 : {
1098 michael 580 127 : if (get_index_isclustered(indexOid))
6910 tgl 581 18 : return;
6910 tgl 582 ECB : }
583 :
584 : /*
585 : * Check each index of the relation and set/clear the bit as needed.
586 : */
1539 andres 587 GIC 115 : pg_index = table_open(IndexRelationId, RowExclusiveLock);
588 :
6910 tgl 589 336 : foreach(index, RelationGetIndexList(rel))
590 : {
6797 bruce 591 CBC 221 : Oid thisIndexOid = lfirst_oid(index);
592 :
4802 rhaas 593 221 : indexTuple = SearchSysCacheCopy1(INDEXRELID,
594 : ObjectIdGetDatum(thisIndexOid));
6910 tgl 595 221 : if (!HeapTupleIsValid(indexTuple))
6910 tgl 596 UIC 0 : elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
6910 tgl 597 CBC 221 : indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
598 :
6910 tgl 599 ECB : /*
6385 bruce 600 EUB : * Unset the bit if set. We know it's wrong because we checked this
6385 bruce 601 ECB : * earlier.
602 : */
6910 tgl 603 GIC 221 : if (indexForm->indisclustered)
604 : {
605 15 : indexForm->indisclustered = false;
2259 alvherre 606 15 : CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
6910 tgl 607 ECB : }
6910 tgl 608 GIC 206 : else if (thisIndexOid == indexOid)
6910 tgl 609 ECB : {
3784 610 : /* this was checked earlier, but let's be real sure */
1564 peter_e 611 GIC 109 : if (!indexForm->indisvalid)
3784 tgl 612 LBC 0 : elog(ERROR, "cannot cluster on invalid index %u", indexOid);
6910 tgl 613 GIC 109 : indexForm->indisclustered = true;
2259 alvherre 614 109 : CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
6910 tgl 615 ECB : }
3675 rhaas 616 EUB :
3675 rhaas 617 CBC 221 : InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
3675 rhaas 618 ECB : InvalidOid, is_internal);
619 :
6910 tgl 620 GIC 221 : heap_freetuple(indexTuple);
6910 tgl 621 ECB : }
622 :
1539 andres 623 GIC 115 : table_close(pg_index, RowExclusiveLock);
6910 tgl 624 ECB : }
625 :
626 : /*
4841 itagaki.takahiro 627 : * rebuild_relation: rebuild an existing relation in index or physical order
628 : *
629 : * OldHeap: table to rebuild --- must be opened and exclusive-locked!
630 : * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
631 : *
632 : * NB: this routine closes OldHeap at the right time; caller should not.
633 : */
634 : static void
893 andres 635 GIC 262 : rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
636 : {
7405 tgl 637 262 : Oid tableOid = RelationGetRelid(OldHeap);
620 michael 638 262 : Oid accessMethod = OldHeap->rd_rel->relam;
6846 tgl 639 CBC 262 : Oid tableSpace = OldHeap->rd_rel->reltablespace;
640 : Oid OIDNewHeap;
2227 tgl 641 ECB : char relpersistence;
4809 642 : bool is_system_catalog;
4812 643 : bool swap_toast_by_content;
644 : TransactionId frozenXid;
645 : MultiXactId cutoffMulti;
646 :
4841 itagaki.takahiro 647 GIC 262 : if (OidIsValid(indexOid))
648 : /* Mark the correct index as clustered */
3675 rhaas 649 98 : mark_index_clustered(OldHeap, indexOid, true);
650 :
2227 tgl 651 ECB : /* Remember info about rel before closing OldHeap */
2227 tgl 652 GIC 262 : relpersistence = OldHeap->rd_rel->relpersistence;
4809 tgl 653 CBC 262 : is_system_catalog = IsSystemRelation(OldHeap);
654 :
655 : /* Close relcache entry, but keep lock until transaction commit */
1539 andres 656 262 : table_close(OldHeap, NoLock);
7405 tgl 657 ECB :
658 : /* Create the transient table that will receive the re-ordered data */
3152 alvherre 659 GIC 262 : OIDNewHeap = make_new_heap(tableOid, tableSpace,
620 michael 660 ECB : accessMethod,
661 : relpersistence,
662 : AccessExclusiveLock);
7576 tgl 663 :
664 : /* Copy the heap data into the new table in the desired order */
893 andres 665 GIC 262 : copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
666 : &swap_toast_by_content, &frozenXid, &cutoffMulti);
667 :
668 : /*
4809 tgl 669 ECB : * Swap the physical files of the target and transient tables, then
670 : * rebuild the target's indexes and throw away the transient table.
671 : */
4809 tgl 672 GIC 262 : finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
673 : swap_toast_by_content, false, true,
674 : frozenXid, cutoffMulti,
675 : relpersistence);
9770 scrappy 676 CBC 259 : }
677 :
678 :
679 : /*
4812 tgl 680 ECB : * Create the transient table that will be filled with new data during
681 : * CLUSTER, ALTER TABLE, and similar operations. The transient table
682 : * duplicates the logical structure of the OldHeap; but will have the
683 : * specified physical storage properties NewTableSpace, NewAccessMethod, and
684 : * relpersistence.
685 : *
686 : * After this, the caller should load the new heap with transferred/modified
687 : * data, then call finish_heap_swap to complete the operation.
688 : */
689 : Oid
620 michael 690 GIC 744 : make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, Oid NewAccessMethod,
691 : char relpersistence, LOCKMODE lockmode)
692 : {
693 : TupleDesc OldHeapDesc;
4812 tgl 694 ECB : char NewHeapName[NAMEDATALEN];
695 : Oid OIDNewHeap;
696 : Oid toastid;
697 : Relation OldHeap;
698 : HeapTuple tuple;
699 : Datum reloptions;
700 : bool isNull;
701 : Oid namespaceid;
702 :
1539 andres 703 GIC 744 : OldHeap = table_open(OIDOldHeap, lockmode);
8986 bruce 704 744 : OldHeapDesc = RelationGetDescr(OldHeap);
705 :
706 : /*
3955 bruce 707 ECB : * Note that the NewHeap will not receive any of the defaults or
708 : * constraints associated with the OldHeap; we don't need 'em, and there's
709 : * no reason to spend cycles inserting them into the catalogs only to
710 : * delete them.
711 : */
712 :
713 : /*
714 : * But we do want to use reloptions of the old heap for new heap.
715 : */
4802 rhaas 716 GIC 744 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
6124 tgl 717 744 : if (!HeapTupleIsValid(tuple))
6124 tgl 718 UIC 0 : elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
6124 tgl 719 GIC 744 : reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
6124 tgl 720 ECB : &isNull);
6124 tgl 721 CBC 744 : if (isNull)
6124 tgl 722 GBC 725 : reloptions = (Datum) 0;
6125 bruce 723 ECB :
3152 alvherre 724 GIC 744 : if (relpersistence == RELPERSISTENCE_TEMP)
3554 kgrittn 725 CBC 70 : namespaceid = LookupCreationNamespace("pg_temp");
3554 kgrittn 726 ECB : else
3554 kgrittn 727 GIC 674 : namespaceid = RelationGetNamespace(OldHeap);
3554 kgrittn 728 ECB :
4812 tgl 729 : /*
730 : * Create the new heap, using a temporary name in the same namespace as
3260 bruce 731 : * the existing table. NOTE: there is some risk of collision with user
732 : * relnames. Working around this seems more trouble than it's worth; in
733 : * particular, we can't create the new heap in a different namespace from
734 : * the old, or we will have problems with the TEMP status of temp tables.
735 : *
736 : * Note: the new heap is not a shared relation, even if we are rebuilding
737 : * a shared rel. However, we do make the new heap mapped if the source is
738 : * mapped. This simplifies swap_relation_files, and is absolutely
739 : * necessary for rebuilding pg_class, for reasons explained there.
740 : */
4812 tgl 741 GIC 744 : snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
742 :
743 744 : OIDNewHeap = heap_create_with_catalog(NewHeapName,
744 : namespaceid,
6797 bruce 745 ECB : NewTableSpace,
746 : InvalidOid,
4943 tgl 747 : InvalidOid,
748 : InvalidOid,
6435 tgl 749 GIC 744 : OldHeap->rd_rel->relowner,
750 : NewAccessMethod,
751 : OldHeapDesc,
752 : NIL,
3554 kgrittn 753 ECB : RELKIND_RELATION,
754 : relpersistence,
755 : false,
4809 tgl 756 GIC 744 : RelationIsMapped(OldHeap),
757 : ONCOMMIT_NOOP,
758 : reloptions,
759 : false,
3820 alvherre 760 ECB : true,
761 : true,
762 : OIDOldHeap,
763 : NULL);
4641 rhaas 764 GIC 744 : Assert(OIDNewHeap != InvalidOid);
765 :
6125 bruce 766 744 : ReleaseSysCache(tuple);
767 :
8133 tgl 768 ECB : /*
769 : * Advance command counter so that the newly-created relation's catalog
1539 andres 770 : * tuples will be visible to table_open.
771 : */
8133 tgl 772 GIC 744 : CommandCounterIncrement();
773 :
774 : /*
775 : * If necessary, create a TOAST table for the new relation.
4809 tgl 776 ECB : *
777 : * If the relation doesn't have a TOAST table already, we can't need one
778 : * for the new relation. The other way around is possible though: if some
779 : * wide columns have been dropped, NewHeapCreateToastTable can decide that
780 : * no TOAST table is needed for the new table.
781 : *
782 : * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
783 : * that the TOAST table will be visible for insertion.
784 : */
5179 alvherre 785 GIC 744 : toastid = OldHeap->rd_rel->reltoastrelid;
786 744 : if (OidIsValid(toastid))
787 : {
788 : /* keep the existing toast table's reloptions, if any */
4802 rhaas 789 CBC 294 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
5179 alvherre 790 294 : if (!HeapTupleIsValid(tuple))
5179 alvherre 791 UIC 0 : elog(ERROR, "cache lookup failed for relation %u", toastid);
5179 alvherre 792 GIC 294 : reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
5179 alvherre 793 ECB : &isNull);
5179 alvherre 794 CBC 294 : if (isNull)
5179 alvherre 795 GBC 294 : reloptions = (Datum) 0;
5179 alvherre 796 ECB :
592 akapila 797 GIC 294 : NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode, toastid);
4809 tgl 798 ECB :
5179 alvherre 799 CBC 294 : ReleaseSysCache(tuple);
800 : }
9345 bruce 801 ECB :
1539 andres 802 GIC 744 : table_close(OldHeap, NoLock);
9345 bruce 803 ECB :
8187 tgl 804 GIC 744 : return OIDNewHeap;
805 : }
9770 scrappy 806 ECB :
807 : /*
1473 andres 808 : * Do the physical copying of table data.
809 : *
810 : * There are three output parameters:
811 : * *pSwapToastByContent is set true if toast tables must be swapped by content.
812 : * *pFreezeXid receives the TransactionId used as freeze cutoff point.
813 : * *pCutoffMulti receives the MultiXactId used as a cutoff point.
814 : */
815 : static void
893 andres 816 GIC 262 : copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
817 : bool *pSwapToastByContent, TransactionId *pFreezeXid,
818 : MultiXactId *pCutoffMulti)
819 : {
7546 tgl 820 ECB : Relation NewHeap,
821 : OldHeap,
822 : OldIndex;
823 : Relation relRelation;
824 : HeapTuple reltup;
825 : Form_pg_class relform;
826 : TupleDesc oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
827 : TupleDesc newTupDesc PG_USED_FOR_ASSERTS_ONLY;
828 : VacuumParams params;
829 : struct VacuumCutoffs cutoffs;
830 : bool use_sort;
4567 tgl 831 GIC 262 : double num_tuples = 0,
832 262 : tups_vacuumed = 0,
4567 tgl 833 CBC 262 : tups_recently_dead = 0;
1929 teodor 834 ECB : BlockNumber num_pages;
4567 tgl 835 CBC 262 : int elevel = verbose ? INFO : DEBUG2;
836 : PGRUsage ru0;
601 dgustafsson 837 ECB : char *nspname;
838 :
4567 tgl 839 GIC 262 : pg_rusage_init(&ru0);
840 :
9345 bruce 841 ECB : /*
842 : * Open the relations we need.
843 : */
1539 andres 844 GIC 262 : NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
845 262 : OldHeap = table_open(OIDOldHeap, AccessExclusiveLock);
4841 itagaki.takahiro 846 CBC 262 : if (OidIsValid(OIDOldIndex))
847 98 : OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
4841 itagaki.takahiro 848 ECB : else
4841 itagaki.takahiro 849 CBC 164 : OldIndex = NULL;
850 :
601 dgustafsson 851 ECB : /* Store a copy of the namespace name for logging purposes */
601 dgustafsson 852 GIC 262 : nspname = get_namespace_name(RelationGetNamespace(OldHeap));
853 :
6636 tgl 854 ECB : /*
855 : * Their tuple descriptors should be exactly alike, but here we only need
856 : * assume that they have the same number of columns.
857 : */
6636 tgl 858 GIC 262 : oldTupDesc = RelationGetDescr(OldHeap);
859 262 : newTupDesc = RelationGetDescr(NewHeap);
6636 tgl 860 CBC 262 : Assert(newTupDesc->natts == oldTupDesc->natts);
6636 tgl 861 ECB :
4361 862 : /*
863 : * If the OldHeap has a toast table, get lock on the toast table to keep
864 : * it from being vacuumed. This is needed because autovacuum processes
865 : * toast tables independently of their main tables, with no lock on the
866 : * latter. If an autovacuum were to start on the toast table after we
867 : * compute our OldestXmin below, it would use a later OldestXmin, and then
868 : * possibly remove as DEAD toast tuples belonging to main tuples we think
869 : * are only RECENTLY_DEAD. Then we'd fail while trying to copy those
870 : * tuples.
871 : *
872 : * We don't need to open the toast relation here, just lock it. The lock
873 : * will be held till end of transaction.
874 : */
4361 tgl 875 GIC 262 : if (OldHeap->rd_rel->reltoastrelid)
876 87 : LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
4361 tgl 877 ECB :
4812 878 : /*
879 : * If both tables have TOAST tables, perform toast swap by content. It is
880 : * possible that the old table has a toast table but the new one doesn't,
881 : * if toastable columns have been dropped. In that case we have to do
882 : * swap by links. This is okay because swap by content is only essential
883 : * for system catalogs, and we don't support schema changes for them.
884 : */
4812 tgl 885 GIC 262 : if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
886 : {
4812 tgl 887 CBC 87 : *pSwapToastByContent = true;
888 :
4812 tgl 889 ECB : /*
890 : * When doing swap by content, any toast pointers written into NewHeap
891 : * must use the old toast table's OID, because that's where the toast
892 : * data will eventually be found. Set this up by setting rd_toastoid.
893 : * This also tells toast_save_datum() to preserve the toast value
894 : * OIDs, which we want so as not to invalidate toast pointers in
895 : * system catalog caches, and to avoid making multiple copies of a
896 : * single toast value.
897 : *
898 : * Note that we must hold NewHeap open until we are done writing data,
899 : * since the relcache will not guarantee to remember this setting once
900 : * the relation is closed. Also, this technique depends on the fact
901 : * that no one will try to read from the NewHeap until after we've
902 : * finished writing it and swapping the rels --- otherwise they could
903 : * follow the toast pointers to the wrong place. (It would actually
904 : * work for values copied over from the old toast table, but not for
905 : * any values that we toast which were previously not toasted.)
906 : */
4812 tgl 907 GIC 87 : NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
908 : }
4812 tgl 909 ECB : else
4812 tgl 910 GIC 175 : *pSwapToastByContent = false;
911 :
5806 alvherre 912 ECB : /*
913 : * Compute xids used to freeze and weed out dead tuples and multixacts.
914 : * Since we're going to rewrite the whole table anyway, there's no reason
915 : * not to be aggressive about this.
916 : */
137 pg 917 GNC 262 : memset(¶ms, 0, sizeof(VacuumParams));
108 918 262 : vacuum_get_cutoffs(OldHeap, ¶ms, &cutoffs);
5845 tgl 919 ECB :
5563 920 : /*
921 : * FreezeXid will become the table's new relfrozenxid, and that mustn't go
922 : * backwards, so take the max.
923 : */
1447 andres 924 GIC 524 : if (TransactionIdIsValid(OldHeap->rd_rel->relfrozenxid) &&
108 pg 925 GNC 262 : TransactionIdPrecedes(cutoffs.FreezeLimit,
926 262 : OldHeap->rd_rel->relfrozenxid))
927 52 : cutoffs.FreezeLimit = OldHeap->rd_rel->relfrozenxid;
5563 tgl 928 ECB :
3208 alvherre 929 : /*
930 : * MultiXactCutoff, similarly, shouldn't go backwards either.
931 : */
1447 andres 932 GIC 524 : if (MultiXactIdIsValid(OldHeap->rd_rel->relminmxid) &&
108 pg 933 GNC 262 : MultiXactIdPrecedes(cutoffs.MultiXactCutoff,
934 262 : OldHeap->rd_rel->relminmxid))
108 pg 935 UNC 0 : cutoffs.MultiXactCutoff = OldHeap->rd_rel->relminmxid;
3208 alvherre 936 ECB :
6636 tgl 937 : /*
4382 bruce 938 : * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
4382 bruce 939 EUB : * the OldHeap. We know how to use a sort to duplicate the ordering of a
940 : * btree index, and will use seqscan-and-sort for that case if the planner
941 : * tells us it's cheaper. Otherwise, always indexscan if an index is
942 : * provided, else plain seqscan.
943 : */
4567 tgl 944 GIC 262 : if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
945 98 : use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
946 : else
947 164 : use_sort = false;
4567 tgl 948 ECB :
949 : /* Log what we're doing */
1473 andres 950 GIC 262 : if (OldIndex != NULL && !use_sort)
4567 tgl 951 CBC 48 : ereport(elevel,
952 : (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
953 : nspname,
4567 tgl 954 ECB : RelationGetRelationName(OldHeap),
955 : RelationGetRelationName(OldIndex))));
1473 andres 956 GIC 214 : else if (use_sort)
4567 tgl 957 50 : ereport(elevel,
958 : (errmsg("clustering \"%s.%s\" using sequential scan and sort",
959 : nspname,
4567 tgl 960 ECB : RelationGetRelationName(OldHeap))));
961 : else
4567 tgl 962 GIC 164 : ereport(elevel,
963 : (errmsg("vacuuming \"%s.%s\"",
964 : nspname,
965 : RelationGetRelationName(OldHeap))));
4567 tgl 966 ECB :
967 : /*
968 : * Hand off the actual copying to AM specific function, the generic code
969 : * cannot know how to deal with visibility across AMs. Note that this
970 : * routine is allowed to set FreezeXid / MultiXactCutoff to different
971 : * values (e.g. because the AM doesn't use freezing).
972 : */
1473 andres 973 GIC 262 : table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
974 : cutoffs.OldestXmin, &cutoffs.FreezeLimit,
975 : &cutoffs.MultiXactCutoff,
976 : &num_tuples, &tups_vacuumed,
977 : &tups_recently_dead);
6636 tgl 978 ECB :
979 : /* return selected values to caller, get set as relfrozenxid/minmxid */
108 pg 980 GNC 262 : *pFreezeXid = cutoffs.FreezeLimit;
981 262 : *pCutoffMulti = cutoffs.MultiXactCutoff;
982 :
983 : /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
4812 tgl 984 GIC 262 : NewHeap->rd_toastoid = InvalidOid;
4812 tgl 985 ECB :
1929 teodor 986 CBC 262 : num_pages = RelationGetNumberOfBlocks(NewHeap);
987 :
988 : /* Log what we did */
4567 tgl 989 262 : ereport(elevel,
990 : (errmsg("\"%s.%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
601 dgustafsson 991 ECB : nspname,
992 : RelationGetRelationName(OldHeap),
993 : tups_vacuumed, num_tuples,
4567 tgl 994 : RelationGetNumberOfBlocks(OldHeap)),
995 : errdetail("%.0f dead row versions cannot be removed yet.\n"
996 : "%s.",
997 : tups_recently_dead,
998 : pg_rusage_show(&ru0))));
999 :
4841 itagaki.takahiro 1000 GIC 262 : if (OldIndex != NULL)
1001 98 : index_close(OldIndex, NoLock);
1539 andres 1002 262 : table_close(OldHeap, NoLock);
1003 262 : table_close(NewHeap, NoLock);
1004 :
1929 teodor 1005 ECB : /* Update pg_class to reflect the correct values of pages and tuples. */
1539 andres 1006 CBC 262 : relRelation = table_open(RelationRelationId, RowExclusiveLock);
1929 teodor 1007 ECB :
1929 teodor 1008 CBC 262 : reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
1929 teodor 1009 GIC 262 : if (!HeapTupleIsValid(reltup))
1929 teodor 1010 UIC 0 : elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
1929 teodor 1011 CBC 262 : relform = (Form_pg_class) GETSTRUCT(reltup);
1012 :
1013 262 : relform->relpages = num_pages;
1014 262 : relform->reltuples = num_tuples;
1929 teodor 1015 EUB :
1929 teodor 1016 ECB : /* Don't update the stats for pg_class. See swap_relation_files. */
1929 teodor 1017 GIC 262 : if (OIDOldHeap != RelationRelationId)
1929 teodor 1018 CBC 250 : CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
1929 teodor 1019 ECB : else
1929 teodor 1020 GIC 12 : CacheInvalidateRelcacheByTuple(reltup);
1021 :
1929 teodor 1022 ECB : /* Clean up. */
1929 teodor 1023 CBC 262 : heap_freetuple(reltup);
1539 andres 1024 GIC 262 : table_close(relRelation, RowExclusiveLock);
1929 teodor 1025 ECB :
1026 : /* Make the update visible */
1929 teodor 1027 GIC 262 : CommandCounterIncrement();
9770 scrappy 1028 CBC 262 : }
7547 bruce 1029 ECB :
/*
 * Swap the physical files of two given relations.
 *
 * We swap the physical identity (reltablespace, relfilenumber) while keeping
 * the same logical identities of the two relations.  relpersistence is also
 * swapped, which is critical since it determines where buffers live for each
 * relation.  For non-mapped relations relam is swapped as well.
 *
 * We can swap associated TOAST data in either of two ways: recursively swap
 * the physical content of the toast tables (and their indexes), or swap the
 * TOAST links in the given relations' pg_class entries.  The former is needed
 * to manage rewrites of shared catalogs (where we cannot change the pg_class
 * links) while the latter is the only way to handle cases in which a toast
 * table is added or removed altogether.
 *
 * Additionally, the first relation is marked with relfrozenxid set to
 * frozenXid.  It seems a bit ugly to have this here, but the caller would
 * have to do it anyway, so having it here saves a heap_update.  Note: in
 * the swap-toast-links case, we assume we don't need to change the toast
 * table's relfrozenxid: the new version of the toast table should already
 * have relfrozenxid set to RecentXmin, which is good enough.
 *
 * Lastly, if r2 and its toast table and toast index (if any) are mapped,
 * their OIDs are emitted into mapped_tables[].  This is hacky but beats
 * having to look the information up again later in finish_heap_swap.
 *
 * Parameters:
 * r1, r2: OIDs of the two relations whose storage is exchanged.
 * target_is_pg_class: when true, the pg_class rows are not updated; only
 *		relcache invalidations are issued for them.
 * swap_toast_by_content: selects between the two TOAST strategies above.
 * is_internal: passed to the post-alter hook invoked for r1 (the hook for r2
 *		is always invoked as internal).
 * frozenXid, cutoffMulti: stored into r1's relfrozenxid and relminmxid,
 *		unless r1 is an index.
 * mapped_tables: output array receiving r2's OID in the mapped-relation
 *		case; recursive calls for the toast table and toast index append
 *		after it.  The caller must supply enough room.
 */
static void
swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
					bool swap_toast_by_content,
					bool is_internal,
					TransactionId frozenXid,
					MultiXactId cutoffMulti,
					Oid *mapped_tables)
{
	Relation	relRelation;
	HeapTuple	reltup1,
				reltup2;
	Form_pg_class relform1,
				relform2;
	RelFileNumber relfilenumber1,
				relfilenumber2;
	RelFileNumber swaptemp;
	char		swptmpchr;

	/* We need writable copies of both pg_class tuples. */
	relRelation = table_open(RelationRelationId, RowExclusiveLock);

	reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
	if (!HeapTupleIsValid(reltup1))
		elog(ERROR, "cache lookup failed for relation %u", r1);
	relform1 = (Form_pg_class) GETSTRUCT(reltup1);

	reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
	if (!HeapTupleIsValid(reltup2))
		elog(ERROR, "cache lookup failed for relation %u", r2);
	relform2 = (Form_pg_class) GETSTRUCT(reltup2);

	relfilenumber1 = relform1->relfilenode;
	relfilenumber2 = relform2->relfilenode;

	if (RelFileNumberIsValid(relfilenumber1) &&
		RelFileNumberIsValid(relfilenumber2))
	{
		/*
		 * Normal non-mapped relations: swap relfilenumbers, reltablespaces,
		 * relam, relpersistence
		 */
		Assert(!target_is_pg_class);

		swaptemp = relform1->relfilenode;
		relform1->relfilenode = relform2->relfilenode;
		relform2->relfilenode = swaptemp;

		swaptemp = relform1->reltablespace;
		relform1->reltablespace = relform2->reltablespace;
		relform2->reltablespace = swaptemp;

		swaptemp = relform1->relam;
		relform1->relam = relform2->relam;
		relform2->relam = swaptemp;

		swptmpchr = relform1->relpersistence;
		relform1->relpersistence = relform2->relpersistence;
		relform2->relpersistence = swptmpchr;

		/* Also swap toast links, if we're swapping by links */
		if (!swap_toast_by_content)
		{
			swaptemp = relform1->reltoastrelid;
			relform1->reltoastrelid = relform2->reltoastrelid;
			relform2->reltoastrelid = swaptemp;
		}
	}
	else
	{
		/*
		 * Mapped-relation case.  Here we have to swap the relation mappings
		 * instead of modifying the pg_class columns.  Both must be mapped.
		 */
		if (RelFileNumberIsValid(relfilenumber1) ||
			RelFileNumberIsValid(relfilenumber2))
			elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
				 NameStr(relform1->relname));

		/*
		 * We can't change the tablespace nor persistence of a mapped rel, and
		 * we can't handle toast link swapping for one either, because we must
		 * not apply any critical changes to its pg_class row.  These cases
		 * should be prevented by upstream permissions tests, so these checks
		 * are non-user-facing emergency backstop.
		 */
		if (relform1->reltablespace != relform2->reltablespace)
			elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
				 NameStr(relform1->relname));
		if (relform1->relpersistence != relform2->relpersistence)
			elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
				 NameStr(relform1->relname));
		if (relform1->relam != relform2->relam)
			elog(ERROR, "cannot change access method of mapped relation \"%s\"",
				 NameStr(relform1->relname));
		if (!swap_toast_by_content &&
			(relform1->reltoastrelid || relform2->reltoastrelid))
			elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
				 NameStr(relform1->relname));

		/*
		 * Fetch the mappings --- shouldn't fail, but be paranoid
		 */
		relfilenumber1 = RelationMapOidToFilenumber(r1, relform1->relisshared);
		if (!RelFileNumberIsValid(relfilenumber1))
			elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
				 NameStr(relform1->relname), r1);
		relfilenumber2 = RelationMapOidToFilenumber(r2, relform2->relisshared);
		if (!RelFileNumberIsValid(relfilenumber2))
			elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
				 NameStr(relform2->relname), r2);

		/*
		 * Send replacement mappings to relmapper.  Note these won't actually
		 * take effect until CommandCounterIncrement.
		 */
		RelationMapUpdateMap(r1, relfilenumber2, relform1->relisshared, false);
		RelationMapUpdateMap(r2, relfilenumber1, relform2->relisshared, false);

		/*
		 * Pass OIDs of mapped r2 tables back to caller.  The local pointer is
		 * advanced so that the recursive calls below, which receive it, store
		 * their entries after this one.
		 */
		*mapped_tables++ = r2;
	}

	/*
	 * Recognize that rel1's relfilenumber (swapped from rel2) is new in this
	 * subtransaction. The rel2 storage (swapped from rel1) may or may not be
	 * new.
	 */
	{
		Relation	rel1,
					rel2;

		rel1 = relation_open(r1, NoLock);
		rel2 = relation_open(r2, NoLock);
		rel2->rd_createSubid = rel1->rd_createSubid;
		rel2->rd_newRelfilelocatorSubid = rel1->rd_newRelfilelocatorSubid;
		rel2->rd_firstRelfilelocatorSubid = rel1->rd_firstRelfilelocatorSubid;
		RelationAssumeNewRelfilelocator(rel1);
		relation_close(rel1, NoLock);
		relation_close(rel2, NoLock);
	}

	/*
	 * In the case of a shared catalog, these next few steps will only affect
	 * our own database's pg_class row; but that's okay, because they are all
	 * noncritical updates.  That's also an important fact for the case of a
	 * mapped catalog, because it's possible that we'll commit the map change
	 * and then fail to commit the pg_class update.
	 */

	/* set rel1's frozen Xid and minimum MultiXid */
	if (relform1->relkind != RELKIND_INDEX)
	{
		Assert(!TransactionIdIsValid(frozenXid) ||
			   TransactionIdIsNormal(frozenXid));
		relform1->relfrozenxid = frozenXid;
		relform1->relminmxid = cutoffMulti;
	}

	/* swap size statistics too, since new rel has freshly-updated stats */
	{
		int32		swap_pages;
		float4		swap_tuples;
		int32		swap_allvisible;

		swap_pages = relform1->relpages;
		relform1->relpages = relform2->relpages;
		relform2->relpages = swap_pages;

		swap_tuples = relform1->reltuples;
		relform1->reltuples = relform2->reltuples;
		relform2->reltuples = swap_tuples;

		swap_allvisible = relform1->relallvisible;
		relform1->relallvisible = relform2->relallvisible;
		relform2->relallvisible = swap_allvisible;
	}

	/*
	 * Update the tuples in pg_class --- unless the target relation of the
	 * swap is pg_class itself.  In that case, there is zero point in making
	 * changes because we'd be updating the old data that we're about to throw
	 * away.  Because the real work being done here for a mapped relation is
	 * just to change the relation map settings, it's all right to not update
	 * the pg_class rows in this case.  The most important changes will instead
	 * be performed later, in finish_heap_swap() itself.
	 */
	if (!target_is_pg_class)
	{
		CatalogIndexState indstate;

		indstate = CatalogOpenIndexes(relRelation);
		CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
								   indstate);
		CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
								   indstate);
		CatalogCloseIndexes(indstate);
	}
	else
	{
		/* no update ... but we do still need relcache inval */
		CacheInvalidateRelcacheByTuple(reltup1);
		CacheInvalidateRelcacheByTuple(reltup2);
	}

	/*
	 * Post alter hook for modified relations.  The change to r2 is always
	 * internal, but r1 depends on the invocation context.
	 */
	InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
								 InvalidOid, is_internal);
	InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
								 InvalidOid, true);

	/*
	 * If we have toast tables associated with the relations being swapped,
	 * deal with them too.
	 */
	if (relform1->reltoastrelid || relform2->reltoastrelid)
	{
		if (swap_toast_by_content)
		{
			if (relform1->reltoastrelid && relform2->reltoastrelid)
			{
				/* Recursively swap the contents of the toast tables */
				swap_relation_files(relform1->reltoastrelid,
									relform2->reltoastrelid,
									target_is_pg_class,
									swap_toast_by_content,
									is_internal,
									frozenXid,
									cutoffMulti,
									mapped_tables);
			}
			else
			{
				/* caller messed up */
				elog(ERROR, "cannot swap toast files by content when there's only one");
			}
		}
		else
		{
			/*
			 * We swapped the ownership links, so we need to change dependency
			 * data to match.
			 *
			 * NOTE: it is possible that only one table has a toast table.
			 *
			 * NOTE: at present, a TOAST table's only dependency is the one on
			 * its owning table.  If more are ever created, we'd need to use
			 * something more selective than deleteDependencyRecordsFor() to
			 * get rid of just the link we want.
			 */
			ObjectAddress baseobject,
						toastobject;
			long		count;

			/*
			 * We disallow this case for system catalogs, to avoid the
			 * possibility that the catalog we're rebuilding is one of the
			 * ones the dependency changes would change.  It's too late to be
			 * making any data changes to the target catalog.
			 */
			if (IsSystemClass(r1, relform1))
				elog(ERROR, "cannot swap toast files by links for system catalogs");

			/* Delete old dependencies */
			if (relform1->reltoastrelid)
			{
				count = deleteDependencyRecordsFor(RelationRelationId,
												   relform1->reltoastrelid,
												   false);
				if (count != 1)
					elog(ERROR, "expected one dependency record for TOAST table, found %ld",
						 count);
			}
			if (relform2->reltoastrelid)
			{
				count = deleteDependencyRecordsFor(RelationRelationId,
												   relform2->reltoastrelid,
												   false);
				if (count != 1)
					elog(ERROR, "expected one dependency record for TOAST table, found %ld",
						 count);
			}

			/* Register new dependencies */
			baseobject.classId = RelationRelationId;
			baseobject.objectSubId = 0;
			toastobject.classId = RelationRelationId;
			toastobject.objectSubId = 0;

			if (relform1->reltoastrelid)
			{
				baseobject.objectId = r1;
				toastobject.objectId = relform1->reltoastrelid;
				recordDependencyOn(&toastobject, &baseobject,
								   DEPENDENCY_INTERNAL);
			}

			if (relform2->reltoastrelid)
			{
				baseobject.objectId = r2;
				toastobject.objectId = relform2->reltoastrelid;
				recordDependencyOn(&toastobject, &baseobject,
								   DEPENDENCY_INTERNAL);
			}
		}
	}

	/*
	 * If we're swapping two toast tables by content, do the same for their
	 * valid index. The swap can actually be safely done only if the relations
	 * have indexes.
	 */
	if (swap_toast_by_content &&
		relform1->relkind == RELKIND_TOASTVALUE &&
		relform2->relkind == RELKIND_TOASTVALUE)
	{
		Oid			toastIndex1,
					toastIndex2;

		/* Get valid index for each relation */
		toastIndex1 = toast_get_valid_index(r1,
											AccessExclusiveLock);
		toastIndex2 = toast_get_valid_index(r2,
											AccessExclusiveLock);

		/* Invalid frozenXid/cutoffMulti are passed for the index swap. */
		swap_relation_files(toastIndex1,
							toastIndex2,
							target_is_pg_class,
							swap_toast_by_content,
							is_internal,
							InvalidTransactionId,
							InvalidMultiXactId,
							mapped_tables);
	}

	/* Clean up. */
	heap_freetuple(reltup1);
	heap_freetuple(reltup2);

	table_close(relRelation, RowExclusiveLock);

	/*
	 * Close both relcache entries' smgr links.  We need this kluge because
	 * both links will be invalidated during upcoming CommandCounterIncrement.
	 * Whichever of the rels is the second to be cleared will have a dangling
	 * reference to the other's smgr entry.  Rather than trying to avoid this
	 * by ordering operations just so, it's easiest to close the links first.
	 * (Fortunately, since one of the entries is local in our transaction,
	 * it's sufficient to clear out our own relcache this way; the problem
	 * cannot arise for other backends when they see our update on the
	 * non-transient relation.)
	 *
	 * Caution: the placement of this step interacts with the decision to
	 * handle toast rels by recursion.  When we are trying to rebuild pg_class
	 * itself, the smgr close on pg_class must happen after all accesses in
	 * this function.
	 */
	RelationCloseSmgrByOid(r1);
	RelationCloseSmgrByOid(r2);
}
1418 :
1419 : /*
1420 : * Remove the transient table that was built by make_new_heap, and finish
1421 : * cleaning up (including rebuilding all indexes on the old heap).
4812 tgl 1422 ECB : */
1423 : void
4809 tgl 1424 CBC 684 : finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
1425 : bool is_system_catalog,
1426 : bool swap_toast_by_content,
1427 : bool check_constraints,
1428 : bool is_internal,
1429 : TransactionId frozenXid,
1430 : MultiXactId cutoffMulti,
3067 alvherre 1431 ECB : char newrelpersistence)
1432 : {
1433 : ObjectAddress object;
1434 : Oid mapped_tables[4];
1435 : int reindex_flags;
811 michael 1436 GIC 684 : ReindexParams reindex_params = {0};
1437 : int i;
1438 :
1439 : /* Report that we are now swapping relation files */
1476 rhaas 1440 684 : pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
1441 : PROGRESS_CLUSTER_PHASE_SWAP_REL_FILES);
1442 :
4809 tgl 1443 ECB : /* Zero out possible results from swapped_relation_files */
4809 tgl 1444 GIC 684 : memset(mapped_tables, 0, sizeof(mapped_tables));
1445 :
1446 : /*
4809 tgl 1447 ECB : * Swap the contents of the heap relations (including any toast tables).
1448 : * Also set old heap's relfrozenxid to frozenXid.
1449 : */
4809 tgl 1450 GIC 684 : swap_relation_files(OIDOldHeap, OIDNewHeap,
4809 tgl 1451 ECB : (OIDOldHeap == RelationRelationId),
1452 : swap_toast_by_content, is_internal,
1453 : frozenXid, cutoffMulti, mapped_tables);
1454 :
1455 : /*
1456 : * If it's a system catalog, queue a sinval message to flush all catcaches
1809 1457 : * on the catalog when we reach CommandCounterIncrement.
1458 : */
4809 tgl 1459 GIC 684 : if (is_system_catalog)
1460 100 : CacheInvalidateCatalog(OIDOldHeap);
1461 :
1462 : /*
1463 : * Rebuild each index on the relation (but not the toast table, which is
1464 : * all-new at this point). It is important to do this before the DROP
1465 : * step because if we are processing a system catalog that will be used
3260 bruce 1466 ECB : * during DROP, we want to have its indexes available. There is no
4809 tgl 1467 : * advantage to the other order anyway because this is all transactional,
1468 : * so no chance to reclaim disk space before commit. We do not need a
1469 : * final CommandCounterIncrement() because reindex_relation does it.
1470 : *
1471 : * Note: because index_build is called via reindex_relation, it will never
1472 : * set indcheckxmin true for the indexes. This is OK even though in some
1473 : * sense we are building new indexes rather than rebuilding existing ones,
1474 : * because the new heap won't contain any HOT chains at all, let alone
1475 : * broken ones, so it can't be necessary to set indcheckxmin.
1476 : */
4376 tgl 1477 GIC 684 : reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
4462 rhaas 1478 684 : if (check_constraints)
4376 tgl 1479 422 : reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
1480 :
1481 : /*
1482 : * Ensure that the indexes have the same persistence as the parent
1483 : * relation.
3067 alvherre 1484 ECB : */
3067 alvherre 1485 CBC 684 : if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
1486 10 : reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
3067 alvherre 1487 GIC 674 : else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
1488 637 : reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
1489 :
1490 : /* Report that we are now reindexing relations */
1476 rhaas 1491 684 : pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
1476 rhaas 1492 ECB : PROGRESS_CLUSTER_PHASE_REBUILD_INDEX);
1493 :
811 michael 1494 CBC 684 : reindex_relation(OIDOldHeap, reindex_flags, &reindex_params);
4812 tgl 1495 ECB :
1496 : /* Report that we are now doing clean up */
1476 rhaas 1497 GIC 675 : pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
1476 rhaas 1498 ECB : PROGRESS_CLUSTER_PHASE_FINAL_CLEANUP);
1499 :
1500 : /*
730 michael 1501 : * If the relation being rebuilt is pg_class, swap_relation_files()
1502 : * couldn't update pg_class's own pg_class entry (check comments in
1503 : * swap_relation_files()), thus relfrozenxid was not updated. That's
3323 rhaas 1504 : * annoying because a potential reason for doing a VACUUM FULL is an
1505 : * imminent or actual anti-wraparound shutdown. So, now that we can
1506 : * access the new relation using its indices, update relfrozenxid.
1507 : * pg_class doesn't have a toast relation, so we don't need to update the
1508 : * corresponding toast relation. Note that there's little point moving all
1509 : * relfrozenxid updates here since swap_relation_files() needs to write to
1510 : * pg_class for non-mapped relations anyway.
1511 : */
3323 rhaas 1512 GIC 675 : if (OIDOldHeap == RelationRelationId)
1513 : {
1514 : Relation relRelation;
1515 : HeapTuple reltup;
1516 : Form_pg_class relform;
1517 :
1539 andres 1518 12 : relRelation = table_open(RelationRelationId, RowExclusiveLock);
3323 rhaas 1519 ECB :
3323 rhaas 1520 GIC 12 : reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
1521 12 : if (!HeapTupleIsValid(reltup))
3323 rhaas 1522 UIC 0 : elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
3323 rhaas 1523 GIC 12 : relform = (Form_pg_class) GETSTRUCT(reltup);
1524 :
3323 rhaas 1525 CBC 12 : relform->relfrozenxid = frozenXid;
3323 rhaas 1526 GIC 12 : relform->relminmxid = cutoffMulti;
3323 rhaas 1527 ECB :
2259 alvherre 1528 CBC 12 : CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
3323 rhaas 1529 EUB :
1539 andres 1530 CBC 12 : table_close(relRelation, RowExclusiveLock);
1531 : }
3323 rhaas 1532 ECB :
1533 : /* Destroy new heap with old filenumber */
4812 tgl 1534 GIC 675 : object.classId = RelationRelationId;
4812 tgl 1535 CBC 675 : object.objectId = OIDNewHeap;
4812 tgl 1536 GIC 675 : object.objectSubId = 0;
4812 tgl 1537 ECB :
1538 : /*
1539 : * The new relation is local to our transaction and we know nothing
1540 : * depends on it, so DROP_RESTRICT should be OK.
1541 : */
4091 rhaas 1542 CBC 675 : performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
4812 tgl 1543 ECB :
1544 : /* performDeletion does CommandCounterIncrement at end */
1545 :
1546 : /*
1547 : * Now we must remove any relation mapping entries that we set up for the
1548 : * transient table, as well as its toast table and toast index if any. If
4790 bruce 1549 : * we fail to do this before commit, the relmapper will complain about new
1550 : * permanent map entries being added post-bootstrap.
1551 : */
4809 tgl 1552 GIC 753 : for (i = 0; OidIsValid(mapped_tables[i]); i++)
1553 78 : RelationMapRemoveMapping(mapped_tables[i]);
1554 :
1555 : /*
1556 : * At this point, everything is kosher except that, if we did toast swap
1557 : * by links, the toast table's name corresponds to the transient table.
1558 : * The name is irrelevant to the backend because it's referenced by OID,
4812 tgl 1559 ECB : * but users looking at the catalogs could be confused. Rename it to
1560 : * prevent this problem.
1561 : *
1562 : * Note no lock required on the relation, because we already hold an
1563 : * exclusive lock on it.
1564 : */
4812 tgl 1565 GIC 675 : if (!swap_toast_by_content)
1566 : {
1567 : Relation newrel;
1568 :
1539 andres 1569 588 : newrel = table_open(OIDOldHeap, NoLock);
4812 tgl 1570 588 : if (OidIsValid(newrel->rd_rel->reltoastrelid))
1571 : {
4812 tgl 1572 ECB : Oid toastidx;
1573 : char NewToastName[NAMEDATALEN];
1574 :
1575 : /* Get the associated valid index to be renamed */
3566 fujii 1576 CBC 170 : toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
1114 noah 1577 ECB : NoLock);
1578 :
1579 : /* rename the toast table ... */
4812 tgl 1580 GIC 170 : snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
1581 : OIDOldHeap);
1582 170 : RenameRelationInternal(newrel->rd_rel->reltoastrelid,
1627 peter_e 1583 ECB : NewToastName, true, false);
1584 :
1585 : /* ... and its valid index too. */
4812 tgl 1586 GIC 170 : snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
4812 tgl 1587 ECB : OIDOldHeap);
1588 :
4812 tgl 1589 CBC 170 : RenameRelationInternal(toastidx,
1590 : NewToastName, true, true);
1591 :
1592 : /*
592 akapila 1593 ECB : * Reset the relrewrite for the toast. The command-counter
1594 : * increment is required here as we are about to update the tuple
1595 : * that is updated as part of RenameRelationInternal.
1596 : */
592 akapila 1597 GIC 170 : CommandCounterIncrement();
1598 170 : ResetRelRewrite(newrel->rd_rel->reltoastrelid);
1599 : }
4812 tgl 1600 588 : relation_close(newrel, NoLock);
1601 : }
1602 :
1603 : /* if it's not a catalog table, clear any missing attribute settings */
1838 andrew 1604 CBC 675 : if (!is_system_catalog)
1838 andrew 1605 ECB : {
1606 : Relation newrel;
1607 :
1539 andres 1608 GIC 575 : newrel = table_open(OIDOldHeap, NoLock);
1838 andrew 1609 575 : RelationClearMissing(newrel);
1610 575 : relation_close(newrel, NoLock);
1838 andrew 1611 ECB : }
4812 tgl 1612 GIC 675 : }
1613 :
1614 :
7405 tgl 1615 ECB : /*
1616 : * Get a list of tables that the current user has privileges on and
1335 michael 1617 : * have indisclustered set. Return the list in a List * of RelToCluster
1618 : * (stored in the specified memory context), each one giving the tableOid
1149 tgl 1619 : * and the indexOid on which the table is already clustered.
1620 : */
1621 : static List *
7405 tgl 1622 GIC 12 : get_tables_to_cluster(MemoryContext cluster_context)
1623 : {
1624 : Relation indRelation;
1625 : TableScanDesc scan;
1626 : ScanKeyData entry;
1627 : HeapTuple indexTuple;
1628 : Form_pg_index index;
7188 bruce 1629 ECB : MemoryContext old_context;
372 alvherre 1630 GIC 12 : List *rtcs = NIL;
1631 :
1632 : /*
1633 : * Get all indexes that have indisclustered set and that the current user
1634 : * has the appropriate privileges for.
1635 : */
1539 andres 1636 12 : indRelation = table_open(IndexRelationId, AccessShareLock);
7088 tgl 1637 CBC 12 : ScanKeyInit(&entry,
1638 : Anum_pg_index_indisclustered,
1639 : BTEqualStrategyNumber, F_BOOLEQ,
1640 : BoolGetDatum(true));
1490 andres 1641 GIC 12 : scan = table_beginscan_catalog(indRelation, 1, &entry);
7450 bruce 1642 21 : while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
7450 bruce 1643 ECB : {
372 alvherre 1644 : RelToCluster *rtc;
1645 :
7450 bruce 1646 GIC 9 : index = (Form_pg_index) GETSTRUCT(indexTuple);
1647 :
86 jdavis 1648 GNC 9 : if (!cluster_is_permitted_for_relation(index->indrelid, GetUserId()))
7450 bruce 1649 CBC 6 : continue;
1650 :
1651 : /* Use a permanent memory context for the result list */
7450 bruce 1652 GIC 3 : old_context = MemoryContextSwitchTo(cluster_context);
7450 bruce 1653 ECB :
372 alvherre 1654 GIC 3 : rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
372 alvherre 1655 CBC 3 : rtc->tableOid = index->indrelid;
1656 3 : rtc->indexOid = index->indexrelid;
372 alvherre 1657 GIC 3 : rtcs = lappend(rtcs, rtc);
1658 :
7450 bruce 1659 CBC 3 : MemoryContextSwitchTo(old_context);
1660 : }
1490 andres 1661 12 : table_endscan(scan);
7450 bruce 1662 ECB :
7405 tgl 1663 CBC 12 : relation_close(indRelation, AccessShareLock);
7405 tgl 1664 ECB :
372 alvherre 1665 GIC 12 : return rtcs;
372 alvherre 1666 ECB : }
1667 :
1668 : /*
1669 : * Given an index on a partitioned table, return a list of RelToCluster for
1670 : * all the children leaves tables/indexes.
1671 : *
1672 : * Like expand_vacuum_rel, but here caller must hold AccessExclusiveLock
1673 : * on the table containing the index.
1674 : */
1675 : static List *
372 alvherre 1676 GIC 10 : get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
1677 : {
1678 : List *inhoids;
1679 : ListCell *lc;
1680 10 : List *rtcs = NIL;
1681 : MemoryContext old_context;
1682 :
372 alvherre 1683 ECB : /* Do not lock the children until they're processed */
372 alvherre 1684 GIC 10 : inhoids = find_all_inheritors(indexOid, NoLock, NULL);
1685 :
1686 52 : foreach(lc, inhoids)
372 alvherre 1687 ECB : {
372 alvherre 1688 GIC 42 : Oid indexrelid = lfirst_oid(lc);
1689 42 : Oid relid = IndexGetRelation(indexrelid, false);
1690 : RelToCluster *rtc;
372 alvherre 1691 ECB :
1692 : /* consider only leaf indexes */
372 alvherre 1693 CBC 42 : if (get_rel_relkind(indexrelid) != RELKIND_INDEX)
372 alvherre 1694 GIC 19 : continue;
372 alvherre 1695 ECB :
1696 : /*
1697 : * We already checked that the user has privileges to CLUSTER the
1698 : * partitioned table when we locked it earlier, so there's no need to
1699 : * check the privileges again here.
1700 : */
360 1701 :
1702 : /* Use a permanent memory context for the result list */
360 alvherre 1703 GIC 23 : old_context = MemoryContextSwitchTo(cluster_context);
1704 :
372 1705 23 : rtc = (RelToCluster *) palloc(sizeof(RelToCluster));
1706 23 : rtc->tableOid = relid;
1707 23 : rtc->indexOid = indexrelid;
1708 23 : rtcs = lappend(rtcs, rtc);
1709 :
360 alvherre 1710 CBC 23 : MemoryContextSwitchTo(old_context);
1711 : }
372 alvherre 1712 ECB :
372 alvherre 1713 CBC 10 : return rtcs;
7450 bruce 1714 ECB : }
1715 :
1716 : /*
1717 : * Return whether userid has privileges to CLUSTER relid. If not, this
1718 : * function emits a WARNING.
1719 : */
1720 : static bool
86 jdavis 1721 GNC 35 : cluster_is_permitted_for_relation(Oid relid, Oid userid)
1722 : {
1723 52 : if (pg_class_aclcheck(relid, userid, ACL_MAINTAIN) == ACLCHECK_OK ||
1724 17 : has_partition_ancestor_privs(relid, userid, ACL_MAINTAIN))
1725 29 : return true;
1726 :
1727 6 : ereport(WARNING,
1728 : (errmsg("permission denied to cluster \"%s\", skipping it",
1729 : get_rel_name(relid))));
1730 6 : return false;
1731 : }
|