Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuum.c
4 : * The postgres vacuum cleaner.
5 : *
6 : * This file includes (a) control and dispatch code for VACUUM and ANALYZE
7 : * commands, (b) code to compute various vacuum thresholds, and (c) index
8 : * vacuum code.
9 : *
10 : * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
11 : * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
12 : * CLUSTER, handled in cluster.c.
13 : *
14 : *
15 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/commands/vacuum.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/clog.h"
29 : #include "access/commit_ts.h"
30 : #include "access/genam.h"
31 : #include "access/heapam.h"
32 : #include "access/htup_details.h"
33 : #include "access/multixact.h"
34 : #include "access/tableam.h"
35 : #include "access/transam.h"
36 : #include "access/xact.h"
37 : #include "catalog/namespace.h"
38 : #include "catalog/index.h"
39 : #include "catalog/pg_database.h"
40 : #include "catalog/pg_inherits.h"
41 : #include "catalog/pg_namespace.h"
42 : #include "commands/cluster.h"
43 : #include "commands/defrem.h"
44 : #include "commands/tablecmds.h"
45 : #include "commands/vacuum.h"
46 : #include "miscadmin.h"
47 : #include "nodes/makefuncs.h"
48 : #include "pgstat.h"
49 : #include "postmaster/autovacuum.h"
50 : #include "postmaster/bgworker_internals.h"
51 : #include "postmaster/interrupt.h"
52 : #include "storage/bufmgr.h"
53 : #include "storage/lmgr.h"
54 : #include "storage/pmsignal.h"
55 : #include "storage/proc.h"
56 : #include "storage/procarray.h"
57 : #include "utils/acl.h"
58 : #include "utils/fmgroids.h"
59 : #include "utils/guc.h"
60 : #include "utils/guc_hooks.h"
61 : #include "utils/memutils.h"
62 : #include "utils/pg_rusage.h"
63 : #include "utils/snapmgr.h"
64 : #include "utils/syscache.h"
65 :
66 :
67 : /*
68 : * GUC parameters
69 : */
70 : int vacuum_freeze_min_age;
71 : int vacuum_freeze_table_age;
72 : int vacuum_multixact_freeze_min_age;
73 : int vacuum_multixact_freeze_table_age;
74 : int vacuum_failsafe_age;
75 : int vacuum_multixact_failsafe_age;
76 :
77 : /*
78 : * Variables for cost-based vacuum delay. The defaults differ between
79 : * autovacuum and vacuum. They should be set with the appropriate GUC value in
80 : * vacuum code. They are initialized here to the defaults for client backends
81 : * executing VACUUM or ANALYZE.
82 : */
83 : double vacuum_cost_delay = 0;
84 : int vacuum_cost_limit = 200;
85 :
86 : /*
87 : * VacuumFailsafeActive is defined as a global so that we can determine
88 : * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
89 : * If failsafe mode has been engaged, we will not re-enable cost-based delay
90 : * for the table until after vacuuming has completed, regardless of other
91 : * settings.
92 : *
93 : * Only VACUUM code should inspect this variable and only table access methods
94 : * should set it to true. In Table AM-agnostic VACUUM code, this variable is
95 : * inspected to determine whether or not to allow cost-based delays. Table AMs
96 : * are free to set it if they desire this behavior, but it is false by default
97 : * and reset to false in between vacuuming each relation.
98 : */
99 : bool VacuumFailsafeActive = false;
100 :
101 : /*
102 : * Variables for cost-based parallel vacuum. See comments atop
103 : * compute_parallel_delay to understand how it works.
104 : */
105 : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
106 : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
107 : int VacuumCostBalanceLocal = 0;
108 :
109 : /* Prototypes for functions private to this file (all declared static) */
110 : static List *expand_vacuum_rel(VacuumRelation *vrel,
111 : MemoryContext vac_context, int options);
112 : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
113 : static void vac_truncate_clog(TransactionId frozenXID,
114 : MultiXactId minMulti,
115 : TransactionId lastSaneFrozenXid,
116 : MultiXactId lastSaneMinMulti);
117 : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
118 : bool skip_privs, BufferAccessStrategy bstrategy);
119 : static double compute_parallel_delay(void);
120 : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
121 : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
122 : static int vac_cmp_itemptr(const void *left, const void *right);
123 :
124 : /*
125 : * GUC check function to ensure GUC value specified is within the allowable
126 : * range.
    : *
    : * Returns true when *newval is 0 or lies within the inclusive range
    : * [MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB]. Otherwise it sets a
    : * GUC error detail message and returns false. The 'extra' and 'source'
    : * arguments are not used by this function.
127 : */
128 : bool
2 drowley 129 GNC 1857 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
130 : GucSource source)
131 : {
132 : /* 0 is always accepted; the upper and lower hard limits are inclusive */
133 1857 : if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
134 1857 : *newval <= MAX_BAS_VAC_RING_SIZE_KB))
135 1857 : return true;
136 :
137 : /* Value does not fall within any allowable range: report why and reject */
2 drowley 138 UNC 0 : GUC_check_errdetail("\"vacuum_buffer_usage_limit\" must be 0 or between %d kB and %d kB",
139 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
140 :
141 0 : return false;
142 : }
143 :
144 : /*
145 : * Primary entry point for manual VACUUM and ANALYZE commands
146 : *
147 : * This is mainly a preparation wrapper for the real operations that will
148 : * happen in vacuum(). It parses vacstmt->options into a VacuumParams,
    : * rejects unsupported option combinations, creates the "Vacuum" memory
    : * context (and, when needed, a buffer access strategy inside it), calls
    : * vacuum(), and finally deletes that memory context.
    : *
    : * pstate is used only to attach error cursor positions; isTopLevel is
    : * passed through unchanged to vacuum().
149 : */
150 : void
1483 rhaas 151 GIC 4969 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
152 : {
153 : VacuumParams params;
3 drowley 154 GNC 4969 : BufferAccessStrategy bstrategy = NULL;
1418 tgl 155 GIC 4969 : bool verbose = false;
156 4969 : bool skip_locked = false;
157 4969 : bool analyze = false;
158 4969 : bool freeze = false;
159 4969 : bool full = false;
160 4969 : bool disable_page_skipping = false;
34 michael 161 GNC 4969 : bool process_main = true;
789 michael 162 GIC 4969 : bool process_toast = true;
163 : int ring_size;
93 tgl 164 GNC 4969 : bool skip_database_stats = false;
165 4969 : bool only_database_stats = false;
166 : MemoryContext vac_context;
167 : ListCell *lc;
168 :
169 : /* index_cleanup and truncate values unspecified for now */
660 pg 170 GIC 4969 : params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
171 4969 : params.truncate = VACOPTVALUE_UNSPECIFIED;
172 :
173 : /* By default parallel vacuum is enabled (0); PARALLEL 0 is stored as -1 below to disable it */
1175 akapila 174 4969 : params.nworkers = 0;
175 :
176 : /*
177 : * Set this to an invalid value so it is clear whether or not a
178 : * BUFFER_USAGE_LIMIT was specified when making the access strategy.
179 : */
2 drowley 180 GNC 4969 : ring_size = -1;
181 :
182 : /* Parse options list */
1483 rhaas 183 GIC 8154 : foreach(lc, vacstmt->options)
1483 rhaas 184 ECB : {
1418 tgl 185 GIC 3203 : DefElem *opt = (DefElem *) lfirst(lc);
186 :
187 : /* Parse common options for VACUUM and ANALYZE */
1483 rhaas 188 CBC 3203 : if (strcmp(opt->defname, "verbose") == 0)
1472 189 14 : verbose = defGetBoolean(opt);
1483 190 3189 : else if (strcmp(opt->defname, "skip_locked") == 0)
1472 rhaas 191 GIC 167 : skip_locked = defGetBoolean(opt);
2 drowley 192 GNC 3022 : else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
193 : {
194 : const char *hintmsg;
195 : int result;
196 : char *vac_buffer_size;
197 :
198 24 : if (opt->arg == NULL)
199 : {
2 drowley 200 UNC 0 : ereport(ERROR,
201 : (errcode(ERRCODE_SYNTAX_ERROR),
202 : errmsg("buffer_usage_limit option requires a valid value"),
203 : parser_errposition(pstate, opt->location)));
204 : }
205 :
2 drowley 206 GNC 24 : vac_buffer_size = defGetString(opt);
207 :
208 24 : if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg))
209 : {
210 3 : ereport(ERROR,
211 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
212 : errmsg("value: \"%s\": is invalid for buffer_usage_limit",
213 : vac_buffer_size),
214 : hintmsg ? errhint("%s", _(hintmsg)) : 0));
215 : }
216 :
217 : /*
218 : * Check that the specified size falls within the hard upper and
219 : * lower limits if it is not 0. We explicitly disallow -1 since
220 : * that behavior can be obtained by not specifying
221 : * BUFFER_USAGE_LIMIT.
222 : */
223 21 : if (result != 0 &&
224 15 : (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB))
225 : {
226 6 : ereport(ERROR,
227 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
228 : errmsg("buffer_usage_limit option must be 0 or between %d kB and %d kB",
229 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB)));
230 : }
231 :
232 15 : ring_size = result;
233 : }
1483 rhaas 234 GIC 2998 : else if (!vacstmt->is_vacuumcmd)
1483 rhaas 235 GBC 3 : ereport(ERROR,
236 : (errcode(ERRCODE_SYNTAX_ERROR),
237 : errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
1483 rhaas 238 EUB : parser_errposition(pstate, opt->location)));
239 :
240 : /* Parse options available on VACUUM */
1483 rhaas 241 GIC 2995 : else if (strcmp(opt->defname, "analyze") == 0)
1472 242 410 : analyze = defGetBoolean(opt);
1483 243 2585 : else if (strcmp(opt->defname, "freeze") == 0)
1472 244 562 : freeze = defGetBoolean(opt);
1483 245 2023 : else if (strcmp(opt->defname, "full") == 0)
1472 246 173 : full = defGetBoolean(opt);
1483 247 1850 : else if (strcmp(opt->defname, "disable_page_skipping") == 0)
1472 rhaas 248 CBC 90 : disable_page_skipping = defGetBoolean(opt);
1466 rhaas 249 GIC 1760 : else if (strcmp(opt->defname, "index_cleanup") == 0)
250 : {
660 pg 251 ECB : /* Interpret no string as the default, which is 'auto' */
660 pg 252 CBC 86 : if (!opt->arg)
660 pg 253 LBC 0 : params.index_cleanup = VACOPTVALUE_AUTO;
660 pg 254 ECB : else
255 : {
660 pg 256 CBC 86 : char *sval = defGetString(opt);
660 pg 257 ECB :
258 : /* Try matching on 'auto' string, or fall back on boolean */
660 pg 259 CBC 86 : if (pg_strcasecmp(sval, "auto") == 0)
660 pg 260 GIC 3 : params.index_cleanup = VACOPTVALUE_AUTO;
660 pg 261 ECB : else
660 pg 262 CBC 83 : params.index_cleanup = get_vacoptval_from_boolean(opt);
263 : }
264 : }
34 michael 265 GNC 1674 : else if (strcmp(opt->defname, "process_main") == 0)
266 77 : process_main = defGetBoolean(opt);
789 michael 267 GIC 1597 : else if (strcmp(opt->defname, "process_toast") == 0)
268 80 : process_toast = defGetBoolean(opt);
1432 fujii 269 CBC 1517 : else if (strcmp(opt->defname, "truncate") == 0)
660 pg 270 74 : params.truncate = get_vacoptval_from_boolean(opt);
1175 akapila 271 GIC 1443 : else if (strcmp(opt->defname, "parallel") == 0)
272 : {
1175 akapila 273 CBC 169 : if (opt->arg == NULL)
274 : {
1175 akapila 275 GIC 3 : ereport(ERROR,
276 : (errcode(ERRCODE_SYNTAX_ERROR),
277 : errmsg("parallel option requires a value between 0 and %d",
278 : MAX_PARALLEL_WORKER_LIMIT),
1175 akapila 279 ECB : parser_errposition(pstate, opt->location)));
280 : }
281 : else
282 : {
283 : int nworkers;
284 :
1175 akapila 285 GIC 166 : nworkers = defGetInt32(opt);
286 166 : if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
1175 akapila 287 CBC 3 : ereport(ERROR,
1175 akapila 288 ECB : (errcode(ERRCODE_SYNTAX_ERROR),
684 289 : errmsg("parallel workers for vacuum must be between 0 and %d",
1175 290 : MAX_PARALLEL_WORKER_LIMIT),
291 : parser_errposition(pstate, opt->location)));
292 :
293 : /*
294 : * Disable parallel vacuum, if user has specified parallel
295 : * degree as zero.
296 : */
1175 akapila 297 CBC 163 : if (nworkers == 0)
1175 akapila 298 GIC 77 : params.nworkers = -1;
1175 akapila 299 EUB : else
1175 akapila 300 GIC 86 : params.nworkers = nworkers;
301 : }
302 : }
93 tgl 303 GNC 1274 : else if (strcmp(opt->defname, "skip_database_stats") == 0)
304 1234 : skip_database_stats = defGetBoolean(opt);
305 40 : else if (strcmp(opt->defname, "only_database_stats") == 0)
306 40 : only_database_stats = defGetBoolean(opt);
307 : else
1483 rhaas 308 UIC 0 : ereport(ERROR,
1483 rhaas 309 ECB : (errcode(ERRCODE_SYNTAX_ERROR),
310 : errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
311 : parser_errposition(pstate, opt->location)));
312 : }
2944 alvherre 313 :
314 : /* Fold the parsed option flags into the params.options bitmask */
1472 rhaas 315 GIC 4951 : params.options =
316 4951 : (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
317 4951 : (verbose ? VACOPT_VERBOSE : 0) |
318 4951 : (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
319 4951 : (analyze ? VACOPT_ANALYZE : 0) |
320 4951 : (freeze ? VACOPT_FREEZE : 0) |
321 4951 : (full ? VACOPT_FULL : 0) |
789 michael 322 4951 : (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
34 michael 323 GNC 4951 : (process_main ? VACOPT_PROCESS_MAIN : 0) |
93 tgl 324 4951 : (process_toast ? VACOPT_PROCESS_TOAST : 0) |
325 4951 : (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
326 4951 : (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
327 :
328 : /* sanity checks on options */
1483 rhaas 329 CBC 4951 : Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
330 4951 : Assert((params.options & VACOPT_VACUUM) ||
331 : !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
2944 alvherre 332 ECB :
1088 akapila 333 GIC 4951 : if ((params.options & VACOPT_FULL) && params.nworkers > 0)
1175 334 3 : ereport(ERROR,
335 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
336 : errmsg("VACUUM FULL cannot be performed in parallel")));
337 :
338 : /*
339 : * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
340 : * ERROR for that case. VACUUM (FULL, ANALYZE) does make use of it, so
341 : * we'll permit that.
342 : */
2 drowley 343 GNC 4948 : if (ring_size != -1 && (params.options & VACOPT_FULL) &&
344 3 : !(params.options & VACOPT_ANALYZE))
345 3 : ereport(ERROR,
346 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
347 : errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
348 :
2014 tgl 349 ECB : /*
350 : * Make sure VACOPT_ANALYZE is specified if any column lists are present.
351 : */
1483 rhaas 352 CBC 4945 : if (!(params.options & VACOPT_ANALYZE))
353 : {
2014 tgl 354 GIC 3858 : foreach(lc, vacstmt->rels)
355 : {
2014 tgl 356 CBC 1768 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
2014 tgl 357 ECB :
2014 tgl 358 CBC 1768 : if (vrel->va_cols != NIL)
359 3 : ereport(ERROR,
2014 tgl 360 ECB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
361 : errmsg("ANALYZE option must be specified when a column list is provided")));
362 : }
363 : }
364 :
365 :
366 : /*
367 : * Sanity check DISABLE_PAGE_SKIPPING option.
368 : */
3 drowley 369 GNC 4942 : if ((params.options & VACOPT_FULL) != 0 &&
370 161 : (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
3 drowley 371 UNC 0 : ereport(ERROR,
372 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
373 : errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
374 :
375 : /* sanity check for PROCESS_TOAST */
3 drowley 376 GNC 4942 : if ((params.options & VACOPT_FULL) != 0 &&
377 161 : (params.options & VACOPT_PROCESS_TOAST) == 0)
378 3 : ereport(ERROR,
379 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
380 : errmsg("PROCESS_TOAST required with VACUUM FULL")));
381 :
382 : /* sanity check for ONLY_DATABASE_STATS */
383 4939 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
384 : {
385 40 : Assert(params.options & VACOPT_VACUUM);
386 40 : if (vacstmt->rels != NIL)
387 3 : ereport(ERROR,
388 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
389 : errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
390 : /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
391 37 : if (params.options & ~(VACOPT_VACUUM |
392 : VACOPT_VERBOSE |
393 : VACOPT_PROCESS_MAIN |
394 : VACOPT_PROCESS_TOAST |
395 : VACOPT_ONLY_DATABASE_STATS))
3 drowley 396 UNC 0 : ereport(ERROR,
397 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
398 : errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
399 : }
400 :
401 : /*
402 : * All freeze ages are zero if the FREEZE option is given; otherwise pass
2944 alvherre 403 ECB : * them as -1 which means to use the default values.
2944 alvherre 404 EUB : */
1483 rhaas 405 GIC 4936 : if (params.options & VACOPT_FREEZE)
406 : {
2944 alvherre 407 CBC 562 : params.freeze_min_age = 0;
2944 alvherre 408 GIC 562 : params.freeze_table_age = 0;
409 562 : params.multixact_freeze_min_age = 0;
2944 alvherre 410 CBC 562 : params.multixact_freeze_table_age = 0;
2944 alvherre 411 ECB : }
412 : else
413 : {
2944 alvherre 414 GIC 4374 : params.freeze_min_age = -1;
415 4374 : params.freeze_table_age = -1;
2944 alvherre 416 CBC 4374 : params.multixact_freeze_min_age = -1;
417 4374 : params.multixact_freeze_table_age = -1;
2944 alvherre 418 ECB : }
419 :
420 : /* user-invoked vacuum is never "for wraparound" */
2944 alvherre 421 CBC 4936 : params.is_wraparound = false;
2944 alvherre 422 ECB :
423 : /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
2928 alvherre 424 CBC 4936 : params.log_min_duration = -1;
425 :
426 : /*
427 : * Create special memory context for cross-transaction storage.
428 : *
429 : * Since it is a child of PortalContext, it will go away eventually even
430 : * if we suffer an error; there's no need for special abort cleanup logic.
431 : */
3 drowley 432 GNC 4936 : vac_context = AllocSetContextCreate(PortalContext,
433 : "Vacuum",
434 : ALLOCSET_DEFAULT_SIZES);
435 :
436 : /*
437 : * Make a buffer strategy object in the cross-transaction memory context.
438 : * We needn't bother making this for VACUUM (FULL) or VACUUM
439 : * (ONLY_DATABASE_STATS) as they'll not make use of it. VACUUM (FULL,
440 : * ANALYZE) is possible, so we'd better ensure that we make a strategy
441 : * when we see ANALYZE.
442 : */
443 4936 : if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
444 195 : VACOPT_FULL)) == 0 ||
445 195 : (params.options & VACOPT_ANALYZE) != 0)
446 : {
447 :
448 4744 : MemoryContext old_context = MemoryContextSwitchTo(vac_context);
449 :
2 450 4744 : Assert(ring_size >= -1);
451 :
452 : /*
453 : * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
454 : * command, it overrides the value of VacuumBufferUsageLimit. Either
455 : * value may be 0, in which case GetAccessStrategyWithSize() will
456 : * return NULL, effectively allowing full use of shared buffers.
457 : */
458 4744 : if (ring_size == -1)
459 4732 : ring_size = VacuumBufferUsageLimit;
460 :
461 4744 : bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
462 :
3 463 4744 : MemoryContextSwitchTo(old_context);
464 : }
465 :
2944 alvherre 466 ECB : /* Now go through the common routine, passing the address of the local params struct */
3 drowley 467 GNC 4936 : vacuum(vacstmt->rels, &params, bstrategy, vac_context, isTopLevel);
468 :
469 : /* Finally, clean up the vacuum memory context */
470 4873 : MemoryContextDelete(vac_context);
2944 alvherre 471 GIC 4873 : }
472 :
473 : /*
474 : * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
475 : *
476 : * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
477 : * we process all relevant tables in the database. For each VacuumRelation,
478 : * if a valid OID is supplied, the table with that OID is what to process;
2014 tgl 479 ECB : * otherwise, the VacuumRelation's RangeVar indicates what to process.
5352 alvherre 480 : *
2944 481 : * params contains a set of parameters that can be used to customize the
482 : * behavior.
483 : *
484 : * bstrategy may be passed in as NULL when the caller does not want to
485 : * restrict the number of shared_buffers that VACUUM / ANALYZE can use,
486 : * otherwise, the caller must build a BufferAccessStrategy with the number of
487 : * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
488 : * using.
489 : *
    : * vac_context is the memory context in which the expanded list of
    : * relations to process is built.
    : *
490 : * isTopLevel should be passed down from ProcessUtility.
491 : *
492 : * It is the caller's responsibility that all parameters are allocated in a
493 : * memory context that will not disappear at transaction commit.
8007 tgl 494 : */
9770 scrappy 495 : void
3 drowley 496 GNC 5094 : vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
497 : MemoryContext vac_context, bool isTopLevel)
498 : {
499 : static bool in_vacuum = false;
2014 tgl 500 ECB : /* in_vacuum is set while the relation loop below runs; it is tested further down to reject recursive calls */
4892 501 : const char *stmttype;
4381 peter_e 502 : volatile bool in_outer_xact,
6896 tgl 503 : use_own_xacts;
504 :
2944 alvherre 505 GBC 5094 : Assert(params != NULL);
506 :
1483 rhaas 507 GIC 5094 : stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
508 :
509 : /*
510 : * We cannot run VACUUM inside a user transaction block; if we were inside
511 : * a transaction, then our commit- and start-transaction-command calls
4790 bruce 512 ECB : * would not have the intended effect! There are numerous other subtle
4808 tgl 513 : * dependencies on this, too.
6896 514 : *
515 : * ANALYZE (without VACUUM) can run either way.
9345 bruce 516 : */
1483 rhaas 517 CBC 5094 : if (params->options & VACOPT_VACUUM)
6896 tgl 518 ECB : {
1878 peter_e 519 CBC 2569 : PreventInTransactionBlock(isTopLevel, stmttype);
6896 tgl 520 2563 : in_outer_xact = false;
6896 tgl 521 ECB : }
522 : else
1878 peter_e 523 CBC 2525 : in_outer_xact = IsInTransactionBlock(isTopLevel);
524 :
525 : /*
526 : * Check for and disallow recursive calls. This could happen when VACUUM
527 : * FULL or ANALYZE calls a hostile index expression that itself calls
528 : * ANALYZE.
529 : */
3014 noah 530 5088 : if (in_vacuum)
2807 tgl 531 6 : ereport(ERROR,
532 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
533 : errmsg("%s cannot be executed from VACUUM or ANALYZE",
534 : stmttype)));
535 :
6478 tgl 536 ECB : /*
537 : * Build list of relation(s) to process, putting any new data in
538 : * vac_context for safekeeping.
539 : */
93 tgl 540 GNC 5082 : if (params->options & VACOPT_ONLY_DATABASE_STATS)
541 : {
542 : /* We don't process any tables in this case */
543 37 : Assert(relations == NIL);
544 : }
545 5045 : else if (relations != NIL)
2014 tgl 546 ECB : {
2014 tgl 547 GIC 4434 : List *newrels = NIL;
2014 tgl 548 ECB : ListCell *lc;
549 :
2014 tgl 550 CBC 8912 : foreach(lc, relations)
551 : {
2014 tgl 552 GIC 4496 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
553 : List *sublist;
2014 tgl 554 ECB : MemoryContext old_context;
555 :
6 drowley 556 GNC 4496 : sublist = expand_vacuum_rel(vrel, vac_context, params->options);
2014 tgl 557 GIC 4478 : old_context = MemoryContextSwitchTo(vac_context);
558 4478 : newrels = list_concat(newrels, sublist);
2014 tgl 559 GBC 4478 : MemoryContextSwitchTo(old_context);
560 : }
2014 tgl 561 GIC 4416 : relations = newrels;
562 : }
563 : else
6 drowley 564 GNC 611 : relations = get_all_vacuum_rels(vac_context, params->options);
565 :
566 : /*
567 : * Decide whether we need to start/commit our own transactions.
6896 tgl 568 ECB : *
569 : * For VACUUM (with or without ANALYZE): always do so, so that we can
6347 bruce 570 : * release locks as soon as possible. (We could possibly use the outer
6385 571 : * transaction for a one-table VACUUM, but handling TOAST tables would be
572 : * problematic.)
6896 tgl 573 : *
574 : * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
575 : * start/commit our own transactions. Also, there's no need to do so if
576 : * only processing one relation. For multiple relations when not within a
5778 alvherre 577 : * transaction block, and also in an autovacuum worker, use own
578 : * transactions so we can release locks sooner.
6896 tgl 579 : */
1483 rhaas 580 CBC 5064 : if (params->options & VACOPT_VACUUM)
6896 tgl 581 GIC 2557 : use_own_xacts = true;
582 : else
583 : {
1483 rhaas 584 CBC 2507 : Assert(params->options & VACOPT_ANALYZE);
5778 alvherre 585 GIC 2507 : if (IsAutoVacuumWorkerProcess())
586 83 : use_own_xacts = true;
5778 alvherre 587 CBC 2424 : else if (in_outer_xact)
6896 tgl 588 GIC 104 : use_own_xacts = false;
6892 neilc 589 2320 : else if (list_length(relations) > 1)
6896 tgl 590 578 : use_own_xacts = true;
591 : else
592 1742 : use_own_xacts = false;
593 : }
594 :
7603 tgl 595 ECB : /*
596 : * vacuum_rel expects to be entered with no transaction active; it will
597 : * start and commit its own transaction. But we are called by an SQL
598 : * command, and so we are executing inside a transaction already. We
599 : * commit the transaction started in PostgresMain() here, and start
600 : * another one before exiting to match the commit waiting for us back in
601 : * PostgresMain().
602 : */
6896 tgl 603 GIC 5064 : if (use_own_xacts)
604 : {
3083 605 3218 : Assert(!in_outer_xact);
3083 tgl 606 ECB :
5445 alvherre 607 : /* ActiveSnapshot is not set by autovacuum */
5445 alvherre 608 CBC 3218 : if (ActiveSnapshotSet())
5445 alvherre 609 GIC 3060 : PopActiveSnapshot();
610 :
7605 bruce 611 ECB : /* matches the StartTransaction in PostgresMain() */
7270 tgl 612 GIC 3218 : CommitTransactionCommand();
7605 bruce 613 ECB : }
614 :
615 : /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
6826 tgl 616 GIC 5064 : PG_TRY();
617 : {
618 : ListCell *cur;
619 :
3014 noah 620 5064 : in_vacuum = true;
2 dgustafsson 621 GNC 5064 : VacuumFailsafeActive = false;
622 5064 : VacuumUpdateCosts();
6826 tgl 623 CBC 5064 : VacuumCostBalance = 0;
4153 alvherre 624 GIC 5064 : VacuumPageHit = 0;
4153 alvherre 625 CBC 5064 : VacuumPageMiss = 0;
4153 alvherre 626 GIC 5064 : VacuumPageDirty = 0;
1175 akapila 627 CBC 5064 : VacuumCostBalanceLocal = 0;
1175 akapila 628 GIC 5064 : VacuumSharedCostBalance = NULL;
629 5064 : VacuumActiveNWorkers = NULL;
630 :
6758 bruce 631 ECB : /*
632 : * Loop to process each selected relation.
633 : */
6826 tgl 634 CBC 52238 : foreach(cur, relations)
7605 bruce 635 ECB : {
2014 tgl 636 GIC 47207 : VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
637 :
1483 rhaas 638 47207 : if (params->options & VACOPT_VACUUM)
639 : {
6 drowley 640 GNC 23618 : if (!vacuum_rel(vrel->oid, vrel->relation, params, false,
641 : bstrategy))
4444 rhaas 642 GIC 31 : continue; /* vacuum_rel returned false: skip the ANALYZE step for this relation */
643 : }
644 :
1483 645 47173 : if (params->options & VACOPT_ANALYZE)
646 : {
647 : /*
648 : * If using separate xacts, start one for analyze. Otherwise,
649 : * we can use the outer transaction.
650 : */
6826 tgl 651 24097 : if (use_own_xacts)
652 : {
653 22273 : StartTransactionCommand();
654 : /* functions in indexes may want a snapshot set */
5445 alvherre 655 22273 : PushActiveSnapshot(GetTransactionSnapshot());
656 : }
657 :
1483 rhaas 658 24097 : analyze_rel(vrel->oid, vrel->relation, params,
659 : vrel->va_cols, in_outer_xact, bstrategy);
660 :
6826 tgl 661 CBC 24067 : if (use_own_xacts)
662 : {
5445 alvherre 663 GIC 22254 : PopActiveSnapshot();
6826 tgl 664 22254 : CommitTransactionCommand();
665 : }
666 : else
667 : {
668 : /*
669 : * If we're not using separate xacts, better separate the
1338 tgl 670 ECB : * ANALYZE actions with CCIs. This avoids trouble if user
671 : * says "ANALYZE t, t".
672 : */
1338 tgl 673 GIC 1813 : CommandCounterIncrement();
674 : }
675 : }
676 :
677 : /*
678 : * Ensure VacuumFailsafeActive has been reset before vacuuming the
679 : * next relation.
680 : */
2 dgustafsson 681 GNC 47143 : VacuumFailsafeActive = false;
682 : }
683 : }
1255 peter 684 GIC 33 : PG_FINALLY();
685 : { /* undo the flags set at the top of the PG_TRY block: in_vacuum, cost accounting, failsafe */
3014 noah 686 5064 : in_vacuum = false;
6826 tgl 687 5064 : VacuumCostActive = false;
2 dgustafsson 688 GNC 5064 : VacuumFailsafeActive = false;
689 5064 : VacuumCostBalance = 0;
6826 tgl 690 ECB : }
6826 tgl 691 GIC 5064 : PG_END_TRY();
6826 tgl 692 ECB :
6758 bruce 693 : /*
694 : * Finish up processing.
695 : */
6896 tgl 696 CBC 5031 : if (use_own_xacts)
697 : {
698 : /* here, we are not in a transaction */
699 :
700 : /*
701 : * This matches the CommitTransaction waiting for us in
702 : * PostgresMain().
7527 tgl 703 ECB : */
7270 tgl 704 CBC 3196 : StartTransactionCommand();
705 : }
706 :
93 tgl 707 GNC 5031 : if ((params->options & VACOPT_VACUUM) &&
708 2541 : !(params->options & VACOPT_SKIP_DATABASE_STATS))
709 : {
710 : /*
711 : * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
    : * This is skipped for ANALYZE-only runs and when SKIP_DATABASE_STATS
    : * was requested.
712 : */
5999 tgl 713 CBC 1233 : vac_update_datfrozenxid();
714 : }
715 :
9770 scrappy 716 5031 : }
717 :
1686 michael 718 ECB : /*
719 : * Check if the current user has privileges to vacuum or analyze the relation.
720 : * If not, issue a WARNING log message and return false to let the caller
721 : * decide what to do with this relation. This routine is used to decide if a
722 : * relation can be processed for VACUUM or ANALYZE.
    : *
    : * relid is the relation's OID, reltuple its pg_class row (used for
    : * relisshared and relname), and options must include VACOPT_VACUUM and/or
    : * VACOPT_ANALYZE; it selects which WARNING text is emitted.
723 : */
724 : bool
132 andrew 725 GNC 93946 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
726 : bits32 options)
1686 michael 727 ECB : {
728 : char *relname;
729 :
1686 michael 730 CBC 93946 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
731 :
732 : /*----------
733 : * A role has privileges to vacuum or analyze the relation if any of the
734 : * following are true:
735 : * - the role is a superuser
736 : * - the role owns the relation
737 : * - the role owns the current database and the relation is not shared
738 : * - the role has been granted the MAINTAIN privilege on the relation
739 : * - the role has privileges to vacuum/analyze any of the relation's
740 : * partition ancestors
741 : *
    : * NOTE(review): only the last three conditions are tested explicitly
    : * below; the superuser and relation-owner cases are presumably handled
    : * inside the called privilege-check functions -- confirm.
742 : *----------
743 : */
86 jdavis 743 GNC 109308 : if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) && !reltuple->relisshared) ||
744 15577 : pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK ||
745 215 : has_partition_ancestor_privs(relid, GetUserId(), ACL_MAINTAIN))
1686 michael 746 GIC 93830 : return true;
1686 michael 747 ECB : /* Permission denied from here on: warn using the relation's name, then return false */
1686 michael 748 CBC 116 : relname = NameStr(reltuple->relname);
749 :
1686 michael 750 GIC 116 : if ((options & VACOPT_VACUUM) != 0)
1686 michael 751 ECB : {
137 andrew 752 GNC 76 : ereport(WARNING,
753 : (errmsg("permission denied to vacuum \"%s\", skipping it",
754 : relname)));
755 :
756 : /*
757 : * For VACUUM ANALYZE, both logs could show up, but just generate
758 : * information for VACUUM as that would be the first one to be
759 : * processed.
760 : */
1686 michael 761 CBC 76 : return false;
762 : }
1686 michael 763 ECB : /* VACOPT_VACUUM is not set here, so per the Assert at the top VACOPT_ANALYZE is expected */
1686 michael 764 GIC 40 : if ((options & VACOPT_ANALYZE) != 0)
137 andrew 765 GNC 40 : ereport(WARNING,
766 : (errmsg("permission denied to analyze \"%s\", skipping it",
767 : relname)));
1686 michael 768 ECB :
1686 michael 769 CBC 40 : return false;
1686 michael 770 ECB : }
771 :
772 :
1650 773 : /*
774 : * vacuum_open_relation
775 : *
776 : * This routine is used for attempting to open and lock a relation which
777 : * is going to be vacuumed or analyzed. If the relation cannot be opened
778 : * or locked, a log is emitted if possible.
779 : */
780 : Relation
811 michael 781 CBC 61292 : vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
782 : bool verbose, LOCKMODE lmode)
1650 michael 783 ECB : {
784 : Relation rel;
1650 michael 785 CBC 61292 : bool rel_lock = true;
786 : int elevel;
1650 michael 787 ECB :
1650 michael 788 GIC 61292 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
1650 michael 789 ECB :
790 : /*
791 : * Open the relation and get the appropriate lock on it.
792 : *
793 : * There's a race condition here: the relation may have gone away since
794 : * the last time we saw it. If so, we don't need to vacuum or analyze it.
795 : *
796 : * If we've been asked not to wait for the relation lock, acquire it first
797 : * in non-blocking mode, before calling try_relation_open().
798 : */
1650 michael 799 GIC 61292 : if (!(options & VACOPT_SKIP_LOCKED))
734 pg 800 CBC 60831 : rel = try_relation_open(relid, lmode);
1650 michael 801 GIC 461 : else if (ConditionalLockRelationOid(relid, lmode))
734 pg 802 CBC 451 : rel = try_relation_open(relid, NoLock);
803 : else
804 : {
805 10 : rel = NULL;
1650 michael 806 GIC 10 : rel_lock = false;
807 : }
1650 michael 808 ECB :
809 : /* if relation is opened, leave */
734 pg 810 CBC 61292 : if (rel)
811 61276 : return rel;
812 :
813 : /*
814 : * Relation could not be opened, hence generate if possible a log
815 : * informing on the situation.
816 : *
817 : * If the RangeVar is not defined, we do not have enough information to
818 : * provide a meaningful log statement. Chances are that the caller has
819 : * intentionally not provided this information so that this logging is
1650 michael 820 ECB : * skipped, anyway.
821 : */
1650 michael 822 GIC 16 : if (relation == NULL)
823 9 : return NULL;
824 :
825 : /*
826 : * Determine the log level.
827 : *
1418 tgl 828 ECB : * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
829 : * statements in the permission checks; otherwise, only log if the caller
830 : * so requested.
1650 michael 831 : */
1650 michael 832 GIC 7 : if (!IsAutoVacuumWorkerProcess())
1650 michael 833 CBC 7 : elevel = WARNING;
1483 rhaas 834 LBC 0 : else if (verbose)
1650 michael 835 0 : elevel = LOG;
1650 michael 836 ECB : else
1650 michael 837 UIC 0 : return NULL;
1650 michael 838 ECB :
1650 michael 839 GIC 7 : if ((options & VACOPT_VACUUM) != 0)
840 : {
841 5 : if (!rel_lock)
842 3 : ereport(elevel,
1650 michael 843 ECB : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
844 : errmsg("skipping vacuum of \"%s\" --- lock not available",
845 : relation->relname)));
846 : else
1650 michael 847 GIC 2 : ereport(elevel,
848 : (errcode(ERRCODE_UNDEFINED_TABLE),
849 : errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
850 : relation->relname)));
1650 michael 851 ECB :
852 : /*
853 : * For VACUUM ANALYZE, both logs could show up, but just generate
854 : * information for VACUUM as that would be the first one to be
855 : * processed.
856 : */
1650 michael 857 GIC 5 : return NULL;
858 : }
859 :
1650 michael 860 CBC 2 : if ((options & VACOPT_ANALYZE) != 0)
861 : {
1650 michael 862 GIC 2 : if (!rel_lock)
1650 michael 863 CBC 1 : ereport(elevel,
864 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
865 : errmsg("skipping analyze of \"%s\" --- lock not available",
866 : relation->relname)));
867 : else
1650 michael 868 GIC 1 : ereport(elevel,
869 : (errcode(ERRCODE_UNDEFINED_TABLE),
870 : errmsg("skipping analyze of \"%s\" --- relation no longer exists",
871 : relation->relname)));
1650 michael 872 ECB : }
873 :
1650 michael 874 GIC 2 : return NULL;
875 : }
876 :
1650 michael 877 ECB :
878 : /*
879 : * Given a VacuumRelation, fill in the table OID if it wasn't specified,
880 : * and optionally add VacuumRelations for partitions of the table.
881 : *
882 : * If a VacuumRelation does not have an OID supplied and is a partitioned
883 : * table, an extra entry will be added to the output for each partition.
884 : * Presently, only autovacuum supplies OIDs when calling vacuum(), and
885 : * it does not want us to expand partitioned tables.
886 : *
887 : * We take care not to modify the input data structure, but instead build
888 : * new VacuumRelation(s) to return. (But note that they will reference
889 : * unmodified parts of the input, eg column lists.) New data structures
2014 tgl 890 : * are made in vac_context.
9770 scrappy 891 : */
7677 tgl 892 : static List *
6 drowley 893 GNC 4496 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
894 : int options)
895 : {
2014 tgl 896 CBC 4496 : List *vacrels = NIL;
897 : MemoryContext oldcontext;
7677 tgl 898 ECB :
899 : /* If caller supplied OID, there's nothing we need do here. */
2014 tgl 900 CBC 4496 : if (OidIsValid(vrel->oid))
901 : {
5421 alvherre 902 GIC 158 : oldcontext = MemoryContextSwitchTo(vac_context);
2014 tgl 903 158 : vacrels = lappend(vacrels, vrel);
5421 alvherre 904 158 : MemoryContextSwitchTo(oldcontext);
905 : }
906 : else
907 : {
908 : /* Process a specific relation, and possibly partitions thereof */
7522 bruce 909 ECB : Oid relid;
910 : HeapTuple tuple;
911 : Form_pg_class classForm;
2229 rhaas 912 : bool include_parts;
1648 michael 913 : int rvr_opts;
914 :
915 : /*
916 : * Since autovacuum workers supply OIDs when calling vacuum(), no
1732 917 : * autovacuum worker should reach this code.
918 : */
1732 michael 919 GIC 4338 : Assert(!IsAutoVacuumWorkerProcess());
920 :
921 : /*
922 : * We transiently take AccessShareLock to protect the syscache lookup
923 : * below, as well as find_all_inheritors's expectation that the caller
924 : * holds some lock on the starting relation.
925 : */
1648 926 4338 : rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
927 4338 : relid = RangeVarGetRelidExtended(vrel->relation,
928 : AccessShareLock,
1648 michael 929 ECB : rvr_opts,
930 : NULL, NULL);
931 :
932 : /*
933 : * If the lock is unavailable, emit the same log statement that
934 : * vacuum_rel() and analyze_rel() would.
935 : */
1648 michael 936 CBC 4320 : if (!OidIsValid(relid))
937 : {
1648 michael 938 GIC 4 : if (options & VACOPT_VACUUM)
939 3 : ereport(WARNING,
940 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
941 : errmsg("skipping vacuum of \"%s\" --- lock not available",
942 : vrel->relation->relname)));
943 : else
944 1 : ereport(WARNING,
945 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
946 : errmsg("skipping analyze of \"%s\" --- lock not available",
1648 michael 947 ECB : vrel->relation->relname)));
1648 michael 948 CBC 4 : return vacrels;
1648 michael 949 ECB : }
2014 tgl 950 :
951 : /*
952 : * To check whether the relation is a partitioned table and its
1686 michael 953 : * ownership, fetch its syscache entry.
2229 rhaas 954 : */
2229 rhaas 955 GIC 4316 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
956 4316 : if (!HeapTupleIsValid(tuple))
2229 rhaas 957 UIC 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
2229 rhaas 958 CBC 4316 : classForm = (Form_pg_class) GETSTRUCT(tuple);
1686 michael 959 ECB :
960 : /*
961 : * Make a returnable VacuumRelation for this rel if the user has the
962 : * required privileges.
963 : */
132 andrew 964 GNC 4316 : if (vacuum_is_permitted_for_relation(relid, classForm, options))
965 : {
1686 michael 966 GIC 4227 : oldcontext = MemoryContextSwitchTo(vac_context);
967 4227 : vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
968 : relid,
969 : vrel->va_cols));
1686 michael 970 CBC 4227 : MemoryContextSwitchTo(oldcontext);
1686 michael 971 ECB : }
972 :
973 :
2229 rhaas 974 GIC 4316 : include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
975 4316 : ReleaseSysCache(tuple);
976 :
977 : /*
978 : * If it is, make relation list entries for its partitions. Note that
979 : * the list returned by find_all_inheritors() includes the passed-in
2014 tgl 980 ECB : * OID, so we have to skip that. There's no point in taking locks on
981 : * the individual partitions yet, and doing so would just add
1686 michael 982 EUB : * unnecessary deadlock risk. For this last reason we do not check
983 : * yet the ownership of the partitions, which get added to the list to
984 : * process. Ownership will be checked later on anyway.
2229 rhaas 985 : */
2229 rhaas 986 GIC 4316 : if (include_parts)
2014 tgl 987 ECB : {
2014 tgl 988 GIC 342 : List *part_oids = find_all_inheritors(relid, NoLock, NULL);
2014 tgl 989 ECB : ListCell *part_lc;
990 :
2014 tgl 991 GIC 1599 : foreach(part_lc, part_oids)
992 : {
993 1257 : Oid part_oid = lfirst_oid(part_lc);
994 :
2014 tgl 995 CBC 1257 : if (part_oid == relid)
2014 tgl 996 GIC 342 : continue; /* ignore original table */
997 :
998 : /*
999 : * We omit a RangeVar since it wouldn't be appropriate to
1000 : * complain about failure to open one of these relations
1001 : * later.
1002 : */
1003 915 : oldcontext = MemoryContextSwitchTo(vac_context);
1004 915 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
2014 tgl 1005 ECB : part_oid,
1006 : vrel->va_cols));
2014 tgl 1007 GIC 915 : MemoryContextSwitchTo(oldcontext);
2014 tgl 1008 ECB : }
1009 : }
2018 1010 :
1011 : /*
1012 : * Release lock again. This means that by the time we actually try to
1013 : * process the table, it might be gone or renamed. In the former case
1014 : * we'll silently ignore it; in the latter case we'll process it
1015 : * anyway, but we must beware that the RangeVar doesn't necessarily
1016 : * identify it anymore. This isn't ideal, perhaps, but there's little
1017 : * practical alternative, since we're typically going to commit this
1018 : * transaction and begin a new one between now and then. Moreover,
1019 : * holding locks on multiple relations would create significant risk
1020 : * of deadlock.
1021 : */
2018 tgl 1022 CBC 4316 : UnlockRelationOid(relid, AccessShareLock);
1023 : }
1024 :
2014 tgl 1025 GIC 4474 : return vacrels;
1026 : }
1027 :
1028 : /*
1029 : * Construct a list of VacuumRelations for all vacuumable rels in
1030 : * the current database. The list is built in vac_context.
1031 : */
1032 : static List *
6 drowley 1033 GNC 611 : get_all_vacuum_rels(MemoryContext vac_context, int options)
1034 : {
2014 tgl 1035 GIC 611 : List *vacrels = NIL;
1036 : Relation pgclass;
1037 : TableScanDesc scan;
1038 : HeapTuple tuple;
1039 :
1539 andres 1040 611 : pgclass = table_open(RelationRelationId, AccessShareLock);
9345 bruce 1041 ECB :
1490 andres 1042 GIC 611 : scan = table_beginscan_catalog(pgclass, 0, NULL);
1043 :
2014 tgl 1044 CBC 253884 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1045 : {
2014 tgl 1046 GIC 253273 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
1047 : MemoryContext oldcontext;
1601 andres 1048 CBC 253273 : Oid relid = classForm->oid;
1049 :
1050 : /*
1051 : * We include partitioned tables here; depending on which operation is
1052 : * to be performed, caller will decide whether to process or ignore
1053 : * them.
1054 : */
2014 tgl 1055 GIC 253273 : if (classForm->relkind != RELKIND_RELATION &&
1056 211366 : classForm->relkind != RELKIND_MATVIEW &&
1057 211363 : classForm->relkind != RELKIND_PARTITIONED_TABLE)
1058 211342 : continue;
1059 :
1060 : /* check permissions of relation */
86 jdavis 1061 GNC 41931 : if (!vacuum_is_permitted_for_relation(relid, classForm, options))
86 jdavis 1062 UNC 0 : continue;
1063 :
1064 : /*
1065 : * Build VacuumRelation(s) specifying the table OIDs to be processed.
1066 : * We omit a RangeVar since it wouldn't be appropriate to complain
2014 tgl 1067 ECB : * about failure to open one of these relations later.
1068 : */
2014 tgl 1069 GIC 41931 : oldcontext = MemoryContextSwitchTo(vac_context);
1070 41931 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1071 : relid,
1072 : NIL));
1073 41931 : MemoryContextSwitchTo(oldcontext);
9345 bruce 1074 ECB : }
1075 :
1490 andres 1076 GIC 611 : table_endscan(scan);
1539 1077 611 : table_close(pgclass, AccessShareLock);
1078 :
2014 tgl 1079 611 : return vacrels;
1080 : }
1081 :
1082 : /*
1083 : * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
3419 alvherre 1084 ECB : *
1085 : * The target relation and VACUUM parameters are our inputs.
950 tgl 1086 : *
1087 : * Output parameters are the cutoffs that VACUUM caller should use.
3419 alvherre 1088 : *
1089 : * Return value indicates if vacuumlazy.c caller should make its VACUUM
1090 : * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to
1091 : * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
1092 : * minimum).
7896 tgl 1093 : */
1094 : bool
108 pg 1095 GNC 37111 : vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
1096 : struct VacuumCutoffs *cutoffs)
1097 : {
1098 : int freeze_min_age,
1099 : multixact_freeze_min_age,
1100 : freeze_table_age,
1101 : multixact_freeze_table_age,
1102 : effective_multixact_freeze_max_age;
1103 : TransactionId nextXID,
1104 : safeOldestXmin,
1105 : aggressiveXIDCutoff;
1106 : MultiXactId nextMXID,
1107 : safeOldestMxact,
1108 : aggressiveMXIDCutoff;
1109 :
1110 : /* Use mutable copies of freeze age parameters */
137 1111 37111 : freeze_min_age = params->freeze_min_age;
1112 37111 : multixact_freeze_min_age = params->multixact_freeze_min_age;
1113 37111 : freeze_table_age = params->freeze_table_age;
1114 37111 : multixact_freeze_table_age = params->multixact_freeze_table_age;
1115 :
1116 : /* Set pg_class fields in cutoffs */
108 1117 37111 : cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
1118 37111 : cutoffs->relminmxid = rel->rd_rel->relminmxid;
1119 :
1120 : /*
1121 : * Acquire OldestXmin.
1122 : *
1123 : * We can always ignore processes running lazy vacuum. This is because we
1124 : * use these values only for deciding which tuples we must keep in the
1125 : * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
1126 : * XID assigned), it's safe to ignore it. In theory it could be
1127 : * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
1128 : * that only one vacuum process can be working on a particular table at
893 andres 1129 ECB : * any time, and that each vacuum is always an independent transaction.
1130 : */
108 pg 1131 GNC 37111 : cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
1132 :
893 andres 1133 GIC 37111 : if (OldSnapshotThresholdActive())
970 andres 1134 ECB : {
1135 : TransactionId limit_xmin;
893 1136 : TimestampTz limit_ts;
1137 :
108 pg 1138 GNC 3 : if (TransactionIdLimitedForOldSnapshots(cutoffs->OldestXmin, rel,
893 andres 1139 ECB : &limit_xmin, &limit_ts))
1140 : {
1141 : /*
1142 : * TODO: We should only set the threshold if we are pruning on the
1143 : * basis of the increased limits. Not as crucial here as it is
1144 : * for opportunistic pruning (which often happens at a much higher
1145 : * frequency), but would still be a significant improvement.
1146 : */
893 andres 1147 CBC 3 : SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
108 pg 1148 GNC 3 : cutoffs->OldestXmin = limit_xmin;
1149 : }
970 andres 1150 ECB : }
1151 :
108 pg 1152 GNC 37111 : Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
1153 :
1154 : /* Acquire OldestMxact */
1155 37111 : cutoffs->OldestMxact = GetOldestMultiXactId();
1156 37111 : Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
1157 :
1158 : /* Acquire next XID/next MXID values used to apply age-based settings */
221 1159 37111 : nextXID = ReadNextTransactionId();
1160 37111 : nextMXID = ReadNextMultiXactId();
1161 :
1162 : /*
1163 : * Also compute the multixact age for which freezing is urgent. This is
1164 : * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1165 : * short of multixact member space.
1166 : */
108 1167 37111 : effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1168 :
1169 : /*
1170 : * Almost ready to set freeze output parameters; check if OldestXmin or
1171 : * OldestMxact are held back to an unsafe degree before we start on that
1172 : */
1173 37111 : safeOldestXmin = nextXID - autovacuum_freeze_max_age;
1174 37111 : if (!TransactionIdIsNormal(safeOldestXmin))
108 pg 1175 UNC 0 : safeOldestXmin = FirstNormalTransactionId;
108 pg 1176 GNC 37111 : safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
1177 37111 : if (safeOldestMxact < FirstMultiXactId)
108 pg 1178 UNC 0 : safeOldestMxact = FirstMultiXactId;
108 pg 1179 GNC 37111 : if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
108 pg 1180 UNC 0 : ereport(WARNING,
1181 : (errmsg("cutoff for removing and freezing tuples is far in the past"),
1182 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1183 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
108 pg 1184 GNC 37111 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
108 pg 1185 UNC 0 : ereport(WARNING,
1186 : (errmsg("cutoff for freezing multixacts is far in the past"),
1187 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1188 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1189 :
1190 : /*
1191 : * Determine the minimum freeze age to use: as specified by the caller, or
1192 : * vacuum_freeze_min_age, but in any case not more than half
1193 : * autovacuum_freeze_max_age, so that autovacuums to prevent XID
1194 : * wraparound won't occur too frequently.
1195 : */
221 pg 1196 GNC 37111 : if (freeze_min_age < 0)
1197 3542 : freeze_min_age = vacuum_freeze_min_age;
1198 37111 : freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
1199 37111 : Assert(freeze_min_age >= 0);
221 pg 1200 ECB :
1201 : /* Compute FreezeLimit, being careful to generate a normal XID */
108 pg 1202 GNC 37111 : cutoffs->FreezeLimit = nextXID - freeze_min_age;
1203 37111 : if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
108 pg 1204 UNC 0 : cutoffs->FreezeLimit = FirstNormalTransactionId;
1205 : /* FreezeLimit must always be <= OldestXmin */
108 pg 1206 GNC 37111 : if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
1207 308 : cutoffs->FreezeLimit = cutoffs->OldestXmin;
2893 rhaas 1208 ECB :
3419 alvherre 1209 : /*
3342 1210 : * Determine the minimum multixact freeze age to use: as specified by
1211 : * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1212 : * than half effective_multixact_freeze_max_age, so that autovacuums to
1213 : * prevent MultiXact wraparound won't occur too frequently.
3419 alvherre 1214 EUB : */
221 pg 1215 GNC 37111 : if (multixact_freeze_min_age < 0)
1216 3542 : multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
1217 37111 : multixact_freeze_min_age = Min(multixact_freeze_min_age,
1218 : effective_multixact_freeze_max_age / 2);
1219 37111 : Assert(multixact_freeze_min_age >= 0);
221 pg 1220 ECB :
1221 : /* Compute MultiXactCutoff, being careful to generate a valid value */
108 pg 1222 GNC 37111 : cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
1223 37111 : if (cutoffs->MultiXactCutoff < FirstMultiXactId)
108 pg 1224 UNC 0 : cutoffs->MultiXactCutoff = FirstMultiXactId;
1225 : /* MultiXactCutoff must always be <= OldestMxact */
108 pg 1226 GNC 37111 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
1227 2 : cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
1228 :
1229 : /*
1230 : * Finally, figure out if caller needs to do an aggressive VACUUM or not.
1231 : *
1232 : * Determine the table freeze age to use: as specified by the caller, or
1233 : * the value of the vacuum_freeze_table_age GUC, but in any case not more
1234 : * than autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1235 : * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
1236 : * anti-wraparound autovacuum is launched.
1237 : */
221 1238 37111 : if (freeze_table_age < 0)
1239 3542 : freeze_table_age = vacuum_freeze_table_age;
1240 37111 : freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
1241 37111 : Assert(freeze_table_age >= 0);
1242 37111 : aggressiveXIDCutoff = nextXID - freeze_table_age;
1243 37111 : if (!TransactionIdIsNormal(aggressiveXIDCutoff))
221 pg 1244 UNC 0 : aggressiveXIDCutoff = FirstNormalTransactionId;
422 pg 1245 GIC 37111 : if (TransactionIdPrecedesOrEquals(rel->rd_rel->relfrozenxid,
1246 : aggressiveXIDCutoff))
1247 33500 : return true;
1248 :
1249 : /*
1250 : * Similar to the above, determine the table freeze age to use for
1251 : * multixacts: as specified by the caller, or the value of the
1252 : * vacuum_multixact_freeze_table_age GUC, but in any case not more than
1253 : * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
1254 : * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
1255 : * multixacts before anti-wraparound autovacuum is launched.
422 pg 1256 ECB : */
221 pg 1257 GNC 3611 : if (multixact_freeze_table_age < 0)
1258 3542 : multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
1259 3611 : multixact_freeze_table_age =
1260 3611 : Min(multixact_freeze_table_age,
1261 : effective_multixact_freeze_max_age * 0.95);
1262 3611 : Assert(multixact_freeze_table_age >= 0);
1263 3611 : aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
1264 3611 : if (aggressiveMXIDCutoff < FirstMultiXactId)
221 pg 1265 UNC 0 : aggressiveMXIDCutoff = FirstMultiXactId;
422 pg 1266 GIC 3611 : if (MultiXactIdPrecedesOrEquals(rel->rd_rel->relminmxid,
1267 : aggressiveMXIDCutoff))
422 pg 1268 LBC 0 : return true;
1269 :
1270 : /* Non-aggressive VACUUM */
422 pg 1271 GIC 3611 : return false;
1272 : }
7941 tgl 1273 ECB :
1274 : /*
1275 : * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
732 pg 1276 : * mechanism to determine if its table's relfrozenxid and relminmxid are now
1277 : * dangerously far in the past.
1278 : *
1279 : * When we return true, VACUUM caller triggers the failsafe.
1280 : */
1281 : bool
108 pg 1282 GNC 42389 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
1283 : {
1284 42389 : TransactionId relfrozenxid = cutoffs->relfrozenxid;
1285 42389 : MultiXactId relminmxid = cutoffs->relminmxid;
1286 : TransactionId xid_skip_limit;
1287 : MultiXactId multi_skip_limit;
732 pg 1288 ECB : int skip_index_vacuum;
1289 :
732 pg 1290 GIC 42389 : Assert(TransactionIdIsNormal(relfrozenxid));
1291 42389 : Assert(MultiXactIdIsValid(relminmxid));
1292 :
1293 : /*
732 pg 1294 ECB : * Determine the index skipping age to use. In any case no less than
1295 : * autovacuum_freeze_max_age * 1.05.
732 pg 1296 EUB : */
732 pg 1297 CBC 42389 : skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
732 pg 1298 ECB :
732 pg 1299 GBC 42389 : xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
732 pg 1300 CBC 42389 : if (!TransactionIdIsNormal(xid_skip_limit))
732 pg 1301 UBC 0 : xid_skip_limit = FirstNormalTransactionId;
1302 :
732 pg 1303 GIC 42389 : if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
1304 : {
732 pg 1305 ECB : /* The table's relfrozenxid is too old */
732 pg 1306 UBC 0 : return true;
1307 : }
1308 :
1309 : /*
1310 : * Similar to above, determine the index skipping age to use for
1311 : * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
1312 : * 1.05.
1313 : */
732 pg 1314 GIC 42389 : skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
1315 : autovacuum_multixact_freeze_max_age * 1.05);
1316 :
732 pg 1317 CBC 42389 : multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
1318 42389 : if (multi_skip_limit < FirstMultiXactId)
732 pg 1319 LBC 0 : multi_skip_limit = FirstMultiXactId;
732 pg 1320 ECB :
732 pg 1321 GIC 42389 : if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
1322 : {
732 pg 1323 ECB : /* The table's relminmxid is too old */
732 pg 1324 LBC 0 : return true;
732 pg 1325 EUB : }
1326 :
732 pg 1327 CBC 42389 : return false;
732 pg 1328 ECB : }
1329 :
1330 : /*
1331 : * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1332 : *
1333 : * If we scanned the whole relation then we should just use the count of
1334 : * live tuples seen; but if we did not, we should not blindly extrapolate
1335 : * from that number, since VACUUM may have scanned a quite nonrandom
1853 tgl 1336 : * subset of the table. When we have only partial information, we take
952 1337 : * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1338 : * of the tuple density in the unscanned pages.
1339 : *
1844 1340 : * Note: scanned_tuples should count only *live* tuples, since
1341 : * pg_class.reltuples is defined that way.
1342 : */
4332 1343 : double
1853 tgl 1344 CBC 36850 : vac_estimate_reltuples(Relation relation,
4332 tgl 1345 EUB : BlockNumber total_pages,
1346 : BlockNumber scanned_pages,
4332 tgl 1347 ECB : double scanned_tuples)
1348 : {
4322 bruce 1349 GIC 36850 : BlockNumber old_rel_pages = relation->rd_rel->relpages;
4332 tgl 1350 36850 : double old_rel_tuples = relation->rd_rel->reltuples;
1351 : double old_density;
1352 : double unscanned_pages;
1353 : double total_tuples;
1354 :
1355 : /* If we did scan the whole table, just use the count as-is */
1356 36850 : if (scanned_pages >= total_pages)
1357 36753 : return scanned_tuples;
1358 :
417 pg 1359 ECB : /*
1360 : * When successive VACUUM commands scan the same few pages again and
1361 : * again, without anything from the table really changing, there is a risk
1362 : * that our beliefs about tuple density will gradually become distorted.
233 1363 : * This might be caused by vacuumlazy.c implementation details, such as
1364 : * its tendency to always scan the last heap page. Handle that here.
233 pg 1365 EUB : *
233 pg 1366 ECB : * If the relation is _exactly_ the same size according to the existing
1367 : * pg_class entry, and only a few of its pages (less than 2%) were
1368 : * scanned, keep the existing value of reltuples. Also keep the existing
1369 : * value when only a subset of rel's pages <= a single page were scanned.
1370 : *
1371 : * (Note: we might be returning -1 here.)
1372 : */
417 pg 1373 GIC 97 : if (old_rel_pages == total_pages &&
1374 87 : scanned_pages < (double) total_pages * 0.02)
1375 53 : return old_rel_tuples;
233 1376 44 : if (scanned_pages <= 1)
1377 30 : return old_rel_tuples;
417 pg 1378 ECB :
4332 tgl 1379 : /*
952 1380 : * If old density is unknown, we can't do much except scale up
1381 : * scanned_tuples to match total_pages.
1382 : */
952 tgl 1383 CBC 14 : if (old_rel_tuples < 0 || old_rel_pages == 0)
4332 tgl 1384 LBC 0 : return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
4332 tgl 1385 ECB :
4332 tgl 1386 EUB : /*
4332 tgl 1387 ECB : * Okay, we've covered the corner cases. The normal calculation is to
1388 : * convert the old measurement to a density (tuples per page), then
1853 tgl 1389 EUB : * estimate the number of tuples in the unscanned pages using that figure,
1390 : * and finally add on the number of tuples in the scanned pages.
1391 : */
4332 tgl 1392 CBC 14 : old_density = old_rel_tuples / old_rel_pages;
1853 tgl 1393 GIC 14 : unscanned_pages = (double) total_pages - (double) scanned_pages;
1394 14 : total_tuples = old_density * unscanned_pages + scanned_tuples;
1395 14 : return floor(total_tuples + 0.5);
1396 : }
1397 :
1398 :
1399 : /*
1400 : * vac_update_relstats() -- update statistics for one relation
1401 : *
1402 : * Update the whole-relation statistics that are kept in its pg_class
7941 tgl 1403 ECB : * row. There are additional stats that will be updated if we are
1404 : * doing ANALYZE, but we always update these stats. This routine works
1405 : * for both index and heap relation entries in pg_class.
1406 : *
1407 : * We violate transaction semantics here by overwriting the rel's
1408 : * existing pg_class tuple with the new values. This is reasonably
1409 : * safe as long as we're sure that the new values are correct whether or
1410 : * not this transaction commits. The reason for doing this is that if
3084 1411 : * we updated these tuples in the usual way, vacuuming pg_class itself
1412 : * wouldn't work very well --- by the time we got done with a vacuum
1413 : * cycle, most of the tuples in pg_class would've been obsoleted. Of
1414 : * course, this only works for fixed-size not-null columns, but these are.
1415 : *
1416 : * Another reason for doing it this way is that when we are in a lazy
1417 : * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1418 : * Somebody vacuuming pg_class might think they could delete a tuple
1419 : * marked with xmin = our xid.
6097 alvherre 1420 : *
3084 tgl 1421 : * In addition to fundamentally nontransactional statistics such as
3084 tgl 1422 EUB : * relpages and relallvisible, we try to maintain certain lazily-updated
1423 : * DDL flags such as relhasindex, by clearing them if no longer correct.
3084 tgl 1424 ECB : * It's safe to do this in VACUUM, which can't run in parallel with
1425 : * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1426 : * However, it's *not* safe to do it in an ANALYZE that's within an
3083 tgl 1427 EUB : * outer transaction, because for example the current transaction might
1428 : * have dropped the last index; then we'd think relhasindex should be
1429 : * cleared, but if the transaction later rolls back this would be wrong.
1430 : * So we refrain from updating the DDL flags if we're inside an outer
1431 : * transaction. This is OK since postponing the flag maintenance is
1432 : * always allowable.
1433 : *
1434 : * Note: num_tuples should count only *live* tuples, since
1844 tgl 1435 ECB : * pg_class.reltuples is defined that way.
1436 : *
1437 : * This routine is shared by VACUUM and ANALYZE.
7941 1438 : */
1439 : void
5263 tgl 1440 GBC 105116 : vac_update_relstats(Relation relation,
1441 : BlockNumber num_pages, double num_tuples,
4195 tgl 1442 ECB : BlockNumber num_all_visible_pages,
1443 : bool hasindex, TransactionId frozenxid,
1444 : MultiXactId minmulti,
422 pg 1445 EUB : bool *frozenxid_updated, bool *minmulti_updated,
1446 : bool in_outer_xact)
1447 : {
5263 tgl 1448 CBC 105116 : Oid relid = RelationGetRelid(relation);
1449 : Relation rd;
1450 : HeapTuple ctup;
1451 : Form_pg_class pgcform;
1452 : bool dirty,
1453 : futurexid,
1454 : futuremxid;
1455 : TransactionId oldfrozenxid;
1456 : MultiXactId oldminmulti;
1457 :
1539 andres 1458 GIC 105116 : rd = table_open(RelationRelationId, RowExclusiveLock);
1459 :
1460 : /* Fetch a copy of the tuple to scribble on */
4802 rhaas 1461 105116 : ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
7941 tgl 1462 105116 : if (!HeapTupleIsValid(ctup))
7941 tgl 1463 UIC 0 : elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1464 : relid);
6178 tgl 1465 CBC 105116 : pgcform = (Form_pg_class) GETSTRUCT(ctup);
1466 :
1467 : /* Apply statistical updates, if any, to copied tuple */
1468 :
6178 tgl 1469 GIC 105116 : dirty = false;
6178 tgl 1470 CBC 105116 : if (pgcform->relpages != (int32) num_pages)
6178 tgl 1471 ECB : {
6178 tgl 1472 GIC 16095 : pgcform->relpages = (int32) num_pages;
1473 16095 : dirty = true;
1474 : }
1475 105116 : if (pgcform->reltuples != (float4) num_tuples)
1476 : {
6178 tgl 1477 CBC 43669 : pgcform->reltuples = (float4) num_tuples;
1478 43669 : dirty = true;
1479 : }
4195 tgl 1480 GIC 105116 : if (pgcform->relallvisible != (int32) num_all_visible_pages)
1481 : {
1482 13392 : pgcform->relallvisible = (int32) num_all_visible_pages;
1483 13392 : dirty = true;
1484 : }
1485 :
1486 : /* Apply DDL updates, but not inside an outer transaction (see above) */
1487 :
3083 1488 105116 : if (!in_outer_xact)
1489 : {
1490 : /*
1491 : * If we didn't find any indexes, reset relhasindex.
1492 : */
3084 1493 104980 : if (pgcform->relhasindex && !hasindex)
3084 tgl 1494 ECB : {
3084 tgl 1495 CBC 9 : pgcform->relhasindex = false;
1496 9 : dirty = true;
3084 tgl 1497 ECB : }
1498 :
1499 : /* We also clear relhasrules and relhastriggers if needed */
3084 tgl 1500 GIC 104980 : if (pgcform->relhasrules && relation->rd_rules == NULL)
1501 : {
3084 tgl 1502 UIC 0 : pgcform->relhasrules = false;
1503 0 : dirty = true;
3084 tgl 1504 ECB : }
3084 tgl 1505 GBC 104980 : if (pgcform->relhastriggers && relation->trigdesc == NULL)
1506 : {
3084 tgl 1507 GIC 3 : pgcform->relhastriggers = false;
1508 3 : dirty = true;
1509 : }
1510 : }
1511 :
1512 : /*
3184 tgl 1513 ECB : * Update relfrozenxid, unless caller passed InvalidTransactionId
1514 : * indicating it has no new data.
1515 : *
371 pg 1516 : * Ordinarily, we don't let relfrozenxid go backwards. However, if the
1517 : * stored relfrozenxid is "in the future" then it seems best to assume
1518 : * it's corrupt, and overwrite with the oldest remaining XID in the table.
1519 : * This should match vac_update_datfrozenxid() concerning what we consider
1520 : * to be "in the future".
1521 : */
369 pg 1522 GIC 105116 : oldfrozenxid = pgcform->relfrozenxid;
1523 105116 : futurexid = false;
422 1524 105116 : if (frozenxid_updated)
1525 36848 : *frozenxid_updated = false;
369 1526 105116 : if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
1527 : {
332 tgl 1528 35963 : bool update = false;
1529 :
369 pg 1530 35963 : if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
1531 35917 : update = true;
1532 46 : else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
369 pg 1533 UIC 0 : futurexid = update = true;
1534 :
369 pg 1535 GIC 35963 : if (update)
1536 : {
1537 35917 : pgcform->relfrozenxid = frozenxid;
1538 35917 : dirty = true;
1539 35917 : if (frozenxid_updated)
1540 35917 : *frozenxid_updated = true;
1541 : }
1542 : }
1543 :
1544 : /* Similarly for relminmxid */
1545 105116 : oldminmulti = pgcform->relminmxid;
1546 105116 : futuremxid = false;
422 1547 105116 : if (minmulti_updated)
1548 36848 : *minmulti_updated = false;
369 1549 105116 : if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
1550 : {
332 tgl 1551 30 : bool update = false;
1552 :
369 pg 1553 30 : if (MultiXactIdPrecedes(oldminmulti, minmulti))
1554 30 : update = true;
369 pg 1555 UIC 0 : else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
1556 0 : futuremxid = update = true;
1557 :
369 pg 1558 GIC 30 : if (update)
1559 : {
1560 30 : pgcform->relminmxid = minmulti;
369 pg 1561 CBC 30 : dirty = true;
369 pg 1562 GIC 30 : if (minmulti_updated)
1563 30 : *minmulti_updated = true;
1564 : }
1565 : }
1566 :
1567 : /* If anything changed, write out the tuple. */
6178 tgl 1568 105116 : if (dirty)
6178 tgl 1569 CBC 66215 : heap_inplace_update(rd, ctup);
1570 :
1539 andres 1571 GIC 105116 : table_close(rd, RowExclusiveLock);
1572 :
369 pg 1573 105116 : if (futurexid)
369 pg 1574 UIC 0 : ereport(WARNING,
1575 : (errcode(ERRCODE_DATA_CORRUPTED),
1576 : errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
1577 : oldfrozenxid, frozenxid,
1578 : RelationGetRelationName(relation))));
369 pg 1579 CBC 105116 : if (futuremxid)
369 pg 1580 UIC 0 : ereport(WARNING,
1581 : (errcode(ERRCODE_DATA_CORRUPTED),
369 pg 1582 ECB : errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
1583 : oldminmulti, minmulti,
369 pg 1584 EUB : RelationGetRelationName(relation))));
7941 tgl 1585 GIC 105116 : }
7941 tgl 1586 ECB :
1587 :
1588 : /*
1589 : * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
7896 1590 : *
5999 1591 : * Update pg_database's datfrozenxid entry for our database to be the
1592 : * minimum of the pg_class.relfrozenxid values.
3728 alvherre 1593 : *
3492 1594 : * Similarly, update our datminmxid to be the minimum of the
1595 : * pg_class.relminmxid values.
3728 1596 : *
1597 : * If we are able to advance either pg_database value, also try to
2214 rhaas 1598 : * truncate pg_xact and pg_multixact.
7896 tgl 1599 : *
1600 : * We violate transaction semantics here by overwriting the database's
3184 1601 : * existing pg_database tuple with the new values. This is reasonably
1602 : * safe since the new values are correct whether or not this transaction
6178 1603 : * commits. As with vac_update_relstats, this avoids leaving dead tuples
1604 : * behind after a VACUUM.
1605 : */
1606 : void
5999 tgl 1607 GIC 1249 : vac_update_datfrozenxid(void)
1608 : {
7896 tgl 1609 ECB : HeapTuple tuple;
1610 : Form_pg_database dbform;
1611 : Relation relation;
1612 : SysScanDesc scan;
1613 : HeapTuple classTup;
5999 1614 : TransactionId newFrozenXid;
1615 : MultiXactId newMinMulti;
3184 1616 : TransactionId lastSaneFrozenXid;
1617 : MultiXactId lastSaneMinMulti;
3184 tgl 1618 GIC 1249 : bool bogus = false;
6117 alvherre 1619 1249 : bool dirty = false;
1620 : ScanKeyData key[1];
6117 alvherre 1621 ECB :
1622 : /*
967 noah 1623 EUB : * Restrict this task to one backend per database. This avoids race
1624 : * conditions that would move datfrozenxid or datminmxid backward. It
1625 : * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
967 noah 1626 ECB : * datfrozenxid passed to an earlier vac_truncate_clog() call.
1627 : */
967 noah 1628 CBC 1249 : LockDatabaseFrozenIds(ExclusiveLock);
967 noah 1629 ECB :
1630 : /*
1631 : * Initialize the "min" calculation with
1632 : * GetOldestNonRemovableTransactionId(), which is a reasonable
1633 : * approximation to the minimum relfrozenxid for not-yet-committed
1634 : * pg_class entries for new tables; see AddNewRelationTuple(). So we
1635 : * cannot produce a wrong minimum by starting with this.
1636 : */
970 andres 1637 GIC 1249 : newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1638 :
1639 : /*
1640 : * Similarly, initialize the MultiXact "min" with the value that would be
1641 : * used on pg_class for new tables. See AddNewRelationTuple().
1642 : */
3184 tgl 1643 CBC 1249 : newMinMulti = GetOldestMultiXactId();
3184 tgl 1644 ECB :
1645 : /*
1646 : * Identify the latest relfrozenxid and relminmxid values that we could
1647 : * validly see during the scan. These are conservative values, but it's
1648 : * not really worth trying to be more exact.
1649 : */
783 tmunro 1650 GIC 1249 : lastSaneFrozenXid = ReadNextTransactionId();
3184 tgl 1651 CBC 1249 : lastSaneMinMulti = ReadNextMultiXactId();
3728 alvherre 1652 ECB :
6031 bruce 1653 : /*
6031 bruce 1654 EUB : * We must seqscan pg_class to find the minimum Xid, because there is no
1655 : * index that can help us here.
6117 alvherre 1656 ECB : */
1539 andres 1657 GIC 1249 : relation = table_open(RelationRelationId, AccessShareLock);
6117 alvherre 1658 ECB :
6117 alvherre 1659 CBC 1249 : scan = systable_beginscan(relation, InvalidOid, false,
3568 rhaas 1660 ECB : NULL, 0, NULL);
6117 alvherre 1661 :
6117 alvherre 1662 GIC 679399 : while ((classTup = systable_getnext(scan)) != NULL)
1663 : {
5999 tgl 1664 678150 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
1665 :
6117 alvherre 1666 ECB : /*
3565 noah 1667 : * Only consider relations able to hold unfrozen XIDs (anything else
1447 andres 1668 : * should have InvalidTransactionId in relfrozenxid anyway).
6117 alvherre 1669 : */
6117 alvherre 1670 CBC 678150 : if (classForm->relkind != RELKIND_RELATION &&
3689 kgrittn 1671 GIC 529512 : classForm->relkind != RELKIND_MATVIEW &&
6117 alvherre 1672 CBC 528511 : classForm->relkind != RELKIND_TOASTVALUE)
1673 : {
1447 andres 1674 450923 : Assert(!TransactionIdIsValid(classForm->relfrozenxid));
1675 450923 : Assert(!MultiXactIdIsValid(classForm->relminmxid));
6117 alvherre 1676 GBC 450923 : continue;
1447 andres 1677 EUB : }
1678 :
3184 tgl 1679 ECB : /*
1680 : * Some table AMs might not need per-relation xid / multixid horizons.
1418 1681 : * It therefore seems reasonable to allow relfrozenxid and relminmxid
1682 : * to not be set (i.e. set to their respective Invalid*Id)
1447 andres 1683 : * independently. Thus validate and compute horizon for each only if
1684 : * set.
1685 : *
1686 : * If things are working properly, no relation should have a
1687 : * relfrozenxid or relminmxid that is "in the future". However, such
1688 : * cases have been known to arise due to bugs in pg_upgrade. If we
3184 tgl 1689 : * see any entries that are "in the future", chicken out and don't do
1447 andres 1690 : * anything. This ensures we won't truncate clog & multixact SLRUs
1691 : * before those relations have been scanned and cleaned up.
3184 tgl 1692 : */
1693 :
1447 andres 1694 CBC 227227 : if (TransactionIdIsValid(classForm->relfrozenxid))
3184 tgl 1695 EUB : {
1447 andres 1696 GIC 227227 : Assert(TransactionIdIsNormal(classForm->relfrozenxid));
1697 :
1698 : /* check for values in the future */
1699 227227 : if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid))
1447 andres 1700 ECB : {
1447 andres 1701 UBC 0 : bogus = true;
1447 andres 1702 UIC 0 : break;
1703 : }
1704 :
1705 : /* determine new horizon */
1447 andres 1706 CBC 227227 : if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1447 andres 1707 GIC 1710 : newFrozenXid = classForm->relfrozenxid;
1708 : }
1709 :
1710 227227 : if (MultiXactIdIsValid(classForm->relminmxid))
1711 : {
1712 : /* check for values in the future */
1713 227227 : if (MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1714 : {
1447 andres 1715 UIC 0 : bogus = true;
1716 0 : break;
1717 : }
1718 :
1719 : /* determine new horizon */
1447 andres 1720 GIC 227227 : if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1721 93 : newMinMulti = classForm->relminmxid;
1722 : }
1723 : }
1724 :
1725 : /* we're done with pg_class */
6117 alvherre 1726 1249 : systable_endscan(scan);
1539 andres 1727 1249 : table_close(relation, AccessShareLock);
6117 alvherre 1728 ECB :
1729 : /* chicken out if bogus data found */
3184 tgl 1730 GIC 1249 : if (bogus)
3184 tgl 1731 UIC 0 : return;
1732 :
5999 tgl 1733 GIC 1249 : Assert(TransactionIdIsNormal(newFrozenXid));
3492 alvherre 1734 1249 : Assert(MultiXactIdIsValid(newMinMulti));
1735 :
1736 : /* Now fetch the pg_database tuple we need to update. */
1539 andres 1737 1249 : relation = table_open(DatabaseRelationId, RowExclusiveLock);
1738 :
852 michael 1739 ECB : /*
1740 : * Get the pg_database tuple to scribble on. Note that this does not
1741 : * directly rely on the syscache to avoid issues with flattened toast
1742 : * values for the in-place update.
1743 : */
852 michael 1744 GIC 1249 : ScanKeyInit(&key[0],
1745 : Anum_pg_database_oid,
1746 : BTEqualStrategyNumber, F_OIDEQ,
1747 : ObjectIdGetDatum(MyDatabaseId));
1748 :
852 michael 1749 CBC 1249 : scan = systable_beginscan(relation, DatabaseOidIndexId, true,
1750 : NULL, 1, key);
852 michael 1751 GIC 1249 : tuple = systable_getnext(scan);
1752 1249 : tuple = heap_copytuple(tuple);
1753 1249 : systable_endscan(scan);
1754 :
7896 tgl 1755 1249 : if (!HeapTupleIsValid(tuple))
5999 tgl 1756 UIC 0 : elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1757 :
7896 tgl 1758 CBC 1249 : dbform = (Form_pg_database) GETSTRUCT(tuple);
1759 :
1760 : /*
1761 : * As in vac_update_relstats(), we ordinarily don't want to let
1762 : * datfrozenxid go backward; but if it's "in the future" then it must be
1763 : * corrupt and it seems best to overwrite it.
5999 tgl 1764 ECB : */
3184 tgl 1765 GIC 1567 : if (dbform->datfrozenxid != newFrozenXid &&
1766 318 : (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
3184 tgl 1767 UIC 0 : TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1768 : {
5999 tgl 1769 GIC 318 : dbform->datfrozenxid = newFrozenXid;
6117 alvherre 1770 318 : dirty = true;
6117 alvherre 1771 ECB : }
3184 tgl 1772 : else
3184 tgl 1773 GIC 931 : newFrozenXid = dbform->datfrozenxid;
1774 :
1775 : /* Ditto for datminmxid */
1776 1249 : if (dbform->datminmxid != newMinMulti &&
3184 tgl 1777 UIC 0 : (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
3184 tgl 1778 LBC 0 : MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1779 : {
3492 alvherre 1780 0 : dbform->datminmxid = newMinMulti;
3728 alvherre 1781 UIC 0 : dirty = true;
1782 : }
3184 tgl 1783 ECB : else
3184 tgl 1784 GIC 1249 : newMinMulti = dbform->datminmxid;
3728 alvherre 1785 ECB :
6117 alvherre 1786 GIC 1249 : if (dirty)
1787 318 : heap_inplace_update(relation, tuple);
1788 :
1789 1249 : heap_freetuple(tuple);
1539 andres 1790 1249 : table_close(relation, RowExclusiveLock);
6463 tgl 1791 ECB :
5999 1792 : /*
3184 1793 : * If we were able to advance datfrozenxid or datminmxid, see if we can
1794 : * truncate pg_xact and/or pg_multixact. Also do it if the shared
1795 : * XID-wrap-limit info is stale, since this action will update that too.
5999 1796 : */
4968 tgl 1797 CBC 1249 : if (dirty || ForceTransactionIdLimitUpdate())
3184 tgl 1798 GIC 318 : vac_truncate_clog(newFrozenXid, newMinMulti,
1799 : lastSaneFrozenXid, lastSaneMinMulti);
1800 : }
1801 :
1802 :
1803 : /*
1804 : * vac_truncate_clog() -- attempt to truncate the commit log
1805 : *
1806 : * Scan pg_database to determine the system-wide oldest datfrozenxid,
1807 : * and use it to truncate the transaction commit log (pg_xact).
1808 : * Also update the XID wrap limit info maintained by varsup.c.
1809 : * Likewise for datminmxid.
1810 : *
1811 : * The passed frozenXID and minMulti are the updated values for my own
1812 : * pg_database entry. They're used to initialize the "min" calculations.
1813 : * The caller also passes the "last sane" XID and MXID, since it has
1814 : * those at hand already.
7896 tgl 1815 ECB : *
1816 : * This routine is only invoked when we've managed to change our
3184 1817 : * DB's datfrozenxid/datminmxid values, or we found that the shared
1818 : * XID-wrap-limit info is stale.
1819 : */
7896 1820 : static void
3184 tgl 1821 GIC 318 : vac_truncate_clog(TransactionId frozenXID,
3184 tgl 1822 EUB : MultiXactId minMulti,
1823 : TransactionId lastSaneFrozenXid,
1824 : MultiXactId lastSaneMinMulti)
1825 : {
783 tmunro 1826 GIC 318 : TransactionId nextXID = ReadNextTransactionId();
7896 tgl 1827 ECB : Relation relation;
1490 andres 1828 : TableScanDesc scan;
1829 : HeapTuple tuple;
1830 : Oid oldestxid_datoid;
3492 alvherre 1831 : Oid minmulti_datoid;
3184 tgl 1832 GIC 318 : bool bogus = false;
5999 1833 318 : bool frozenAlreadyWrapped = false;
7677 tgl 1834 ECB :
1835 : /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
967 noah 1836 GBC 318 : LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
967 noah 1837 EUB :
1838 : /* init oldest datoids to sync with my frozenXID/minMulti values */
3728 alvherre 1839 GIC 318 : oldestxid_datoid = MyDatabaseId;
3492 1840 318 : minmulti_datoid = MyDatabaseId;
7896 tgl 1841 ECB :
6622 1842 : /*
1843 : * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1844 : *
1845 : * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1846 : * the values could change while we look at them. Fetch each one just
2511 1847 : * once to ensure sane behavior of the comparison logic. (Here, as in
1848 : * many other places, we assume that fetching or updating an XID in shared
1849 : * storage is atomic.)
1850 : *
5999 1851 : * Note: we need not worry about a race condition with new entries being
5999 tgl 1852 EUB : * inserted by CREATE DATABASE. Any such entry will have a copy of some
1853 : * existing DB's datfrozenxid, and that source DB cannot be ours because
5999 tgl 1854 ECB : * of the interlock against copying a DB containing an active backend.
5624 bruce 1855 : * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1856 : * concurrently modify the datfrozenxid's of different databases, the
1857 : * worst possible outcome is that pg_xact is not truncated as aggressively
1858 : * as it could be.
1859 : */
1539 andres 1860 GIC 318 : relation = table_open(DatabaseRelationId, AccessShareLock);
1861 :
1490 1862 318 : scan = table_beginscan_catalog(relation, 0, NULL);
1863 :
7629 tgl 1864 672 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
7896 tgl 1865 ECB : {
2511 tgl 1866 GIC 354 : volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1867 354 : TransactionId datfrozenxid = dbform->datfrozenxid;
1868 354 : TransactionId datminmxid = dbform->datminmxid;
1869 :
2511 tgl 1870 CBC 354 : Assert(TransactionIdIsNormal(datfrozenxid));
2511 tgl 1871 GIC 354 : Assert(MultiXactIdIsValid(datminmxid));
6117 alvherre 1872 ECB :
3184 tgl 1873 : /*
1874 : * If things are working properly, no database should have a
1875 : * datfrozenxid or datminmxid that is "in the future". However, such
1876 : * cases have been known to arise due to bugs in pg_upgrade. If we
3184 tgl 1877 EUB : * see any entries that are "in the future", chicken out and don't do
1878 : * anything. This ensures we won't truncate clog before those
3184 tgl 1879 ECB : * databases have been scanned and cleaned up. (We will issue the
1880 : * "already wrapped" warning if appropriate, though.)
1881 : */
2511 tgl 1882 GIC 708 : if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1883 354 : MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
3184 tgl 1884 UIC 0 : bogus = true;
1885 :
2511 tgl 1886 CBC 354 : if (TransactionIdPrecedes(nextXID, datfrozenxid))
5999 tgl 1887 LBC 0 : frozenAlreadyWrapped = true;
2511 tgl 1888 GBC 354 : else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1889 : {
2511 tgl 1890 CBC 15 : frozenXID = datfrozenxid;
1601 andres 1891 15 : oldestxid_datoid = dbform->oid;
1892 : }
1893 :
2511 tgl 1894 354 : if (MultiXactIdPrecedes(datminmxid, minMulti))
1895 : {
2511 tgl 1896 UIC 0 : minMulti = datminmxid;
1601 andres 1897 LBC 0 : minmulti_datoid = dbform->oid;
7677 tgl 1898 EUB : }
7896 1899 : }
1900 :
1490 andres 1901 GBC 318 : table_endscan(scan);
7896 tgl 1902 EUB :
1539 andres 1903 GIC 318 : table_close(relation, AccessShareLock);
1904 :
7677 tgl 1905 ECB : /*
1906 : * Do not truncate CLOG if we seem to have suffered wraparound already;
5999 1907 : * the computed minimum XID might be bogus. This case should now be
1908 : * impossible due to the defenses in GetNewTransactionId, but we keep the
1909 : * test anyway.
7677 1910 : */
5999 tgl 1911 CBC 318 : if (frozenAlreadyWrapped)
1912 : {
7203 tgl 1913 UIC 0 : ereport(WARNING,
1914 : (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1915 : errdetail("You might have already suffered transaction-wraparound data loss.")));
7677 1916 0 : return;
1917 : }
7677 tgl 1918 ECB :
3184 1919 : /* chicken out if data is bogus in any other way */
3184 tgl 1920 GIC 318 : if (bogus)
3184 tgl 1921 UIC 0 : return;
1922 :
1923 : /*
1924 : * Advance the oldest value for commit timestamps before truncating, so
1925 : * that if a user requests a timestamp for a transaction we're truncating
1926 : * away right after this point, they get NULL instead of an ugly "file not
1927 : * found" error from slru.c. This doesn't matter for xact/multixact
1928 : * because they are not subject to arbitrary lookups from users.
1929 : */
2271 alvherre 1930 GIC 318 : AdvanceOldestCommitTsXid(frozenXID);
1931 :
1932 : /*
1933 : * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1934 : */
2208 rhaas 1935 318 : TruncateCLOG(frozenXID, oldestxid_datoid);
2721 alvherre 1936 318 : TruncateCommitTs(frozenXID);
2752 andres 1937 318 : TruncateMultiXact(minMulti, minmulti_datoid);
1938 :
1939 : /*
1940 : * Update the wrap limit for GetNewTransactionId and creation of new
1941 : * MultiXactIds. Note: these functions will also signal the postmaster
3260 bruce 1942 ECB : * for an(other) autovac cycle if needed. XXX should we avoid possibly
1943 : * signaling twice?
1944 : */
3728 alvherre 1945 GIC 318 : SetTransactionIdLimit(frozenXID, oldestxid_datoid);
2217 tgl 1946 318 : SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
967 noah 1947 ECB :
967 noah 1948 GIC 318 : LWLockRelease(WrapLimitsVacuumLock);
1949 : }
1950 :
1951 :
1952 : /*
7941 tgl 1953 ECB : * vacuum_rel() -- vacuum one heap relation
9770 scrappy 1954 : *
1955 : * relid identifies the relation to vacuum. If relation is supplied,
1956 : * use the name therein for reporting any failure to open/lock the rel;
2018 tgl 1957 : * do not use it once we've successfully opened the rel, since it might
1958 : * be stale.
1959 : *
2012 1960 : * Returns true if it's okay to proceed with a requested ANALYZE
117 jdavis 1961 : * operation on this table.
1962 : *
1963 : * Doing one heap at a time incurs extra overhead, since we need to
1964 : * check that the heap exists again just before we vacuum it. The
1965 : * reason that we do this is so that vacuuming can be spread across
1966 : * many small transactions. Otherwise, two-phase locking would require
1967 : * us to lock the entire database during one pass of the vacuum cleaner.
1968 : *
1969 : * At entry and exit, we are not inside a transaction.
1970 : */
1971 : static bool
6 drowley 1972 GNC 37195 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
1973 : bool skip_privs, BufferAccessStrategy bstrategy)
1974 : {
1975 : LOCKMODE lmode;
1976 : Relation rel;
1977 : LockRelId lockrelid;
1978 : Oid toast_relid;
1979 : Oid save_userid;
1980 : int save_sec_context;
1981 : int save_nestlevel;
8986 bruce 1982 ECB :
2944 alvherre 1983 GIC 37195 : Assert(params != NULL);
2944 alvherre 1984 ECB :
1985 : /* Begin a transaction for vacuuming this relation */
7270 tgl 1986 CBC 37195 : StartTransactionCommand();
1987 :
1483 rhaas 1988 37195 : if (!(params->options & VACOPT_FULL))
6097 alvherre 1989 ECB : {
1990 : /*
1991 : * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
5050 bruce 1992 : * other concurrent VACUUMs know that they can ignore this one while
5624 1993 : * determining their OldestXmin. (The reason we don't set it during a
1994 : * full VACUUM is exactly that we may have to run user-defined
1995 : * functions for functional indexes, and we want to make sure that if
1996 : * they use the snapshot set above, any tuples it requires can't get
1997 : * removed from other tables. An index function that depends on the
1998 : * contents of other tables is arguably broken, but we won't break it
1999 : * here by violating transaction semantics.)
2000 : *
2001 : * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
2002 : * autovacuum; it's used to avoid canceling a vacuum that was invoked
2003 : * in an emergency.
5504 alvherre 2004 : *
5352 2005 : * Note: these flags remain set until CommitTransaction or
5352 alvherre 2006 EUB : * AbortTransaction. We don't want to clear them until we reset
2007 : * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
864 alvherre 2008 ECB : * might appear to go backwards, which is probably Not Good. (We also
864 alvherre 2009 EUB : * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
864 alvherre 2010 ECB : * xmin doesn't become visible ahead of setting the flag.)
2011 : */
864 alvherre 2012 CBC 37014 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
874 2013 37014 : MyProc->statusFlags |= PROC_IN_VACUUM;
2944 alvherre 2014 GIC 37014 : if (params->is_wraparound)
874 alvherre 2015 UIC 0 : MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
874 alvherre 2016 CBC 37014 : ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
5646 alvherre 2017 GIC 37014 : LWLockRelease(ProcArrayLock);
6097 alvherre 2018 EUB : }
9345 bruce 2019 :
2020 : /*
2021 : * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
2022 : * cutoff xids in local memory wrapping around, and to have updated xmin
864 alvherre 2023 ECB : * horizons.
2024 : */
864 alvherre 2025 CBC 37195 : PushActiveSnapshot(GetTransactionSnapshot());
2026 :
2027 : /*
2028 : * Check for user-requested abort. Note we want this to be inside a
2029 : * transaction, so xact.c doesn't issue useless WARNING.
2030 : */
8120 tgl 2031 GIC 37195 : CHECK_FOR_INTERRUPTS();
2032 :
8604 tgl 2033 ECB : /*
2034 : * Determine the type of lock we want --- hard exclusive lock for a FULL
6347 bruce 2035 EUB : * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
2036 : * way, we can be sure that no other backend is vacuuming the same table.
2037 : */
1483 rhaas 2038 GBC 74390 : lmode = (params->options & VACOPT_FULL) ?
1483 rhaas 2039 GIC 37195 : AccessExclusiveLock : ShareUpdateExclusiveLock;
2040 :
2041 : /* open the relation and get the appropriate lock on it */
734 pg 2042 CBC 37195 : rel = vacuum_open_relation(relid, relation, params->options,
734 pg 2043 GBC 37195 : params->log_min_duration >= 0, lmode);
2044 :
2045 : /* leave if relation could not be opened or locked */
734 pg 2046 GIC 37195 : if (!rel)
2047 : {
5323 alvherre 2048 12 : PopActiveSnapshot();
6078 tgl 2049 12 : CommitTransactionCommand();
4444 rhaas 2050 12 : return false;
2051 : }
6078 tgl 2052 ECB :
2053 : /*
2054 : * Check if relation needs to be skipped based on privileges. This check
2055 : * happens also when building the relation list to vacuum for a manual
2056 : * operation, and needs to be done additionally here as VACUUM could
2057 : * happen across multiple transactions where privileges could have changed
2058 : * in-between. Make sure to only generate logs for VACUUM in this case.
2059 : */
86 jdavis 2060 GNC 37183 : if (!skip_privs &&
2061 23606 : !vacuum_is_permitted_for_relation(RelationGetRelid(rel),
2062 : rel->rd_rel,
117 2063 23606 : params->options & VACOPT_VACUUM))
2064 : {
734 pg 2065 GIC 18 : relation_close(rel, lmode);
5323 alvherre 2066 18 : PopActiveSnapshot();
7270 tgl 2067 CBC 18 : CommitTransactionCommand();
117 jdavis 2068 18 : return false;
2069 : }
7677 tgl 2070 ECB :
2071 : /*
2072 : * Check that it's of a vacuumable relkind.
2073 : */
734 pg 2074 GIC 37165 : if (rel->rd_rel->relkind != RELKIND_RELATION &&
2075 13657 : rel->rd_rel->relkind != RELKIND_MATVIEW &&
2076 13653 : rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
2077 76 : rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2078 : {
7203 tgl 2079 1 : ereport(WARNING,
2080 : (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
2081 : RelationGetRelationName(rel))));
734 pg 2082 1 : relation_close(rel, lmode);
5323 alvherre 2083 1 : PopActiveSnapshot();
7270 tgl 2084 1 : CommitTransactionCommand();
4444 rhaas 2085 1 : return false;
2086 : }
2087 :
2088 : /*
2089 : * Silently ignore tables that are temp tables of other backends ---
2090 : * trying to vacuum these will lead to great unhappiness, since their
2091 : * contents are probably not up-to-date on disk. (We don't throw a
2092 : * warning here; it would just lead to chatter during a database-wide
2093 : * VACUUM.)
7503 tgl 2094 ECB : */
734 pg 2095 GIC 37164 : if (RELATION_IS_OTHER_TEMP(rel))
2096 : {
734 pg 2097 UIC 0 : relation_close(rel, lmode);
5323 alvherre 2098 0 : PopActiveSnapshot();
7270 tgl 2099 0 : CommitTransactionCommand();
4444 rhaas 2100 0 : return false;
2101 : }
2102 :
2103 : /*
2104 : * Silently ignore partitioned tables as there is no work to be done. The
2012 tgl 2105 ECB : * useful work is on their child partitions, which have been queued up for
2106 : * us separately.
2107 : */
734 pg 2108 CBC 37164 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2109 : {
2110 75 : relation_close(rel, lmode);
2229 rhaas 2111 GIC 75 : PopActiveSnapshot();
2112 75 : CommitTransactionCommand();
2113 : /* It's OK to proceed with ANALYZE on this table */
2114 75 : return true;
2115 : }
2116 :
2117 : /*
2118 : * Get a session-level lock too. This will protect our access to the
2119 : * relation across multiple transactions, so that we can vacuum the
2120 : * relation's TOAST table (if any) secure in the knowledge that no one is
2121 : * deleting the parent relation.
2122 : *
2123 : * NOTE: this cannot block, even if someone else is waiting for access,
2124 : * because the lock manager knows that both lock requests are from the
2125 : * same process.
2126 : */
734 pg 2127 37089 : lockrelid = rel->rd_lockInfo.lockRelId;
2128 37089 : LockRelationIdForSession(&lockrelid, lmode);
2129 :
2130 : /*
2131 : * Set index_cleanup option based on index_cleanup reloption if it wasn't
2132 : * specified in VACUUM command, or when running in an autovacuum worker
2133 : */
660 pg 2134 CBC 37089 : if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
1466 rhaas 2135 ECB : {
660 pg 2136 : StdRdOptIndexCleanup vacuum_index_cleanup;
660 pg 2137 EUB :
660 pg 2138 CBC 2378 : if (rel->rd_options == NULL)
2139 2252 : vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
2140 : else
660 pg 2141 GIC 126 : vacuum_index_cleanup =
2142 126 : ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
2143 :
2144 2378 : if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
2145 2366 : params->index_cleanup = VACOPTVALUE_AUTO;
2146 12 : else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
660 pg 2147 CBC 6 : params->index_cleanup = VACOPTVALUE_ENABLED;
2148 : else
2149 : {
660 pg 2150 GIC 6 : Assert(vacuum_index_cleanup ==
2151 : STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
2152 6 : params->index_cleanup = VACOPTVALUE_DISABLED;
660 pg 2153 ECB : }
2154 : }
2155 :
2156 : /*
2157 : * Set truncate option based on truncate reloption if it wasn't specified
2158 : * in VACUUM command, or when running in an autovacuum worker
2159 : */
660 pg 2160 CBC 37089 : if (params->truncate == VACOPTVALUE_UNSPECIFIED)
1432 fujii 2161 ECB : {
734 pg 2162 GIC 2390 : if (rel->rd_options == NULL ||
2163 126 : ((StdRdOptions *) rel->rd_options)->vacuum_truncate)
660 pg 2164 CBC 2387 : params->truncate = VACOPTVALUE_ENABLED;
1432 fujii 2165 ECB : else
660 pg 2166 GIC 3 : params->truncate = VACOPTVALUE_DISABLED;
2167 : }
1432 fujii 2168 ECB :
2169 : /*
5352 alvherre 2170 : * Remember the relation's TOAST relation for later, if the caller asked
4808 tgl 2171 : * us to process it. In VACUUM FULL, though, the toast table is
2172 : * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
2173 : * unless PROCESS_MAIN is disabled.
2174 : */
789 michael 2175 GIC 37089 : if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
34 michael 2176 GNC 36940 : ((params->options & VACOPT_FULL) == 0 ||
2177 167 : (params->options & VACOPT_PROCESS_MAIN) == 0))
734 pg 2178 GIC 36776 : toast_relid = rel->rd_rel->reltoastrelid;
2179 : else
5352 alvherre 2180 313 : toast_relid = InvalidOid;
2181 :
2182 : /*
2183 : * Switch to the table owner's userid, so that any index functions are run
4869 tgl 2184 ECB : * as that user. Also lock down security-restricted operations and
4790 bruce 2185 : * arrange to make GUC variable changes local to this command. (This is
2186 : * unnecessary, but harmless, for lazy VACUUM.)
5575 tgl 2187 : */
4869 tgl 2188 GIC 37089 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
734 pg 2189 CBC 37089 : SetUserIdAndSecContext(rel->rd_rel->relowner,
4869 tgl 2190 ECB : save_sec_context | SECURITY_RESTRICTED_OPERATION);
4869 tgl 2191 CBC 37089 : save_nestlevel = NewGUCNestLevel();
5575 tgl 2192 ECB :
2193 : /*
2194 : * If PROCESS_MAIN is set (the default), it's time to vacuum the main
2195 : * relation. Otherwise, we can skip this part. If processing the TOAST
2196 : * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
2197 : * to be set when we recurse to the TOAST table.
2198 : */
32 michael 2199 GNC 37089 : if (params->options & VACOPT_PROCESS_MAIN)
2200 : {
2201 : /*
2202 : * Do the actual work --- either FULL or "lazy" vacuum
2203 : */
2204 37012 : if (params->options & VACOPT_FULL)
2205 : {
2206 164 : ClusterParams cluster_params = {0};
1720 michael 2207 ECB :
2208 : /* close relation before vacuuming, but hold lock until commit */
32 michael 2209 GNC 164 : relation_close(rel, NoLock);
2210 164 : rel = NULL;
4841 itagaki.takahiro 2211 ECB :
32 michael 2212 GNC 164 : if ((params->options & VACOPT_VERBOSE) != 0)
32 michael 2213 UNC 0 : cluster_params.options |= CLUOPT_VERBOSE;
1720 michael 2214 ECB :
2215 : /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
32 michael 2216 GNC 164 : cluster_rel(relid, InvalidOid, &cluster_params);
2217 : }
2218 : else
6 drowley 2219 36848 : table_relation_vacuum(rel, params, bstrategy);
4841 itagaki.takahiro 2220 ECB : }
2221 :
2222 : /* Roll back any GUC changes executed by index functions */
4869 tgl 2223 GIC 37086 : AtEOXact_GUC(false, save_nestlevel);
2224 :
2225 : /* Restore userid and security context */
2226 37086 : SetUserIdAndSecContext(save_userid, save_sec_context);
2227 :
7941 tgl 2228 ECB : /* all done with this class, but hold lock until commit */
734 pg 2229 GIC 37086 : if (rel)
734 pg 2230 GBC 36925 : relation_close(rel, NoLock);
7941 tgl 2231 EUB :
6758 bruce 2232 : /*
2233 : * Complete the transaction and free all temporary memory used.
2234 : */
5323 alvherre 2235 GIC 37086 : PopActiveSnapshot();
7270 tgl 2236 37086 : CommitTransactionCommand();
2237 :
2238 : /*
2239 : * If the relation has a secondary toast rel, vacuum that too while we
2240 : * still hold the session lock on the main table. Note however that
3260 bruce 2241 ECB : * "analyze" will not get done on the toast table. This is good, because
2242 : * the toaster always uses hardcoded index access and statistics are
6385 2243 : * totally unimportant for toast relations.
7941 tgl 2244 : */
7941 tgl 2245 CBC 37086 : if (toast_relid != InvalidOid)
2246 : {
2247 : VacuumParams toast_vacuum_params;
2248 :
2249 : /* force VACOPT_PROCESS_MAIN so vacuum_rel() processes it */
34 michael 2250 GNC 13577 : memcpy(&toast_vacuum_params, params, sizeof(VacuumParams));
2251 13577 : toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
2252 :
6 drowley 2253 13577 : vacuum_rel(toast_relid, NULL, &toast_vacuum_params, true, bstrategy);
2254 : }
7941 tgl 2255 ECB :
2256 : /*
2257 : * Now release the session-level lock on the main table.
2258 : */
734 pg 2259 GIC 37086 : UnlockRelationIdForSession(&lockrelid, lmode);
2260 :
2261 : /* Report that we really did it. */
4444 rhaas 2262 37086 : return true;
2263 : }
2264 :
2265 :
2266 : /*
2267 : * Open all the vacuumable indexes of the given relation, obtaining the
3260 bruce 2268 ECB : * specified kind of lock on each. Return an array of Relation pointers for
3784 tgl 2269 : * the indexes into *Irel, and the number of indexes into *nindexes.
2270 : *
2271 : * We consider an index vacuumable if it is marked insertable (indisready).
2272 : * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
2273 : * execution, and what we have is too corrupt to be processable. We will
2274 : * vacuum even if the index isn't indisvalid; this is important because in a
2275 : * unique index, uniqueness checks will be performed anyway and had better not
2276 : * hit dangling index pointers.
2277 : */
2278 : void
4808 tgl 2279 CBC 60287 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
4808 tgl 2280 ECB : int *nindexes, Relation **Irel)
2281 : {
2282 : List *indexoidlist;
2283 : ListCell *indexoidscan;
2284 : int i;
9345 bruce 2285 :
4808 tgl 2286 CBC 60287 : Assert(lockmode != NoLock);
9345 bruce 2287 ECB :
4808 tgl 2288 CBC 60287 : indexoidlist = RelationGetIndexList(relation);
2289 :
2290 : /* allocate enough memory for all indexes */
3784 2291 60287 : i = list_length(indexoidlist);
2292 :
2293 60287 : if (i > 0)
3784 tgl 2294 GIC 56049 : *Irel = (Relation *) palloc(i * sizeof(Relation));
2295 : else
4808 2296 4238 : *Irel = NULL;
2297 :
2298 : /* collect just the ready indexes */
2299 60287 : i = 0;
2300 156028 : foreach(indexoidscan, indexoidlist)
8778 vadim4o 2301 ECB : {
4808 tgl 2302 GIC 95741 : Oid indexoid = lfirst_oid(indexoidscan);
3784 tgl 2303 ECB : Relation indrel;
4808 2304 :
3784 tgl 2305 CBC 95741 : indrel = index_open(indexoid, lockmode);
1564 peter_e 2306 GIC 95741 : if (indrel->rd_index->indisready)
3784 tgl 2307 CBC 95741 : (*Irel)[i++] = indrel;
2308 : else
3784 tgl 2309 UIC 0 : index_close(indrel, lockmode);
2310 : }
2311 :
3784 tgl 2312 GIC 60287 : *nindexes = i;
2313 :
4808 2314 60287 : list_free(indexoidlist);
8350 bruce 2315 60287 : }
9629 vadim4o 2316 ECB :
9770 scrappy 2317 : /*
3260 bruce 2318 : * Release the resources acquired by vac_open_indexes. Optionally release
4808 tgl 2319 : * the locks (say NoLock to keep 'em).
2320 : */
2321 : void
4808 tgl 2322 GIC 60621 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2323 : {
2324 60621 : if (Irel == NULL)
2325 4576 : return;
2326 :
2327 151779 : while (nindexes--)
2328 : {
4808 tgl 2329 CBC 95734 : Relation ind = Irel[nindexes];
6765 tgl 2330 ECB :
6096 tgl 2331 GIC 95734 : index_close(ind, lockmode);
6765 tgl 2332 ECB : }
9345 bruce 2333 GIC 56045 : pfree(Irel);
2334 : }
2335 :
2336 : /*
2337 : * vacuum_delay_point --- check for interrupts and cost-based delay.
2338 : *
2339 : * This should be called in each major loop of VACUUM processing,
6998 tgl 2340 ECB : * typically once per page processed.
2341 : */
2342 : void
6998 tgl 2343 GIC 120849208 : vacuum_delay_point(void)
2344 : {
1175 akapila 2345 CBC 120849208 : double msec = 0;
2346 :
6998 tgl 2347 ECB : /* Always check for interrupts */
6998 tgl 2348 GIC 120849208 : CHECK_FOR_INTERRUPTS();
2349 :
2 dgustafsson 2350 GNC 120849208 : if (InterruptPending ||
2351 120849206 : (!VacuumCostActive && !ConfigReloadPending))
2352 118220412 : return;
2353 :
2354 : /*
2355 : * Autovacuum workers should reload the configuration file if requested.
2356 : * This allows changes to [autovacuum_]vacuum_cost_limit and
2357 : * [autovacuum_]vacuum_cost_delay to take effect while a table is being
2358 : * vacuumed or analyzed.
2359 : */
2360 2628796 : if (ConfigReloadPending && IsAutoVacuumWorkerProcess())
2361 : {
2 dgustafsson 2362 UNC 0 : ConfigReloadPending = false;
2363 0 : ProcessConfigFile(PGC_SIGHUP);
2364 0 : VacuumUpdateCosts();
2365 : }
2366 :
2367 : /*
2368 : * If we disabled cost-based delays after reloading the config file,
2369 : * return.
2370 : */
2 dgustafsson 2371 GNC 2628796 : if (!VacuumCostActive)
1175 akapila 2372 LBC 0 : return;
2373 :
1175 akapila 2374 ECB : /*
1175 akapila 2375 EUB : * For parallel vacuum, the delay is computed based on the shared cost
2376 : * balance. See compute_parallel_delay.
2377 : */
1175 akapila 2378 CBC 2628796 : if (VacuumSharedCostBalance != NULL)
1175 akapila 2379 UIC 0 : msec = compute_parallel_delay();
2 dgustafsson 2380 GNC 2628796 : else if (VacuumCostBalance >= vacuum_cost_limit)
2381 259 : msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
2382 :
2383 : /* Nap if appropriate */
1175 akapila 2384 GIC 2628796 : if (msec > 0)
1175 akapila 2385 ECB : {
2 dgustafsson 2386 GNC 259 : if (msec > vacuum_cost_delay * 4)
2387 1 : msec = vacuum_cost_delay * 4;
6998 tgl 2388 ECB :
25 tmunro 2389 GIC 259 : pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
2390 259 : pg_usleep(msec * 1000);
25 tmunro 2391 CBC 259 : pgstat_report_wait_end();
25 tmunro 2392 ECB :
2393 : /*
2394 : * We don't want to ignore postmaster death during very long vacuums
2395 : * with vacuum_cost_delay configured. We can't use the usual
2396 : * WaitLatch() approach here because we want microsecond-based sleep
2397 : * durations above.
2398 : */
25 tmunro 2399 GIC 259 : if (IsUnderPostmaster && !PostmasterIsAlive())
25 tmunro 2400 UIC 0 : exit(1);
2401 :
6998 tgl 2402 GIC 259 : VacuumCostBalance = 0;
2403 :
2404 : /*
2405 : * Balance and update limit values for autovacuum workers. We must do
2406 : * this periodically, as the number of workers across which we are
2407 : * balancing the limit may have changed.
2408 : *
2409 : * TODO: There may be better criteria for determining when to do this
2410 : * besides "check after napping".
2411 : */
2 dgustafsson 2412 GNC 259 : AutoVacuumUpdateCostLimit();
2413 :
6998 tgl 2414 ECB : /* Might have gotten an interrupt while sleeping */
6998 tgl 2415 GIC 259 : CHECK_FOR_INTERRUPTS();
2416 : }
2417 : }
2418 :
1175 akapila 2419 ECB : /*
2420 : * Computes the vacuum delay for parallel workers.
2421 : *
1090 2422 : * The basic idea of a cost-based delay for parallel vacuum is to allow each
2423 : * worker to sleep in proportion to the share of work it's done. We achieve this
2424 : * by allowing all parallel vacuum workers including the leader process to
2425 : * have a shared view of cost related parameters (mainly VacuumCostBalance).
2426 : * We allow each worker to update it as and when it has incurred any cost and
2427 : * then based on that decide whether it needs to sleep. We compute the time
1175 2428 : * to sleep for a worker based on the cost it has incurred
2429 : * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
2430 : * that amount. This avoids putting to sleep those workers which have done less
1090 2431 : * I/O than other workers and therefore ensure that workers
2432 : * which are doing more I/O got throttled more.
2433 : *
2434 : * We allow a worker to sleep only if it has performed I/O above a certain
2435 : * threshold, which is calculated based on the number of active workers
2436 : * (VacuumActiveNWorkers), and the overall cost balance is more than
2437 : * VacuumCostLimit set by the system. Testing reveals that we achieve
2438 : * the required throttling if we force a worker that has done more than 50%
2439 : * of its share of work to sleep.
2440 : */
2441 : static double
1175 akapila 2442 UIC 0 : compute_parallel_delay(void)
2443 : {
2444 0 : double msec = 0;
2445 : uint32 shared_balance;
2446 : int nworkers;
2447 :
1175 akapila 2448 ECB : /* Parallel vacuum must be active */
1175 akapila 2449 UIC 0 : Assert(VacuumSharedCostBalance);
2450 :
2451 0 : nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2452 :
2453 : /* At least count itself */
2454 0 : Assert(nworkers >= 1);
1175 akapila 2455 ECB :
2456 : /* Update the shared cost balance value atomically */
1175 akapila 2457 LBC 0 : shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2458 :
2459 : /* Compute the total local balance for the current worker */
2460 0 : VacuumCostBalanceLocal += VacuumCostBalance;
2461 :
2 dgustafsson 2462 UNC 0 : if ((shared_balance >= vacuum_cost_limit) &&
2463 0 : (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
2464 : {
1175 akapila 2465 ECB : /* Compute sleep time based on the local cost balance */
2 dgustafsson 2466 UNC 0 : msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
1175 akapila 2467 UIC 0 : pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
1175 akapila 2468 LBC 0 : VacuumCostBalanceLocal = 0;
1175 akapila 2469 ECB : }
2470 :
2471 : /*
2472 : * Reset the local balance as we accumulated it into the shared value.
2473 : */
1175 akapila 2474 LBC 0 : VacuumCostBalance = 0;
1175 akapila 2475 ECB :
1175 akapila 2476 LBC 0 : return msec;
2477 : }
1175 akapila 2478 EUB :
2479 : /*
2480 : * A wrapper function of defGetBoolean().
1466 rhaas 2481 ECB : *
2482 : * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead
660 pg 2483 : * of true and false.
1466 rhaas 2484 : */
2485 : static VacOptValue
660 pg 2486 GIC 157 : get_vacoptval_from_boolean(DefElem *def)
2487 : {
2488 157 : return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
2489 : }
2490 :
473 akapila 2491 ECB : /*
2492 : * vac_bulkdel_one_index() -- bulk-deletion for index relation.
2493 : *
2494 : * Returns bulk delete stats derived from input stats
2495 : */
2496 : IndexBulkDeleteResult *
473 akapila 2497 GIC 3803 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
473 akapila 2498 ECB : VacDeadItems *dead_items)
2499 : {
2500 : /* Do bulk deletion */
473 akapila 2501 GIC 3803 : istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
473 akapila 2502 ECB : (void *) dead_items);
2503 :
473 akapila 2504 GIC 3803 : ereport(ivinfo->message_level,
2505 : (errmsg("scanned index \"%s\" to remove %d row versions",
2506 : RelationGetRelationName(ivinfo->index),
2507 : dead_items->num_items)));
2508 :
2509 3803 : return istat;
2510 : }
2511 :
473 akapila 2512 ECB : /*
2513 : * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2514 : *
2515 : * Returns bulk delete stats derived from input stats
2516 : */
2517 : IndexBulkDeleteResult *
473 akapila 2518 GIC 54802 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
473 akapila 2519 ECB : {
473 akapila 2520 CBC 54802 : istat = index_vacuum_cleanup(ivinfo, istat);
473 akapila 2521 ECB :
473 akapila 2522 GIC 54802 : if (istat)
2523 3919 : ereport(ivinfo->message_level,
2524 : (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
2525 : RelationGetRelationName(ivinfo->index),
2526 : istat->num_index_tuples,
2527 : istat->num_pages),
2528 : errdetail("%.0f index row versions were removed.\n"
473 akapila 2529 ECB : "%u index pages were newly deleted.\n"
2530 : "%u index pages are currently deleted, of which %u are currently reusable.",
473 akapila 2531 EUB : istat->tuples_removed,
2532 : istat->pages_newly_deleted,
450 pg 2533 : istat->pages_deleted, istat->pages_free)));
2534 :
473 akapila 2535 GIC 54802 : return istat;
2536 : }
2537 :
2538 : /*
2539 : * Returns the total required space for VACUUM's dead_items array given a
473 akapila 2540 ECB : * max_items value.
473 akapila 2541 EUB : */
2542 : Size
473 akapila 2543 GIC 36848 : vac_max_items_to_alloc_size(int max_items)
2544 : {
2545 36848 : Assert(max_items <= MAXDEADITEMS(MaxAllocSize));
2546 :
473 akapila 2547 CBC 36848 : return offsetof(VacDeadItems, items) + sizeof(ItemPointerData) * max_items;
473 akapila 2548 EUB : }
473 akapila 2549 ECB :
2550 : /*
2551 : * vac_tid_reaped() -- is a particular tid deletable?
2552 : *
2553 : * This has the right signature to be an IndexBulkDeleteCallback.
2554 : *
2555 : * Assumes dead_items array is sorted (in ascending TID order).
2556 : */
2557 : static bool
473 akapila 2558 CBC 7920565 : vac_tid_reaped(ItemPointer itemptr, void *state)
473 akapila 2559 ECB : {
473 akapila 2560 CBC 7920565 : VacDeadItems *dead_items = (VacDeadItems *) state;
2561 : int64 litem,
2562 : ritem,
2563 : item;
2564 : ItemPointer res;
2565 :
473 akapila 2566 GIC 7920565 : litem = itemptr_encode(&dead_items->items[0]);
2567 7920565 : ritem = itemptr_encode(&dead_items->items[dead_items->num_items - 1]);
473 akapila 2568 CBC 7920565 : item = itemptr_encode(itemptr);
473 akapila 2569 EUB :
2570 : /*
473 akapila 2571 ECB : * Doing a simple bound check before bsearch() is useful to avoid the
2572 : * extra cost of bsearch(), especially if dead items on the heap are
2573 : * concentrated in a certain range. Since this function is called for
2574 : * every index tuple, it pays to be really fast.
2575 : */
473 akapila 2576 GIC 7920565 : if (item < litem || item > ritem)
2577 3550630 : return false;
2578 :
61 peter 2579 GNC 4369935 : res = (ItemPointer) bsearch(itemptr,
2580 4369935 : dead_items->items,
473 akapila 2581 CBC 4369935 : dead_items->num_items,
2582 : sizeof(ItemPointerData),
2583 : vac_cmp_itemptr);
473 akapila 2584 ECB :
473 akapila 2585 GIC 4369935 : return (res != NULL);
2586 : }
2587 :
2588 : /*
2589 : * Comparator routines for use with qsort() and bsearch().
2590 : */
2591 : static int
2592 42091211 : vac_cmp_itemptr(const void *left, const void *right)
2593 : {
2594 : BlockNumber lblk,
2595 : rblk;
2596 : OffsetNumber loff,
2597 : roff;
2598 :
2599 42091211 : lblk = ItemPointerGetBlockNumber((ItemPointer) left);
2600 42091211 : rblk = ItemPointerGetBlockNumber((ItemPointer) right);
2601 :
2602 42091211 : if (lblk < rblk)
2603 13696398 : return -1;
2604 28394813 : if (lblk > rblk)
2605 14378620 : return 1;
2606 :
2607 14016193 : loff = ItemPointerGetOffsetNumber((ItemPointer) left);
2608 14016193 : roff = ItemPointerGetOffsetNumber((ItemPointer) right);
2609 :
2610 14016193 : if (loff < roff)
473 akapila 2611 GBC 6668005 : return -1;
473 akapila 2612 GIC 7348188 : if (loff > roff)
473 akapila 2613 GBC 5939438 : return 1;
2614 :
473 akapila 2615 GIC 1408750 : return 0;
2616 : }
|