Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuumlazy.c
4 : * Concurrent ("lazy") vacuuming.
5 : *
6 : * The major space usage for vacuuming is storage for the array of dead TIDs
7 : * that are to be removed from indexes. We want to ensure we can vacuum even
8 : * the very largest relations with finite memory space usage. To do that, we
9 : * set upper bounds on the number of TIDs we can keep track of at once.
10 : *
11 : * We are willing to use at most maintenance_work_mem (or perhaps
12 : * autovacuum_work_mem) memory space to keep track of dead TIDs. We initially
13 : * allocate an array of TIDs of that size, with an upper limit that depends on
14 : * table size (this limit ensures we don't allocate a huge area uselessly for
15 : * vacuuming small tables). If the array threatens to overflow, we must call
16 : * lazy_vacuum to vacuum indexes (and to vacuum the pages that we've pruned).
17 : * This frees up the memory space dedicated to storing dead TIDs.
18 : *
19 : * In practice VACUUM will often complete its initial pass over the target
20 : * heap relation without ever running out of space to store TIDs. This means
21 : * that there only needs to be one call to lazy_vacuum, after the initial pass
22 : * completes.
23 : *
24 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
25 : * Portions Copyright (c) 1994, Regents of the University of California
26 : *
27 : *
28 : * IDENTIFICATION
29 : * src/backend/access/heap/vacuumlazy.c
30 : *
31 : *-------------------------------------------------------------------------
32 : */
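/*
 * Editor's illustrative sketch (not part of vacuumlazy.c, and compiled out):
 * a minimal model of the sizing rule described in the header comment above.
 * The memory budget (maintenance_work_mem or autovacuum_work_mem, in KB) is
 * converted to a count of TIDs, then clamped so that a small table never
 * reserves a uselessly large array.  All names here are hypothetical; the
 * real allocation is performed by dead_items_alloc() later in this file.
 */
#if 0
static int64
sketch_max_dead_tids(int64 work_mem_kb, BlockNumber rel_pages)
{
	int64		budget_tids = (work_mem_kb * 1024L) / sizeof(ItemPointerData);
	int64		table_cap = (int64) rel_pages * MaxHeapTuplesPerPage;

	/* never reserve more TID slots than the table could possibly contain */
	return Min(budget_tids, Max(table_cap, MaxHeapTuplesPerPage));
}
#endif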
33 : #include "postgres.h"
34 :
35 : #include <math.h>
36 :
37 : #include "access/amapi.h"
38 : #include "access/genam.h"
39 : #include "access/heapam.h"
40 : #include "access/heapam_xlog.h"
41 : #include "access/htup_details.h"
42 : #include "access/multixact.h"
43 : #include "access/transam.h"
44 : #include "access/visibilitymap.h"
45 : #include "access/xact.h"
46 : #include "access/xlog.h"
47 : #include "access/xloginsert.h"
48 : #include "catalog/index.h"
49 : #include "catalog/storage.h"
50 : #include "commands/dbcommands.h"
51 : #include "commands/progress.h"
52 : #include "commands/vacuum.h"
53 : #include "executor/instrument.h"
54 : #include "miscadmin.h"
55 : #include "optimizer/paths.h"
56 : #include "pgstat.h"
57 : #include "portability/instr_time.h"
58 : #include "postmaster/autovacuum.h"
59 : #include "storage/bufmgr.h"
60 : #include "storage/freespace.h"
61 : #include "storage/lmgr.h"
62 : #include "tcop/tcopprot.h"
63 : #include "utils/lsyscache.h"
64 : #include "utils/memutils.h"
65 : #include "utils/pg_rusage.h"
66 : #include "utils/timestamp.h"
67 :
68 :
69 : /*
70 : * Space/time tradeoff parameters: do these need to be user-tunable?
71 : *
72 : * To consider truncating the relation, we want there to be at least
73 : * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
74 : * is less) potentially-freeable pages.
75 : */
76 : #define REL_TRUNCATE_MINIMUM 1000
77 : #define REL_TRUNCATE_FRACTION 16
78 :
79 : /*
80 : * Timing parameters for truncate locking heuristics.
81 : *
 82 : * These were not exposed as user-tunable GUC values because it didn't seem
83 : * that the potential for improvement was great enough to merit the cost of
84 : * supporting them.
85 : */
86 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
87 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
88 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
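/*
 * Editor's illustrative sketch (not part of vacuumlazy.c, and compiled out):
 * roughly how the three timing parameters above cooperate.  Truncation
 * retries a conditional AccessExclusiveLock every WAIT_INTERVAL milliseconds
 * and gives up once LOCK_TIMEOUT has elapsed; while the backwards scan holds
 * the lock, it polls about every CHECK_INTERVAL milliseconds and backs off
 * if another backend is found waiting.  The loop below is a hypothetical
 * simplification (the real code in lazy_truncate_heap() waits on the
 * process latch rather than sleeping unconditionally).
 */
#if 0
static bool
sketch_acquire_truncate_lock(Relation rel)
{
	int			elapsed_ms = 0;

	while (!ConditionalLockRelation(rel, AccessExclusiveLock))
	{
		if (elapsed_ms >= VACUUM_TRUNCATE_LOCK_TIMEOUT)
			return false;		/* give up; skip truncation this time */
		pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L);
		elapsed_ms += VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL;
	}
	return true;
}
#endif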
89 :
90 : /*
91 : * Threshold that controls whether we bypass index vacuuming and heap
 92 : * vacuuming as an optimization.
93 : */
94 : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
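/*
 * Editor's illustrative sketch (not part of vacuumlazy.c, and compiled out):
 * lazy_vacuum() compares the number of heap pages containing LP_DEAD items
 * against this threshold and can bypass index vacuuming (and therefore the
 * second heap pass) when very few pages are affected.  The condition below
 * is a hypothetical simplification; the real test also requires that the
 * collected dead items occupy only a small amount of memory.
 */
#if 0
	bypass = (vacrel->lpdead_item_pages <
			  (BlockNumber) (BYPASS_THRESHOLD_PAGES * vacrel->rel_pages));
#endif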
95 :
96 : /*
97 : * Perform a failsafe check each time we scan another 4GB of pages.
98 : * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
99 : */
100 : #define FAILSAFE_EVERY_PAGES \
101 : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
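/*
 * Editor's note, worked example: with the default BLCKSZ of 8192 this is
 * (4 * 1024 * 1024 * 1024) / 8192 = 524288 blocks, i.e. 2^19, matching the
 * "usually 2^19" remark above.
 */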
102 :
103 : /*
104 : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
105 : * (it won't be exact because we only vacuum FSM after processing a heap page
106 : * that has some removable tuples). When there are indexes, this is ignored,
107 : * and we vacuum FSM after each index/heap cleaning pass.
108 : */
109 : #define VACUUM_FSM_EVERY_PAGES \
110 : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
111 :
112 : /*
 113 : * Before we consider skipping a page that's marked as all-visible in the
 114 : * visibility map, we must've seen at least this many all-visible pages.
115 : */
116 : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
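/*
 * Editor's illustrative sketch (not part of vacuumlazy.c, and compiled out):
 * lazy_scan_skip() (below) only skips a range once it has accumulated at
 * least this many consecutive skippable blocks; shorter runs are scanned
 * anyway so as not to defeat OS read-ahead.  Hypothetical, simplified form
 * of the final decision:
 */
#if 0
	*skipping_current_range = (nskippable_blocks >= SKIP_PAGES_THRESHOLD);
#endif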
117 :
118 : /*
119 : * Size of the prefetch window for lazy vacuum backwards truncation scan.
120 : * Needs to be a power of 2.
121 : */
122 : #define PREFETCH_SIZE ((BlockNumber) 32)
123 :
124 : /*
 125 : * Macro to check if we are in a parallel vacuum. If true, we are in
 126 : * parallel mode and the DSM segment is initialized.
127 : */
128 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
129 :
130 : /* Phases of vacuum during which we report error context. */
131 : typedef enum
132 : {
133 : VACUUM_ERRCB_PHASE_UNKNOWN,
134 : VACUUM_ERRCB_PHASE_SCAN_HEAP,
135 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
136 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
137 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
138 : VACUUM_ERRCB_PHASE_TRUNCATE
139 : } VacErrPhase;
140 :
141 : typedef struct LVRelState
142 : {
143 : /* Target heap relation and its indexes */
144 : Relation rel;
145 : Relation *indrels;
146 : int nindexes;
147 :
148 : /* Buffer access strategy and parallel vacuum state */
149 : BufferAccessStrategy bstrategy;
150 : ParallelVacuumState *pvs;
151 :
152 : /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
153 : bool aggressive;
154 : /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
155 : bool skipwithvm;
156 : /* Consider index vacuuming bypass optimization? */
157 : bool consider_bypass_optimization;
158 :
159 : /* Doing index vacuuming, index cleanup, rel truncation? */
160 : bool do_index_vacuuming;
161 : bool do_index_cleanup;
162 : bool do_rel_truncate;
163 :
164 : /* VACUUM operation's cutoffs for freezing and pruning */
165 : struct VacuumCutoffs cutoffs;
166 : GlobalVisState *vistest;
167 : /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
168 : TransactionId NewRelfrozenXid;
169 : MultiXactId NewRelminMxid;
170 : bool skippedallvis;
171 :
172 : /* Error reporting state */
173 : char *dbname;
174 : char *relnamespace;
175 : char *relname;
176 : char *indname; /* Current index name */
177 : BlockNumber blkno; /* used only for heap operations */
178 : OffsetNumber offnum; /* used only for heap operations */
179 : VacErrPhase phase;
180 : bool verbose; /* VACUUM VERBOSE? */
181 :
182 : /*
183 : * dead_items stores TIDs whose index tuples are deleted by index
184 : * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
185 : * that has been processed by lazy_scan_prune. Also needed by
186 : * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
187 : * LP_UNUSED during second heap pass.
188 : */
189 : VacDeadItems *dead_items; /* TIDs whose index tuples we'll delete */
190 : BlockNumber rel_pages; /* total number of pages */
191 : BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
192 : BlockNumber removed_pages; /* # pages removed by relation truncation */
193 : BlockNumber frozen_pages; /* # pages with newly frozen tuples */
194 : BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
195 : BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
196 : BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
197 :
198 : /* Statistics output by us, for table */
199 : double new_rel_tuples; /* new estimated total # of tuples */
200 : double new_live_tuples; /* new estimated total # of live tuples */
201 : /* Statistics output by index AMs */
202 : IndexBulkDeleteResult **indstats;
203 :
204 : /* Instrumentation counters */
205 : int num_index_scans;
206 : /* Counters that follow are only for scanned_pages */
207 : int64 tuples_deleted; /* # deleted from table */
208 : int64 tuples_frozen; /* # newly frozen */
209 : int64 lpdead_items; /* # deleted from indexes */
210 : int64 live_tuples; /* # live tuples remaining */
211 : int64 recently_dead_tuples; /* # dead, but not yet removable */
212 : int64 missed_dead_tuples; /* # removable, but not removed */
213 : } LVRelState;
214 :
215 : /*
216 : * State returned by lazy_scan_prune()
217 : */
218 : typedef struct LVPagePruneState
219 : {
220 : bool hastup; /* Page prevents rel truncation? */
221 : bool has_lpdead_items; /* includes existing LP_DEAD items */
222 :
223 : /*
224 : * State describes the proper VM bit states to set for the page following
225 : * pruning and freezing. all_visible implies !has_lpdead_items, but don't
226 : * trust all_frozen result unless all_visible is also set to true.
227 : */
228 : bool all_visible; /* Every item visible to all? */
229 : bool all_frozen; /* provided all_visible is also true */
230 : TransactionId visibility_cutoff_xid; /* For recovery conflicts */
231 : } LVPagePruneState;
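/*
 * Editor's illustrative sketch (not part of vacuumlazy.c, and compiled out):
 * how a caller is expected to read the two flags above -- all_frozen is only
 * meaningful when all_visible is also set.  This mirrors the visibility-map
 * handling in lazy_scan_heap() further down:
 */
#if 0
	if (prunestate.all_visible)
	{
		uint8		flags = VISIBILITYMAP_ALL_VISIBLE;

		if (prunestate.all_frozen)
			flags |= VISIBILITYMAP_ALL_FROZEN;
		/* ... set the page's bits in the visibility map ... */
	}
#endif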
232 :
233 : /* Struct for saving and restoring vacuum error information. */
234 : typedef struct LVSavedErrInfo
235 : {
236 : BlockNumber blkno;
237 : OffsetNumber offnum;
238 : VacErrPhase phase;
239 : } LVSavedErrInfo;
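/*
 * Editor's illustrative sketch (not part of vacuumlazy.c, and compiled out):
 * the save/restore pattern these fields support.  A caller stashes the
 * current error-reporting position before entering a sub-phase and puts it
 * back afterwards (hypothetical usage, based on the update/restore
 * prototypes below):
 */
#if 0
	LVSavedErrInfo saved_err_info;

	update_vacuum_error_info(vacrel, &saved_err_info,
							 VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
							 InvalidOffsetNumber);
	/* ... perform the work for this phase ... */
	restore_vacuum_error_info(vacrel, &saved_err_info);
#endif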
240 :
241 :
242 : /* non-export function prototypes */
243 : static void lazy_scan_heap(LVRelState *vacrel);
244 : static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer,
245 : BlockNumber next_block,
246 : bool *next_unskippable_allvis,
247 : bool *skipping_current_range);
248 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
249 : BlockNumber blkno, Page page,
250 : bool sharelock, Buffer vmbuffer);
251 : static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
252 : BlockNumber blkno, Page page,
253 : LVPagePruneState *prunestate);
254 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
255 : BlockNumber blkno, Page page,
256 : bool *hastup, bool *recordfreespace);
257 : static void lazy_vacuum(LVRelState *vacrel);
258 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
259 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
260 : static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
261 : Buffer buffer, int index, Buffer vmbuffer);
262 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
263 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
264 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
265 : IndexBulkDeleteResult *istat,
266 : double reltuples,
267 : LVRelState *vacrel);
268 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
269 : IndexBulkDeleteResult *istat,
270 : double reltuples,
271 : bool estimated_count,
272 : LVRelState *vacrel);
273 : static bool should_attempt_truncation(LVRelState *vacrel);
274 : static void lazy_truncate_heap(LVRelState *vacrel);
275 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
276 : bool *lock_waiter_detected);
277 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
278 : static void dead_items_cleanup(LVRelState *vacrel);
279 : static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
280 : TransactionId *visibility_cutoff_xid, bool *all_frozen);
281 : static void update_relstats_all_indexes(LVRelState *vacrel);
282 : static void vacuum_error_callback(void *arg);
283 : static void update_vacuum_error_info(LVRelState *vacrel,
284 : LVSavedErrInfo *saved_vacrel,
285 : int phase, BlockNumber blkno,
286 : OffsetNumber offnum);
287 : static void restore_vacuum_error_info(LVRelState *vacrel,
288 : const LVSavedErrInfo *saved_vacrel);
289 :
290 :
291 : /*
292 : * heap_vacuum_rel() -- perform VACUUM for one heap relation
293 : *
294 : * This routine sets things up for and then calls lazy_scan_heap, where
295 : * almost all work actually takes place. Finalizes everything after call
296 : * returns by managing relation truncation and updating rel's pg_class
393 pg 297 ECB : * entry. (Also updates pg_class entries for any indexes that need it.)
298 : *
299 : * At entry, we have already established a transaction and opened
300 : * and locked the relation.
301 : */
302 : void
734 pg 303 GIC 36739 : heap_vacuum_rel(Relation rel, VacuumParams *params,
304 : BufferAccessStrategy bstrategy)
305 : {
306 : LVRelState *vacrel;
307 : bool verbose,
308 : instrument,
372 pg 309 ECB : skipwithvm,
310 : frozenxid_updated,
311 : minmulti_updated;
312 : BlockNumber orig_rel_pages,
313 : new_rel_pages,
314 : new_rel_allvisible;
5835 alvherre 315 : PGRUsage ru0;
5624 bruce 316 CBC 36739 : TimestampTz starttime = 0;
359 pg 317 36739 : PgStat_Counter startreadtime = 0,
332 tgl 318 36739 : startwritetime = 0;
359 pg 319 GIC 36739 : WalUsage startwalusage = pgWalUsage;
359 pg 320 CBC 36739 : int64 StartPageHit = VacuumPageHit,
321 36739 : StartPageMiss = VacuumPageMiss,
322 36739 : StartPageDirty = VacuumPageDirty;
323 : ErrorContextCallback errcallback;
372 pg 324 GBC 36739 : char **indnames = NULL;
5835 alvherre 325 EUB :
450 pg 326 GIC 36739 : verbose = (params->options & VACOPT_VERBOSE) != 0;
327 36814 : instrument = (verbose || (IsAutoVacuumWorkerProcess() &&
328 75 : params->log_min_duration >= 0));
450 pg 329 CBC 36739 : if (instrument)
330 : {
4252 rhaas 331 GIC 79 : pg_rusage_init(&ru0);
4153 alvherre 332 79 : starttime = GetCurrentTimestamp();
754 sfrost 333 79 : if (track_io_timing)
334 : {
754 sfrost 335 UIC 0 : startreadtime = pgStatBlockReadTime;
336 0 : startwritetime = pgStatBlockWriteTime;
337 : }
338 : }
339 :
2587 rhaas 340 GIC 36739 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
341 : RelationGetRelid(rel));
342 :
343 : /*
344 : * Setup error traceback support for ereport() first. The idea is to set
345 : * up an error context callback to display additional information on any
346 : * error during a vacuum. During different phases of vacuum, we update
347 : * the state so that the error context callback always display current
348 : * information.
349 : *
350 : * Copy the names of heap rel into local memory for error reporting
351 : * purposes, too. It isn't always safe to assume that we can get the name
450 pg 352 ECB : * of each rel. It's convenient for code in lazy_scan_heap to always use
353 : * these temp copies.
354 : */
734 pg 355 CBC 36739 : vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
96 pg 356 GNC 36739 : vacrel->dbname = get_database_name(MyDatabaseId);
450 pg 357 CBC 36739 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
358 36739 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
359 36739 : vacrel->indname = NULL;
360 36739 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
361 36739 : vacrel->verbose = verbose;
450 pg 362 GIC 36739 : errcallback.callback = vacuum_error_callback;
363 36739 : errcallback.arg = vacrel;
450 pg 364 CBC 36739 : errcallback.previous = error_context_stack;
365 36739 : error_context_stack = &errcallback;
734 pg 366 ECB :
450 367 : /* Set up high level stuff about rel and its indexes */
734 pg 368 CBC 36739 : vacrel->rel = rel;
369 36739 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
734 pg 370 ECB : &vacrel->indrels);
108 pg 371 GNC 36739 : vacrel->bstrategy = bstrategy;
450 pg 372 GIC 36739 : if (instrument && vacrel->nindexes > 0)
373 : {
374 : /* Copy index names used by instrumentation (not error reporting) */
450 pg 375 CBC 71 : indnames = palloc(sizeof(char *) * vacrel->nindexes);
376 213 : for (int i = 0; i < vacrel->nindexes; i++)
377 142 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
450 pg 378 ECB : }
379 :
380 : /*
660 381 : * The index_cleanup param either disables index vacuuming and cleanup or
382 : * forces it to go ahead when we would otherwise apply the index bypass
383 : * optimization. The default is 'auto', which leaves the final decision
384 : * up to lazy_vacuum().
385 : *
386 : * The truncate param allows user to avoid attempting relation truncation,
387 : * though it can't force truncation to happen.
388 : */
660 pg 389 GIC 36739 : Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
390 36739 : Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
391 : params->truncate != VACOPTVALUE_AUTO);
2 dgustafsson 392 GNC 36739 : Assert(!VacuumFailsafeActive);
450 pg 393 GIC 36739 : vacrel->consider_bypass_optimization = true;
733 394 36739 : vacrel->do_index_vacuuming = true;
395 36739 : vacrel->do_index_cleanup = true;
660 396 36739 : vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
397 36739 : if (params->index_cleanup == VACOPTVALUE_DISABLED)
398 : {
399 : /* Force disable index vacuuming up-front */
733 400 132 : vacrel->do_index_vacuuming = false;
401 132 : vacrel->do_index_cleanup = false;
733 pg 402 ECB : }
660 pg 403 CBC 36607 : else if (params->index_cleanup == VACOPTVALUE_ENABLED)
660 pg 404 ECB : {
405 : /* Force index vacuuming. Note that failsafe can still bypass. */
660 pg 406 CBC 15 : vacrel->consider_bypass_optimization = false;
660 pg 407 ECB : }
408 : else
409 : {
410 : /* Default/auto, make all decisions dynamically */
660 pg 411 GIC 36592 : Assert(params->index_cleanup == VACOPTVALUE_AUTO);
412 : }
413 :
414 : /* Initialize page counters explicitly (be tidy) */
393 pg 415 CBC 36739 : vacrel->scanned_pages = 0;
393 pg 416 GIC 36739 : vacrel->removed_pages = 0;
213 pg 417 GNC 36739 : vacrel->frozen_pages = 0;
393 pg 418 CBC 36739 : vacrel->lpdead_item_pages = 0;
393 pg 419 GIC 36739 : vacrel->missed_dead_pages = 0;
393 pg 420 CBC 36739 : vacrel->nonempty_pages = 0;
393 pg 421 EUB : /* dead_items_alloc allocates vacrel->dead_items later on */
422 :
423 : /* Allocate/initialize output statistics state */
393 pg 424 GIC 36739 : vacrel->new_rel_tuples = 0;
425 36739 : vacrel->new_live_tuples = 0;
393 pg 426 CBC 36739 : vacrel->indstats = (IndexBulkDeleteResult **)
393 pg 427 GIC 36739 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
428 :
429 : /* Initialize remaining counters (be tidy) */
430 36739 : vacrel->num_index_scans = 0;
431 36739 : vacrel->tuples_deleted = 0;
213 pg 432 GNC 36739 : vacrel->tuples_frozen = 0;
393 pg 433 GIC 36739 : vacrel->lpdead_items = 0;
434 36739 : vacrel->live_tuples = 0;
435 36739 : vacrel->recently_dead_tuples = 0;
436 36739 : vacrel->missed_dead_tuples = 0;
437 :
438 : /*
439 : * Get cutoffs that determine which deleted tuples are considered DEAD,
440 : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
441 : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
442 : * happen in this order to ensure that the OldestXmin cutoff field works
443 : * as an upper bound on the XIDs stored in the pages we'll actually scan
444 : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
393 pg 445 ECB : *
446 : * Next acquire vistest, a related cutoff that's used in heap_page_prune.
447 : * We expect vistest will always make heap_page_prune remove any deleted
448 : * tuple whose xmax is < OldestXmin. lazy_scan_prune must never become
449 : * confused about whether a tuple should be frozen or removed. (In the
450 : * future we might want to teach lazy_scan_prune to recompute vistest from
451 : * time to time, to increase the number of dead tuples it can prune away.)
452 : */
108 pg 453 GNC 36739 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
393 pg 454 CBC 36739 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
393 pg 455 GIC 36739 : vacrel->vistest = GlobalVisTestFor(rel);
456 : /* Initialize state used to track oldest extant XID/MXID */
108 pg 457 GNC 36739 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
458 36739 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
371 pg 459 GIC 36739 : vacrel->skippedallvis = false;
108 pg 460 GNC 36739 : skipwithvm = true;
461 36739 : if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
462 : {
463 : /*
464 : * Force aggressive mode, and disable skipping blocks using the
465 : * visibility map (even those set all-frozen)
466 : */
467 146 : vacrel->aggressive = true;
468 146 : skipwithvm = false;
469 : }
470 :
471 36739 : vacrel->skipwithvm = skipwithvm;
472 :
473 36739 : if (verbose)
474 : {
475 4 : if (vacrel->aggressive)
108 pg 476 UNC 0 : ereport(INFO,
477 : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
478 : vacrel->dbname, vacrel->relnamespace,
479 : vacrel->relname)));
480 : else
108 pg 481 GNC 4 : ereport(INFO,
482 : (errmsg("vacuuming \"%s.%s.%s\"",
483 : vacrel->dbname, vacrel->relnamespace,
484 : vacrel->relname)));
485 : }
486 :
370 pg 487 ECB : /*
488 : * Allocate dead_items array memory using dead_items_alloc. This handles
489 : * parallel VACUUM initialization as part of allocating shared memory
490 : * space used for dead_items. (But do a failsafe precheck first, to
491 : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
492 : * is already dangerously old.)
493 : */
370 pg 494 CBC 36739 : lazy_check_wraparound_failsafe(vacrel);
370 pg 495 GIC 36739 : dead_items_alloc(vacrel, params->nworkers);
496 :
498 pg 497 ECB : /*
498 : * Call lazy_scan_heap to perform all required heap pruning, index
499 : * vacuuming, and heap vacuuming (plus related processing)
500 : */
370 pg 501 GIC 36739 : lazy_scan_heap(vacrel);
502 :
503 : /*
504 : * Free resources managed by dead_items_alloc. This ends parallel mode in
505 : * passing when necessary.
506 : */
370 pg 507 CBC 36739 : dead_items_cleanup(vacrel);
370 pg 508 GIC 36739 : Assert(!IsInParallelMode());
509 :
510 : /*
393 pg 511 ECB : * Update pg_class entries for each of rel's indexes where appropriate.
512 : *
513 : * Unlike the later update to rel's pg_class entry, this is not critical.
514 : * Maintains relpages/reltuples statistics used by the planner only.
7940 tgl 515 : */
393 pg 516 GIC 36739 : if (vacrel->do_index_cleanup)
517 36607 : update_relstats_all_indexes(vacrel);
518 :
519 : /* Done with rel's indexes */
520 36739 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
521 :
393 pg 522 ECB : /* Optionally truncate rel */
660 pg 523 CBC 36739 : if (should_attempt_truncation(vacrel))
734 524 95 : lazy_truncate_heap(vacrel);
525 :
526 : /* Pop the error context stack */
1105 akapila 527 GIC 36739 : error_context_stack = errcallback.previous;
528 :
529 : /* Report that we are now doing final cleanup */
2581 rhaas 530 36739 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2581 rhaas 531 ECB : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
532 :
5240 heikki.linnakangas 533 : /*
393 pg 534 EUB : * Prepare to update rel's pg_class entry.
535 : *
536 : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
537 : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
538 : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
539 : */
108 pg 540 GNC 36739 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
541 : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
542 : vacrel->cutoffs.relfrozenxid,
371 pg 543 ECB : vacrel->NewRelfrozenXid));
108 pg 544 GNC 36739 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
545 : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
546 : vacrel->cutoffs.relminmxid,
547 : vacrel->NewRelminMxid));
371 pg 548 GIC 36739 : if (vacrel->skippedallvis)
549 : {
550 : /*
551 : * Must keep original relfrozenxid in a non-aggressive VACUUM that
552 : * chose to skip an all-visible page range. The state that tracks new
553 : * values will have missed unfrozen XIDs from the pages we skipped.
554 : */
108 pg 555 GNC 22 : Assert(!vacrel->aggressive);
371 pg 556 GIC 22 : vacrel->NewRelfrozenXid = InvalidTransactionId;
557 22 : vacrel->NewRelminMxid = InvalidMultiXactId;
371 pg 558 ECB : }
559 :
560 : /*
952 tgl 561 : * For safety, clamp relallvisible to be not more than what we're setting
371 pg 562 : * pg_class.relpages to
5240 heikki.linnakangas 563 : */
422 pg 564 GIC 36739 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
734 pg 565 CBC 36739 : visibilitymap_count(rel, &new_rel_allvisible, NULL);
4195 tgl 566 GIC 36739 : if (new_rel_allvisible > new_rel_pages)
4195 tgl 567 LBC 0 : new_rel_allvisible = new_rel_pages;
568 :
422 pg 569 ECB : /*
393 570 : * Now actually update rel's pg_class entry.
571 : *
572 : * In principle new_live_tuples could be -1 indicating that we (still)
573 : * don't know the tuple count. In practice that can't happen, since we
574 : * scan every page that isn't skipped using the visibility map.
575 : */
371 pg 576 GIC 36739 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
577 36739 : new_rel_allvisible, vacrel->nindexes > 0,
578 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
371 pg 579 ECB : &frozenxid_updated, &minmulti_updated, false);
6478 tgl 580 :
720 pg 581 : /*
368 andres 582 : * Report results to the cumulative stats system, too.
720 pg 583 : *
584 : * Deliberately avoid telling the stats system about LP_DEAD items that
585 : * remain in the table due to VACUUM bypassing index and heap vacuuming.
498 586 : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
587 : * It seems like a good idea to err on the side of not vacuuming again too
588 : * soon in cases where the failsafe prevented significant amounts of heap
720 589 : * vacuuming.
590 : */
734 pg 591 GIC 22099 : pgstat_report_vacuum(RelationGetRelid(rel),
592 36739 : rel->rd_rel->relisshared,
372 593 14640 : Max(vacrel->new_live_tuples, 0),
422 594 36739 : vacrel->recently_dead_tuples +
595 36739 : vacrel->missed_dead_tuples);
2587 rhaas 596 CBC 36739 : pgstat_progress_end_command();
5835 alvherre 597 ECB :
450 pg 598 GIC 36739 : if (instrument)
5835 alvherre 599 ECB : {
3955 bruce 600 GIC 79 : TimestampTz endtime = GetCurrentTimestamp();
601 :
450 pg 602 113 : if (verbose || params->log_min_duration == 0 ||
4153 alvherre 603 34 : TimestampDifferenceExceeds(starttime, endtime,
604 : params->log_min_duration))
605 : {
606 : long secs_dur;
359 pg 607 EUB : int usecs_dur;
372 608 : WalUsage walusage;
609 : StringInfoData buf;
1957 rhaas 610 : char *msgfmt;
611 : int32 diff;
359 pg 612 GIC 45 : int64 PageHitOp = VacuumPageHit - StartPageHit,
613 45 : PageMissOp = VacuumPageMiss - StartPageMiss,
359 pg 614 CBC 45 : PageDirtyOp = VacuumPageDirty - StartPageDirty;
615 45 : double read_rate = 0,
359 pg 616 GIC 45 : write_rate = 0;
2878 bruce 617 ECB :
359 pg 618 GIC 45 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
1098 akapila 619 CBC 45 : memset(&walusage, 0, sizeof(WalUsage));
359 pg 620 GIC 45 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
621 :
3034 alvherre 622 45 : initStringInfo(&buf);
450 pg 623 45 : if (verbose)
450 pg 624 ECB : {
625 : /*
626 : * Aggressiveness already reported earlier, in dedicated
627 : * VACUUM VERBOSE ereport
628 : */
450 pg 629 CBC 4 : Assert(!params->is_wraparound);
630 4 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
450 pg 631 ECB : }
450 pg 632 CBC 41 : else if (params->is_wraparound)
1668 michael 633 ECB : {
734 pg 634 : /*
635 : * While it's possible for a VACUUM to be both is_wraparound
734 pg 636 EUB : * and !aggressive, that's just a corner-case -- is_wraparound
637 : * implies aggressive. Produce distinct output for the corner
638 : * case all the same, just in case.
639 : */
108 pg 640 UNC 0 : if (vacrel->aggressive)
1104 michael 641 LBC 0 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1104 michael 642 ECB : else
1104 michael 643 LBC 0 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
644 : }
1991 rhaas 645 ECB : else
646 : {
108 pg 647 GNC 41 : if (vacrel->aggressive)
1668 michael 648 CBC 3 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1668 michael 649 ECB : else
1668 michael 650 CBC 38 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
651 : }
1991 rhaas 652 GIC 45 : appendStringInfo(&buf, msgfmt,
653 : vacrel->dbname,
654 : vacrel->relnamespace,
734 pg 655 ECB : vacrel->relname,
656 : vacrel->num_index_scans);
422 pg 657 CBC 90 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
436 pg 658 ECB : vacrel->removed_pages,
659 : new_rel_pages,
660 : vacrel->scanned_pages,
392 661 : orig_rel_pages == 0 ? 100.0 :
422 pg 662 GIC 45 : 100.0 * vacrel->scanned_pages / orig_rel_pages);
3034 alvherre 663 45 : appendStringInfo(&buf,
422 pg 664 CBC 45 : _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
734 665 45 : (long long) vacrel->tuples_deleted,
666 45 : (long long) vacrel->new_rel_tuples,
422 pg 667 GIC 45 : (long long) vacrel->recently_dead_tuples);
422 pg 668 CBC 45 : if (vacrel->missed_dead_tuples > 0)
422 pg 669 LBC 0 : appendStringInfo(&buf,
422 pg 670 UIC 0 : _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
422 pg 671 LBC 0 : (long long) vacrel->missed_dead_tuples,
672 : vacrel->missed_dead_pages);
108 pg 673 GNC 45 : diff = (int32) (ReadNextTransactionId() -
674 45 : vacrel->cutoffs.OldestXmin);
422 pg 675 GIC 45 : appendStringInfo(&buf,
359 676 45 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
677 : vacrel->cutoffs.OldestXmin, diff);
422 pg 678 CBC 45 : if (frozenxid_updated)
422 pg 679 ECB : {
108 pg 680 GNC 32 : diff = (int32) (vacrel->NewRelfrozenXid -
681 32 : vacrel->cutoffs.relfrozenxid);
422 pg 682 GBC 32 : appendStringInfo(&buf,
359 pg 683 GIC 32 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
371 pg 684 ECB : vacrel->NewRelfrozenXid, diff);
685 : }
422 pg 686 CBC 45 : if (minmulti_updated)
687 : {
108 pg 688 GNC 11 : diff = (int32) (vacrel->NewRelminMxid -
689 11 : vacrel->cutoffs.relminmxid);
422 pg 690 CBC 11 : appendStringInfo(&buf,
359 691 11 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
371 pg 692 ECB : vacrel->NewRelminMxid, diff);
693 : }
213 pg 694 GNC 45 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
695 : vacrel->frozen_pages,
696 : orig_rel_pages == 0 ? 100.0 :
697 45 : 100.0 * vacrel->frozen_pages / orig_rel_pages,
698 45 : (long long) vacrel->tuples_frozen);
359 pg 699 CBC 45 : if (vacrel->do_index_vacuuming)
700 : {
701 44 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
702 14 : appendStringInfoString(&buf, _("index scan not needed: "));
703 : else
704 30 : appendStringInfoString(&buf, _("index scan needed: "));
359 pg 705 ECB :
359 pg 706 CBC 44 : msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
707 : }
708 : else
709 : {
2 dgustafsson 710 GNC 1 : if (!VacuumFailsafeActive)
359 pg 711 GIC 1 : appendStringInfoString(&buf, _("index scan bypassed: "));
359 pg 712 ECB : else
359 pg 713 UIC 0 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
359 pg 714 EUB :
359 pg 715 GBC 1 : msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
716 : }
717 45 : appendStringInfo(&buf, msgfmt,
718 : vacrel->lpdead_item_pages,
719 : orig_rel_pages == 0 ? 100.0 :
359 pg 720 CBC 45 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
359 pg 721 GIC 45 : (long long) vacrel->lpdead_items);
734 pg 722 CBC 124 : for (int i = 0; i < vacrel->nindexes; i++)
747 michael 723 ECB : {
734 pg 724 CBC 79 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
747 michael 725 ECB :
734 pg 726 GIC 79 : if (!istat)
747 michael 727 CBC 5 : continue;
728 :
729 74 : appendStringInfo(&buf,
746 730 74 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
747 michael 731 GIC 74 : indnames[i],
732 : istat->num_pages,
733 : istat->pages_newly_deleted,
734 pg 734 ECB : istat->pages_deleted,
735 : istat->pages_free);
747 michael 736 : }
754 sfrost 737 CBC 45 : if (track_io_timing)
754 sfrost 738 ECB : {
590 pg 739 LBC 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
590 pg 740 UIC 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
590 pg 741 ECB :
590 pg 742 UIC 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
590 pg 743 ECB : read_ms, write_ms);
744 : }
359 pg 745 GIC 45 : if (secs_dur > 0 || usecs_dur > 0)
746 : {
747 45 : read_rate = (double) BLCKSZ * PageMissOp / (1024 * 1024) /
359 pg 748 CBC 45 : (secs_dur + usecs_dur / 1000000.0);
359 pg 749 GIC 45 : write_rate = (double) BLCKSZ * PageDirtyOp / (1024 * 1024) /
359 pg 750 CBC 45 : (secs_dur + usecs_dur / 1000000.0);
359 pg 751 ECB : }
590 pg 752 GIC 45 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
590 pg 753 ECB : read_rate, write_rate);
590 pg 754 CBC 45 : appendStringInfo(&buf,
590 pg 755 GIC 45 : _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
359 pg 756 ECB : (long long) PageHitOp,
757 : (long long) PageMissOp,
758 : (long long) PageDirtyOp);
1098 akapila 759 GIC 45 : appendStringInfo(&buf,
590 pg 760 45 : _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
697 fujii 761 45 : (long long) walusage.wal_records,
762 45 : (long long) walusage.wal_fpi,
937 peter 763 45 : (unsigned long long) walusage.wal_bytes);
590 pg 764 45 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
765 :
450 766 45 : ereport(verbose ? INFO : LOG,
767 : (errmsg_internal("%s", buf.data)));
3034 alvherre 768 45 : pfree(buf.data);
769 : }
770 : }
771 :
772 : /* Cleanup index statistics and index names */
734 pg 773 91592 : for (int i = 0; i < vacrel->nindexes; i++)
774 : {
775 54853 : if (vacrel->indstats[i])
776 3919 : pfree(vacrel->indstats[i]);
777 :
450 778 54853 : if (instrument)
747 michael 779 142 : pfree(indnames[i]);
780 : }
7940 tgl 781 36739 : }
782 :
783 : /*
784 : * lazy_scan_heap() -- workhorse function for VACUUM
785 : *
786 : * This routine prunes each page in the heap, and considers the need to
787 : * freeze remaining tuples with storage (not including pages that can be
788 : * skipped using the visibility map). Also performs related maintenance
789 : * of the FSM and visibility map. These steps all take place during an
790 : * initial pass over the target heap relation.
791 : *
792 : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
793 : * consists of deleting index tuples that point to LP_DEAD items left in
 794 : * heap pages following pruning. The earlier initial pass over the heap will
498 pg 795 ECB : * have collected the TIDs whose index tuples need to be removed.
796 : *
797 : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
798 : * largely consists of marking LP_DEAD items (from collected TID array)
799 : * as LP_UNUSED. This has to happen in a second, final pass over the
800 : * heap, to preserve a basic invariant that all index AMs rely on: no
801 : * extant index tuple can ever be allowed to contain a TID that points to
802 : * an LP_UNUSED line pointer in the heap. We must disallow premature
803 : * recycling of line pointers to avoid index scans that get confused
804 : * about which TID points to which tuple immediately after recycling.
805 : * (Actually, this isn't a concern when target heap relation happens to
806 : * have no indexes, which allows us to safely apply the one-pass strategy
807 : * as an optimization).
808 : *
809 : * In practice we often have enough space to fit all TIDs, and so won't
810 : * need to call lazy_vacuum more than once, after our initial pass over
811 : * the heap has totally finished. Otherwise things are slightly more
812 : * complicated: our "initial pass" over the heap applies only to those
813 : * pages that were pruned before we needed to call lazy_vacuum, and our
814 : * "final pass" over the heap only vacuums these same heap pages.
815 : * However, we process indexes in full every time lazy_vacuum is called,
816 : * which makes index processing very inefficient when memory is in short
817 : * supply.
818 : */
7940 tgl 819 : static void
370 pg 820 GIC 36739 : lazy_scan_heap(LVRelState *vacrel)
821 : {
393 pg 822 CBC 36739 : BlockNumber rel_pages = vacrel->rel_pages,
823 : blkno,
824 : next_unskippable_block,
370 pg 825 GIC 36739 : next_fsm_block_to_vacuum = 0;
826 36739 : VacDeadItems *dead_items = vacrel->dead_items;
5240 heikki.linnakangas 827 36739 : Buffer vmbuffer = InvalidBuffer;
371 pg 828 ECB : bool next_unskippable_allvis,
829 : skipping_current_range;
2581 rhaas 830 GIC 36739 : const int initprog_index[] = {
831 : PROGRESS_VACUUM_PHASE,
832 : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
833 : PROGRESS_VACUUM_MAX_DEAD_TUPLES
2581 rhaas 834 ECB : };
835 : int64 initprog_val[3];
836 :
837 : /* Report that we're scanning the heap, advertising total # of blocks */
2581 rhaas 838 GIC 36739 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
393 pg 839 36739 : initprog_val[1] = rel_pages;
496 pg 840 CBC 36739 : initprog_val[2] = dead_items->max_items;
2581 rhaas 841 GIC 36739 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
842 :
843 : /* Set up an initial range of skippable blocks using the visibility map */
371 pg 844 36739 : next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer, 0,
371 pg 845 ECB : &next_unskippable_allvis,
846 : &skipping_current_range);
393 pg 847 CBC 210697 : for (blkno = 0; blkno < rel_pages; blkno++)
7940 tgl 848 ECB : {
849 : Buffer buf;
850 : Page page;
371 pg 851 : bool all_visible_according_to_vm;
852 : LVPagePruneState prunestate;
853 :
2586 rhaas 854 CBC 173958 : if (blkno == next_unskippable_block)
855 : {
856 : /*
371 pg 857 ECB : * Can't skip this page safely. Must scan the page. But
858 : * determine the next skippable range after the page first.
859 : */
371 pg 860 GIC 161858 : all_visible_according_to_vm = next_unskippable_allvis;
371 pg 861 CBC 161858 : next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer,
862 : blkno + 1,
863 : &next_unskippable_allvis,
864 : &skipping_current_range);
865 :
371 pg 866 GIC 161858 : Assert(next_unskippable_block >= blkno + 1);
867 : }
868 : else
869 : {
870 : /* Last page always scanned (may need to set nonempty_pages) */
871 12100 : Assert(blkno < rel_pages - 1);
371 pg 872 ECB :
371 pg 873 GBC 12100 : if (skipping_current_range)
4332 tgl 874 GIC 9226 : continue;
875 :
876 : /* Current range is too small to skip -- just scan the page */
877 3235 : all_visible_according_to_vm = true;
878 : }
879 :
422 pg 880 165093 : vacrel->scanned_pages++;
422 pg 881 ECB :
371 882 : /* Report as block scanned, update error traceback information */
371 pg 883 GIC 165093 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
884 165093 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
885 : blkno, InvalidOffsetNumber);
886 :
887 165093 : vacuum_delay_point();
888 :
889 : /*
685 pg 890 EUB : * Regularly check if wraparound failsafe should trigger.
891 : *
892 : * There is a similar check inside lazy_vacuum_all_indexes(), but
893 : * relfrozenxid might start to look dangerously old before we reach
894 : * that point. This check also provides failsafe coverage for the
895 : * one-pass strategy, and the two-pass strategy with the index_cleanup
896 : * param set to 'off'.
897 : */
108 pg 898 GNC 165093 : if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
685 pg 899 UIC 0 : lazy_check_wraparound_failsafe(vacrel);
900 :
7940 tgl 901 EUB : /*
902 : * Consider if we definitely have enough space to process TIDs on page
733 pg 903 : * already. If we are close to overrunning the available space for
904 : * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
905 : * this page.
7940 tgl 906 : */
496 pg 907 GIC 165093 : Assert(dead_items->max_items >= MaxHeapTuplesPerPage);
908 165093 : if (dead_items->max_items - dead_items->num_items < MaxHeapTuplesPerPage)
909 : {
910 : /*
911 : * Before beginning index vacuuming, we release any pin we may
912 : * hold on the visibility map page. This isn't necessary for
913 : * correctness, but we do it anyway to avoid holding the pin
914 : * across a lengthy, unrelated operation.
4003 rhaas 915 ECB : */
4003 rhaas 916 UIC 0 : if (BufferIsValid(vmbuffer))
917 : {
918 0 : ReleaseBuffer(vmbuffer);
919 0 : vmbuffer = InvalidBuffer;
920 : }
921 :
922 : /* Perform a round of index and heap vacuuming */
660 pg 923 LBC 0 : vacrel->consider_bypass_optimization = false;
660 pg 924 UIC 0 : lazy_vacuum(vacrel);
2581 rhaas 925 ECB :
1837 tgl 926 : /*
927 : * Vacuum the Free Space Map to make newly-freed space visible on
928 : * upper-level FSM pages. Note we have not yet processed blkno.
929 : */
734 pg 930 UIC 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
734 pg 931 ECB : blkno);
1837 tgl 932 UIC 0 : next_fsm_block_to_vacuum = blkno;
933 :
2581 rhaas 934 ECB : /* Report that we are once again scanning the heap */
2581 rhaas 935 UIC 0 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
936 : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
937 : }
7940 tgl 938 ECB :
939 : /*
940 : * Pin the visibility map page in case we need to mark the page
941 : * all-visible. In most cases this will be very cheap, because we'll
422 pg 942 : * already have the correct page pinned anyway.
943 : */
734 pg 944 CBC 165093 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
4003 rhaas 945 ECB :
946 : /*
947 : * We need a buffer cleanup lock to prune HOT chains and defragment
422 pg 948 : * the page in lazy_scan_prune. But when it's not possible to acquire
949 : * a cleanup lock right away, we may be able to settle for reduced
950 : * processing using lazy_scan_noprune.
733 951 : */
83 pg 952 GNC 165093 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
953 : vacrel->bstrategy);
954 165093 : page = BufferGetPage(buf);
4171 rhaas 955 CBC 165093 : if (!ConditionalLockBufferForCleanup(buf))
4171 rhaas 956 ECB : {
422 pg 957 : bool hastup,
958 : recordfreespace;
959 :
4171 rhaas 960 GIC 4 : LockBuffer(buf, BUFFER_LOCK_SHARE);
961 :
962 : /* Check for new or empty pages before lazy_scan_noprune call */
422 pg 963 4 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, true,
964 : vmbuffer))
4171 rhaas 965 EUB : {
422 pg 966 : /* Processed as new/empty page (lock and pin released) */
4171 rhaas 967 GBC 4 : continue;
968 : }
969 :
970 : /* Collect LP_DEAD items in dead_items array, count tuples */
422 pg 971 CBC 4 : if (lazy_scan_noprune(vacrel, buf, blkno, page, &hastup,
972 : &recordfreespace))
2657 tgl 973 GIC 4 : {
420 tgl 974 GBC 4 : Size freespace = 0;
975 :
976 : /*
977 : * Processed page successfully (without cleanup lock) -- just
978 : * need to perform rel truncation and FSM steps, much like the
979 : * lazy_scan_prune case. Don't bother trying to match its
980 : * visibility map setting steps, though.
981 : */
2657 tgl 982 GIC 4 : if (hastup)
734 pg 983 4 : vacrel->nonempty_pages = blkno + 1;
422 984 4 : if (recordfreespace)
985 4 : freespace = PageGetHeapFreeSpace(page);
422 pg 986 CBC 4 : UnlockReleaseBuffer(buf);
422 pg 987 GIC 4 : if (recordfreespace)
422 pg 988 CBC 4 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2657 tgl 989 GIC 4 : continue;
990 : }
7940 tgl 991 ECB :
6546 992 : /*
993 : * lazy_scan_noprune could not do all required processing. Wait
422 pg 994 : * for a cleanup lock, and call lazy_scan_prune in the usual way.
995 : */
422 pg 996 UIC 0 : Assert(vacrel->aggressive);
997 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
998 0 : LockBufferForCleanup(buf);
999 : }
1000 :
1001 : /* Check for new or empty pages before lazy_scan_prune call */
422 pg 1002 GIC 165089 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, false, vmbuffer))
1003 : {
422 pg 1004 ECB : /* Processed as new/empty page (lock and pin released) */
7940 tgl 1005 LBC 0 : continue;
1006 : }
1007 :
5624 bruce 1008 ECB : /*
1009 : * Prune, freeze, and count tuples.
1010 : *
733 pg 1011 : * Accumulates details of remaining LP_DEAD line pointers on page in
1012 : * dead_items array. This includes LP_DEAD line pointers that we
1013 : * pruned ourselves, as well as existing LP_DEAD line pointers that
1014 : * were pruned some time earlier. Also considers freezing XIDs in the
1015 : * tuple headers of remaining items with storage.
1016 : */
393 pg 1017 GIC 165089 : lazy_scan_prune(vacrel, buf, blkno, page, &prunestate);
4660 bruce 1018 ECB :
720 pg 1019 GIC 165089 : Assert(!prunestate.all_visible || !prunestate.has_lpdead_items);
720 pg 1020 EUB :
1021 : /* Remember the location of the last page with nonremovable tuples */
733 pg 1022 GBC 165089 : if (prunestate.hastup)
733 pg 1023 GIC 159788 : vacrel->nonempty_pages = blkno + 1;
1024 :
1025 165089 : if (vacrel->nindexes == 0)
1026 : {
1027 : /*
1028 : * Consider the need to do page-at-a-time heap vacuuming when
1029 : * using the one-pass strategy now.
1030 : *
1031 : * The one-pass strategy will never call lazy_vacuum(). The steps
1032 : * performed here can be thought of as the one-pass equivalent of
1033 : * a call to lazy_vacuum().
1844 tgl 1034 ECB : */
733 pg 1035 GIC 8078 : if (prunestate.has_lpdead_items)
7940 tgl 1036 CBC 357 : {
733 pg 1037 ECB : Size freespace;
1844 tgl 1038 :
83 pg 1039 GNC 357 : lazy_vacuum_heap_page(vacrel, blkno, buf, 0, vmbuffer);
1040 :
1041 : /* Forget the LP_DEAD items that we just vacuumed */
496 pg 1042 GIC 357 : dead_items->num_items = 0;
1043 :
1044 : /*
1045 : * Periodically perform FSM vacuuming to make newly-freed
1046 : * space visible on upper FSM pages. Note we have not yet
1047 : * performed FSM processing for blkno.
6117 alvherre 1048 ECB : */
733 pg 1049 GIC 357 : if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1050 : {
733 pg 1051 UIC 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1052 : blkno);
1053 0 : next_fsm_block_to_vacuum = blkno;
1054 : }
5999 tgl 1055 ECB :
1466 rhaas 1056 : /*
733 pg 1057 : * Now perform FSM processing for blkno, and move on to next
1058 : * page.
1466 rhaas 1059 : *
1060 : * Our call to lazy_vacuum_heap_page() will have considered if
733 pg 1061 : * it's possible to set all_visible/all_frozen independently
1062 : * of lazy_scan_prune(). Note that prunestate was invalidated
1063 : * by lazy_vacuum_heap_page() call.
1064 : */
733 pg 1065 GIC 357 : freespace = PageGetHeapFreeSpace(page);
1066 :
1067 357 : UnlockReleaseBuffer(buf);
1068 357 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1069 357 : continue;
1070 : }
1071 :
1072 : /*
1073 : * There was no call to lazy_vacuum_heap_page() because pruning
1074 : * didn't encounter/create any LP_DEAD items that needed to be
1075 : * vacuumed. Prune state has not been invalidated, so proceed
1076 : * with prunestate-driven visibility map and FSM steps (just like
1077 : * the two-pass strategy).
1837 tgl 1078 ECB : */
496 pg 1079 CBC 7721 : Assert(dead_items->num_items == 0);
6052 tgl 1080 ECB : }
1081 :
1082 : /*
1083 : * Handle setting visibility map bit based on information from the VM
1084 : * (as of last lazy_scan_skip() call), and from prunestate
1085 : */
733 pg 1086 GIC 164732 : if (!all_visible_according_to_vm && prunestate.all_visible)
5240 heikki.linnakangas 1087 129786 : {
2587 rhaas 1088 129786 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
1089 :
733 pg 1090 129786 : if (prunestate.all_frozen)
1091 : {
83 pg 1092 GNC 121554 : Assert(!TransactionIdIsValid(prunestate.visibility_cutoff_xid));
2588 rhaas 1093 CBC 121554 : flags |= VISIBILITYMAP_ALL_FROZEN;
1094 : }
2588 rhaas 1095 EUB :
1096 : /*
3631 simon 1097 : * It should never be the case that the visibility map page is set
1098 : * while the page-level bit is clear, but the reverse is allowed
1172 michael 1099 : * (if checksums are not enabled). Regardless, set both bits so
1100 : * that we get back in sync.
1101 : *
1102 : * NB: If the heap page is all-visible but the VM bit is not set,
1103 : * we don't need to dirty the heap page. However, if checksums
1104 : * are enabled, we do need to make sure that the heap page is
1105 : * dirtied before passing it to visibilitymap_set(), because it
1106 : * may be logged. Given that this situation should only happen in
1107 : * rare cases after a crash, it is not worth optimizing.
1108 : */
3631 simon 1109 GIC 129786 : PageSetAllVisible(page);
1110 129786 : MarkBufferDirty(buf);
734 pg 1111 129786 : visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1112 : vmbuffer, prunestate.visibility_cutoff_xid,
1113 : flags);
1114 : }
1115 :
1116 : /*
1117 : * As of PostgreSQL 9.2, the visibility map bit should never be set if
3958 rhaas 1118 ECB : * the page-level bit is clear. However, it's possible that the bit
1119 : * got cleared after lazy_scan_skip() was called, so we must recheck
371 pg 1120 EUB : * with buffer lock before concluding that the VM is corrupt.
1121 : */
83 pg 1122 GNC 34946 : else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
83 pg 1123 UNC 0 : visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
4003 rhaas 1124 EUB : {
4003 rhaas 1125 UIC 0 : elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1126 : vacrel->relname, blkno);
734 pg 1127 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1128 : VISIBILITYMAP_VALID_BITS);
1129 : }
1130 :
1131 : /*
1132 : * It's possible for the value returned by
970 andres 1133 ECB : * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1134 : * wrong for us to see tuples that appear to not be visible to
1135 : * everyone yet, while PD_ALL_VISIBLE is already set. The real safe
1136 : * xmin value never moves backwards, but
1137 : * GetOldestNonRemovableTransactionId() is conservative and sometimes
1138 : * returns a value that's unnecessarily small, so if we see that
1139 : * contradiction it just means that the tuples that we think are not
1140 : * visible to everyone yet actually are, and the PD_ALL_VISIBLE flag
1141 : * is correct.
4415 heikki.linnakangas 1142 : *
1143 : * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE
4415 heikki.linnakangas 1144 EUB : * set, however.
1145 : */
733 pg 1146 GIC 34946 : else if (prunestate.has_lpdead_items && PageIsAllVisible(page))
1147 : {
498 pg 1148 UIC 0 : elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
1149 : vacrel->relname, blkno);
5240 heikki.linnakangas 1150 0 : PageClearAllVisible(page);
4003 rhaas 1151 0 : MarkBufferDirty(buf);
734 pg 1152 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1153 : VISIBILITYMAP_VALID_BITS);
1154 : }
5240 heikki.linnakangas 1155 ECB :
2595 rhaas 1156 : /*
1157 : * If the all-visible page is all-frozen but not marked as such yet,
1158 : * mark it as all-frozen. Note that all_frozen is only valid if
1159 : * all_visible is true, so we must check both prunestate fields.
1160 : */
733 pg 1161 GIC 34946 : else if (all_visible_according_to_vm && prunestate.all_visible &&
1162 4044 : prunestate.all_frozen &&
734 1163 3645 : !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1164 : {
1165 : /*
1166 : * Avoid relying on all_visible_according_to_vm as a proxy for the
1167 : * page-level PD_ALL_VISIBLE bit being set, since it might have
1168 : * become stale -- even when all_visible is set in prunestate
1169 : */
83 pg 1170 GNC 11 : if (!PageIsAllVisible(page))
1171 : {
83 pg 1172 UNC 0 : PageSetAllVisible(page);
1173 0 : MarkBufferDirty(buf);
1174 : }
1175 :
1176 : /*
1177 : * Set the page all-frozen (and all-visible) in the VM.
1178 : *
1179 : * We can pass InvalidTransactionId as our visibility_cutoff_xid,
1180 : * since a snapshotConflictHorizon sufficient to make everything
1181 : * safe for REDO was logged when the page's tuples were frozen.
1182 : */
83 pg 1183 GNC 11 : Assert(!TransactionIdIsValid(prunestate.visibility_cutoff_xid));
734 pg 1184 GIC 11 : visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1185 : vmbuffer, InvalidTransactionId,
1186 : VISIBILITYMAP_ALL_VISIBLE |
1187 : VISIBILITYMAP_ALL_FROZEN);
1188 : }
1189 :
1190 : /*
1191 : * Final steps for block: drop cleanup lock, record free space in the
1192 : * FSM
1193 : */
733 1194 164732 : if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
1195 : {
1196 : /*
1197 : * Wait until lazy_vacuum_heap_rel() to save free space. This
1198 : * doesn't just save us some cycles; it also allows us to record
1199 : * any additional free space that lazy_vacuum_heap_page() will
1200 : * make available in cases where it's possible to truncate the
732 pg 1201 ECB : * page's line pointer array.
733 1202 : *
1203 : * Note: It's not in fact 100% certain that we really will call
1204 : * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip
1205 : * index vacuuming (and so must skip heap vacuuming). This is
732 1206 : * deemed okay because it only happens in emergencies, or when
1207 : * there is very little free space anyway. (Besides, we start
1208 : * recording free space in the FSM once index vacuuming has been
1209 : * abandoned.)
1210 : *
1211 : * Note: The one-pass (no indexes) case is only supposed to make
1212 : * it this far when there were no LP_DEAD items during pruning.
733 1213 : */
733 pg 1214 CBC 25199 : Assert(vacrel->nindexes > 0);
1215 25199 : UnlockReleaseBuffer(buf);
1216 : }
1217 : else
733 pg 1218 ECB : {
733 pg 1219 GIC 139533 : Size freespace = PageGetHeapFreeSpace(page);
1220 :
733 pg 1221 CBC 139533 : UnlockReleaseBuffer(buf);
734 pg 1222 GIC 139533 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
733 pg 1223 ECB : }
1224 : }
1225 :
370 pg 1226 GIC 36739 : vacrel->blkno = InvalidBlockNumber;
1227 36739 : if (BufferIsValid(vmbuffer))
1228 14699 : ReleaseBuffer(vmbuffer);
370 pg 1229 ECB :
733 1230 : /* report that everything is now scanned */
2581 rhaas 1231 CBC 36739 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1232 :
1233 : /* now we can compute the new value for pg_class.reltuples */
393 pg 1234 GIC 73478 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1235 : vacrel->scanned_pages,
733 1236 36739 : vacrel->live_tuples);
1844 tgl 1237 ECB :
952 1238 : /*
1239 : * Also compute the total number of surviving heap entries. In the
1240 : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1241 : */
734 pg 1242 GIC 36739 : vacrel->new_rel_tuples =
422 1243 36739 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
422 pg 1244 CBC 36739 : vacrel->missed_dead_tuples;
4332 tgl 1245 ECB :
1246 : /*
1247 : * Do index vacuuming (call each index's ambulkdelete routine), then do
370 pg 1248 : * related heap vacuuming
1249 : */
496 pg 1250 GIC 36739 : if (dead_items->num_items > 0)
660 pg 1251 CBC 1765 : lazy_vacuum(vacrel);
6186 tgl 1252 ECB :
1837 1253 : /*
1254 : * Vacuum the remainder of the Free Space Map. We must do this whether or
1255 : * not there were indexes, and whether or not we bypassed index vacuuming.
1256 : */
1837 tgl 1257 GIC 36739 : if (blkno > next_fsm_block_to_vacuum)
734 pg 1258 14699 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1259 :
1260 : /* report all blocks vacuumed */
2580 rhaas 1261 36739 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
1262 :
1263 : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
733 pg 1264 36739 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
734 1265 34805 : lazy_cleanup_all_indexes(vacrel);
7940 tgl 1266 36739 : }
1267 :
1268 : /*
1269 : * lazy_scan_skip() -- set up range of skippable blocks using visibility map.
1270 : *
1271 : * lazy_scan_heap() calls here every time it needs to set up a new range of
1272 : * blocks to skip via the visibility map. Caller passes the next block in
1273 : * line. We return a next_unskippable_block for this range. When there are
1274 : * no skippable blocks we just return caller's next_block. The all-visible
1275 : * status of the returned block is set in *next_unskippable_allvis for caller,
1276 : * too. Block usually won't be all-visible (since it's unskippable), but it
1277 : * can be during aggressive VACUUMs (as well as in certain edge cases).
371 pg 1278 ECB : *
1279 : * Sets *skipping_current_range to indicate if caller should skip this range.
1280 : * Costs and benefits drive our decision. Very small ranges won't be skipped.
1281 : *
1282 : * Note: our opinion of which blocks can be skipped can go stale immediately.
1283 : * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1284 : * was concurrently cleared, though. All that matters is that caller scan all
1285 : * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1286 : * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1287 : * older XIDs/MXIDs. The vacrel->skippedallvis flag will be set here when the
1288 : * choice to skip such a range is actually made, making everything safe.)
1289 : */
1290 : static BlockNumber
371 pg 1291 GIC 198597 : lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block,
1292 : bool *next_unskippable_allvis, bool *skipping_current_range)
371 pg 1293 ECB : {
371 pg 1294 GIC 198597 : BlockNumber rel_pages = vacrel->rel_pages,
371 pg 1295 CBC 198597 : next_unskippable_block = next_block,
1296 198597 : nskippable_blocks = 0;
1297 198597 : bool skipsallvis = false;
1298 :
371 pg 1299 GIC 198597 : *next_unskippable_allvis = true;
1300 210697 : while (next_unskippable_block < rel_pages)
1301 : {
1302 173958 : uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1303 : next_unskippable_block,
1304 : vmbuffer);
1305 :
1306 173958 : if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1307 : {
1308 160671 : Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1309 160671 : *next_unskippable_allvis = false;
371 pg 1310 CBC 160671 : break;
371 pg 1311 ECB : }
1312 :
1313 : /*
1314 : * Caller must scan the last page to determine whether it has tuples
1315 : * (caller must have the opportunity to set vacrel->nonempty_pages).
1316 : * This rule avoids having lazy_truncate_heap() take access-exclusive
1317 : * lock on rel to attempt a truncation that fails anyway, just because
1318 : * there are tuples on the last page (it is likely that there will be
1319 : * tuples on other nearby pages as well, but those can be skipped).
1320 : *
1321 : * Implement this by always treating the last block as unsafe to skip.
1322 : */
371 pg 1323 GIC 13287 : if (next_unskippable_block == rel_pages - 1)
1324 807 : break;
1325 :
371 pg 1326 ECB : /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
371 pg 1327 GIC 12480 : if (!vacrel->skipwithvm)
1328 : {
1329 : /* Caller shouldn't rely on all_visible_according_to_vm */
83 pg 1330 GNC 378 : *next_unskippable_allvis = false;
371 pg 1331 CBC 378 : break;
1332 : }
371 pg 1333 ECB :
1334 : /*
1335 : * Aggressive VACUUM caller can't skip pages just because they are
1336 : * all-visible. They may still skip all-frozen pages, which can't
1337 : * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
1338 : */
371 pg 1339 CBC 12102 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1340 : {
371 pg 1341 GIC 3166 : if (vacrel->aggressive)
371 pg 1342 CBC 2 : break;
371 pg 1343 ECB :
1344 : /*
1345 : * All-visible block is safe to skip in non-aggressive case. But
1346 : * remember that the final range contains such a block for later.
1347 : */
371 pg 1348 GIC 3164 : skipsallvis = true;
1349 : }
1350 :
1351 12100 : vacuum_delay_point();
1352 12100 : next_unskippable_block++;
1353 12100 : nskippable_blocks++;
1354 : }
1355 :
1356 : /*
371 pg 1357 ECB : * We only skip a range with at least SKIP_PAGES_THRESHOLD consecutive
1358 : * pages. Since we're reading sequentially, the OS should be doing
1359 : * readahead for us, so there's no gain in skipping a page now and then.
1360 : * Skipping such a range might even discourage sequential detection.
1361 : *
1362 : * This test also enables more frequent relfrozenxid advancement during
1363 : * non-aggressive VACUUMs. If the range has any all-visible pages then
1364 : * skipping makes updating relfrozenxid unsafe, which is a real downside.
1365 : */
371 pg 1366 CBC 198597 : if (nskippable_blocks < SKIP_PAGES_THRESHOLD)
371 pg 1367 GIC 198504 : *skipping_current_range = false;
1368 : else
1369 : {
1370 93 : *skipping_current_range = true;
1371 93 : if (skipsallvis)
1372 22 : vacrel->skippedallvis = true;
1373 : }
1374 :
1375 198597 : return next_unskippable_block;
1376 : }
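Editor's sketch: the SKIP_PAGES_THRESHOLD rule described in the comment above boils down to a single length test on the run of skippable blocks. The threshold value below is only an assumed example; the real constant is defined earlier in this file.

#include <stdbool.h>
#include <stdio.h>

#define EXAMPLE_SKIP_PAGES_THRESHOLD 32 /* assumed stand-in for SKIP_PAGES_THRESHOLD */

/* Sketch: a run of consecutive skippable blocks is only worth skipping when
 * it is long enough that skipping beats letting OS readahead fetch the pages
 * anyway (and long enough to justify giving up relfrozenxid advancement when
 * the run contains all-visible-but-not-all-frozen pages). */
static bool
skip_current_range(unsigned nskippable_blocks)
{
    return nskippable_blocks >= EXAMPLE_SKIP_PAGES_THRESHOLD;
}

int
main(void)
{
    printf("run of 5:  %s\n", skip_current_range(5) ? "skip" : "scan");
    printf("run of 40: %s\n", skip_current_range(40) ? "skip" : "scan");
    return 0;
}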
1377 :
1378 : /*
1379 : * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1380 : *
1381 : * Must call here to handle both new and empty pages before calling
1382 : * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1383 : * with new or empty pages.
1384 : *
1385 : * It's necessary to consider new pages as a special case, since the rules for
1386 : * maintaining the visibility map and FSM with empty pages are a little
1387 : * different (though new pages can be truncated away during rel truncation).
1388 : *
1389 : * Empty pages are not really a special case -- they're just heap pages that
1390 : * have no allocated tuples (including even LP_UNUSED items). You might
1391 : * wonder why we need to handle them here all the same. It's only necessary
1392 : * because of a corner-case involving a hard crash during heap relation
1393 : * extension. If we ever make relation-extension crash safe, then it should
1394 : * no longer be necessary to deal with empty pages here (or new pages, for
1395 : * that matter).
1396 : *
1397 : * Caller must hold at least a shared lock. We might need to escalate the
422 pg 1398 ECB : * lock in that case, so the type of lock caller holds needs to be specified
 1399 :  * using the 'sharelock' argument.
1400 : *
1401 : * Returns false in common case where caller should go on to call
1402 : * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1403 : * that lazy_scan_heap is done processing the page, releasing lock on caller's
1404 : * behalf.
1405 : */
1406 : static bool
422 pg 1407 GIC 165093 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1408 : Page page, bool sharelock, Buffer vmbuffer)
1409 : {
1410 : Size freespace;
1411 :
1412 165093 : if (PageIsNew(page))
1413 : {
1414 : /*
1415 : * All-zeroes pages can be left over if either a backend extends the
 1416 :          * relation by a single page but crashes before the newly initialized
 1417 :          * page has been written out, or when the relation is bulk-extended
 1418 :          * (which creates a number of empty pages at the tail end of the
 1419 :          * relation that are then entered into the FSM).
1420 : *
1421 : * Note we do not enter the page into the visibilitymap. That has the
1422 : * downside that we repeatedly visit this page in subsequent vacuums,
1423 : * but otherwise we'll never discover the space on a promoted standby.
 422 pg 1424 EUB :          * The harm of repeated checking normally ought not to be too bad. The
 1425 :          * space should usually be used at some point; otherwise there
1426 : * wouldn't be any regular vacuums.
1427 : *
1428 : * Make sure these pages are in the FSM, to ensure they can be reused.
1429 : * Do that by testing if there's any space recorded for the page. If
1430 : * not, enter it. We do so after releasing the lock on the heap page,
 1431 :          * not, enter it. We do so after releasing the lock on the heap page;
1432 : */
422 pg 1433 UBC 0 : UnlockReleaseBuffer(buf);
1434 :
422 pg 1435 UIC 0 : if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
422 pg 1436 ECB : {
422 pg 1437 UIC 0 : freespace = BLCKSZ - SizeOfPageHeaderData;
1438 :
1439 0 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1440 : }
1441 :
1442 0 : return true;
422 pg 1443 EUB : }
1444 :
422 pg 1445 GBC 165093 : if (PageIsEmpty(page))
422 pg 1446 EUB : {
1447 : /*
1448 : * It seems likely that caller will always be able to get a cleanup
1449 : * lock on an empty page. But don't take any chances -- escalate to
1450 : * an exclusive lock (still don't need a cleanup lock, though).
1451 : */
422 pg 1452 UIC 0 : if (sharelock)
1453 : {
1454 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1455 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1456 :
1457 0 : if (!PageIsEmpty(page))
1458 : {
1459 : /* page isn't new or empty -- keep lock and pin for now */
1460 0 : return false;
1461 : }
1462 : }
422 pg 1463 EUB : else
1464 : {
1465 : /* Already have a full cleanup lock (which is more than enough) */
1466 : }
1467 :
1468 : /*
1469 : * Unlike new pages, empty pages are always set all-visible and
1470 : * all-frozen.
1471 : */
422 pg 1472 UIC 0 : if (!PageIsAllVisible(page))
1473 : {
1474 0 : START_CRIT_SECTION();
1475 :
1476 : /* mark buffer dirty before writing a WAL record */
1477 0 : MarkBufferDirty(buf);
1478 :
422 pg 1479 EUB : /*
1480 : * It's possible that another backend has extended the heap,
1481 : * initialized the page, and then failed to WAL-log the page due
1482 : * to an ERROR. Since heap extension is not WAL-logged, recovery
1483 : * might try to replay our record setting the page all-visible and
1484 : * find that the page isn't initialized, which will cause a PANIC.
1485 : * To prevent that, check whether the page has been previously
1486 : * WAL-logged, and if not, do that now.
1487 : */
422 pg 1488 UIC 0 : if (RelationNeedsWAL(vacrel->rel) &&
1489 0 : PageGetLSN(page) == InvalidXLogRecPtr)
422 pg 1490 UBC 0 : log_newpage_buffer(buf, true);
422 pg 1491 EUB :
422 pg 1492 UBC 0 : PageSetAllVisible(page);
1493 0 : visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1494 : vmbuffer, InvalidTransactionId,
1495 : VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
422 pg 1496 UIC 0 : END_CRIT_SECTION();
422 pg 1497 ECB : }
1498 :
422 pg 1499 UIC 0 : freespace = PageGetHeapFreeSpace(page);
1500 0 : UnlockReleaseBuffer(buf);
1501 0 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1502 0 : return true;
1503 : }
1504 :
1505 : /* page isn't new or empty -- keep lock and pin */
422 pg 1506 GIC 165093 : return false;
1507 : }
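Editor's sketch: as a reading aid, the dispatch performed by lazy_scan_new_or_empty can be condensed into a tiny classifier. The enums and function below are stand-ins for illustration only, not the real function or its types.

typedef enum
{
    PAGE_NEW,                   /* all-zeroes page */
    PAGE_EMPTY,                 /* initialized page with no line pointers */
    PAGE_ORDINARY               /* anything else */
} ExamplePageKind;

typedef enum
{
    RECORD_FREESPACE_ONLY,      /* new page: FSM entry, leave VM alone */
    SET_ALLVISIBLE_AND_FROZEN,  /* empty page: VM bits, then FSM entry */
    CONTINUE_TO_PRUNE           /* caller goes on to lazy_scan_prune */
} ExamplePageAction;

static ExamplePageAction
classify_page(ExamplePageKind kind)
{
    switch (kind)
    {
        case PAGE_NEW:
            return RECORD_FREESPACE_ONLY;
        case PAGE_EMPTY:
            return SET_ALLVISIBLE_AND_FROZEN;
        default:
            return CONTINUE_TO_PRUNE;
    }
}

int
main(void)
{
    return (classify_page(PAGE_EMPTY) == SET_ALLVISIBLE_AND_FROZEN) ? 0 : 1;
}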
1508 :
1509 : /*
1510 : * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1511 : *
1512 : * Caller must hold pin and buffer cleanup lock on the buffer.
1513 : *
1514 : * Prior to PostgreSQL 14 there were very rare cases where heap_page_prune()
1515 : * was allowed to disagree with our HeapTupleSatisfiesVacuum() call about
1516 : * whether or not a tuple should be considered DEAD. This happened when an
1517 : * inserting transaction concurrently aborted (after our heap_page_prune()
1518 : * call, before our HeapTupleSatisfiesVacuum() call). There was rather a lot
1519 : * of complexity just so we could deal with tuples that were DEAD to VACUUM,
1520 : * but nevertheless were left with storage after pruning.
733 pg 1521 ECB : *
1522 : * The approach we take now is to restart pruning when the race condition is
1523 : * detected. This allows heap_page_prune() to prune the tuples inserted by
1524 : * the now-aborted transaction. This is a little crude, but it guarantees
1525 : * that any items that make it into the dead_items array are simple LP_DEAD
1526 : * line pointers, and that every remaining item with tuple storage is
1527 : * considered as a candidate for freezing.
1528 : */
1529 : static void
733 pg 1530 GIC 165089 : lazy_scan_prune(LVRelState *vacrel,
1531 : Buffer buf,
1532 : BlockNumber blkno,
1533 : Page page,
1534 : LVPagePruneState *prunestate)
1535 : {
1536 165089 : Relation rel = vacrel->rel;
1537 : OffsetNumber offnum,
1538 : maxoff;
1539 : ItemId itemid;
733 pg 1540 ECB : HeapTupleData tuple;
1541 : HTSV_Result res;
1542 : int tuples_deleted,
1543 : tuples_frozen,
1544 : lpdead_items,
409 1545 : live_tuples,
1546 : recently_dead_tuples;
1547 : int nnewlpdead;
1548 : HeapPageFreeze pagefrz;
102 pg 1549 GNC 165089 : int64 fpi_before = pgWalUsage.wal_fpi;
1550 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1551 : HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
1552 :
422 pg 1553 CBC 165089 : Assert(BufferGetBlockNumber(buf) == blkno);
1554 :
1555 : /*
367 pg 1556 ECB : * maxoff might be reduced following line pointer array truncation in
1557 : * heap_page_prune. That's safe for us to ignore, since the reclaimed
1558 : * space will continue to look like LP_UNUSED items below.
1559 : */
733 pg 1560 CBC 165089 : maxoff = PageGetMaxOffsetNumber(page);
733 pg 1561 ECB :
733 pg 1562 CBC 165089 : retry:
733 pg 1563 ECB :
371 1564 : /* Initialize (or reset) page-level state */
102 pg 1565 GNC 165089 : pagefrz.freeze_required = false;
1566 165089 : pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1567 165089 : pagefrz.FreezePageRelminMxid = vacrel->NewRelminMxid;
1568 165089 : pagefrz.NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1569 165089 : pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid;
733 pg 1570 GIC 165089 : tuples_deleted = 0;
213 pg 1571 GNC 165089 : tuples_frozen = 0;
733 pg 1572 GIC 165089 : lpdead_items = 0;
1573 165089 : live_tuples = 0;
409 1574 165089 : recently_dead_tuples = 0;
1575 :
1576 : /*
1577 : * Prune all HOT-update chains in this page.
1578 : *
1579 : * We count tuples removed by the pruning step as tuples_deleted. Its
733 pg 1580 ECB : * final value can be thought of as the number of tuples that have been
1581 : * deleted from the table. It should not be confused with lpdead_items;
1582 : * lpdead_items's final value can be thought of as the number of tuples
1583 : * that were deleted from indexes.
1584 : */
393 pg 1585 GIC 165089 : tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest,
1586 : InvalidTransactionId, 0, &nnewlpdead,
1587 : &vacrel->offnum);
733 pg 1588 ECB :
1589 : /*
1590 : * Now scan the page to collect LP_DEAD items and check for tuples
1591 : * requiring freezing among remaining tuples with storage
1592 : */
733 pg 1593 GIC 165089 : prunestate->hastup = false;
733 pg 1594 CBC 165089 : prunestate->has_lpdead_items = false;
1595 165089 : prunestate->all_visible = true;
1596 165089 : prunestate->all_frozen = true;
733 pg 1597 GIC 165089 : prunestate->visibility_cutoff_xid = InvalidTransactionId;
1598 :
1599 165089 : for (offnum = FirstOffsetNumber;
1600 10322367 : offnum <= maxoff;
1601 10157278 : offnum = OffsetNumberNext(offnum))
1602 : {
1603 : bool totally_frozen;
733 pg 1604 ECB :
1605 : /*
1606 : * Set the offset number so that we can display it along with any
1607 : * error that occurred while processing this tuple.
1608 : */
733 pg 1609 GIC 10157278 : vacrel->offnum = offnum;
733 pg 1610 CBC 10157278 : itemid = PageGetItemId(page, offnum);
1611 :
733 pg 1612 GIC 10157278 : if (!ItemIdIsUsed(itemid))
733 pg 1613 CBC 1016162 : continue;
733 pg 1614 ECB :
1615 : /* Redirect items mustn't be touched */
733 pg 1616 GIC 10029656 : if (ItemIdIsRedirected(itemid))
733 pg 1617 ECB : {
1618 : /* page makes rel truncation unsafe */
97 pg 1619 GNC 67635 : prunestate->hastup = true;
733 pg 1620 GIC 67635 : continue;
1621 : }
1622 :
1623 9962021 : if (ItemIdIsDead(itemid))
733 pg 1624 ECB : {
1625 : /*
1626 : * Deliberately don't set hastup for LP_DEAD items. We make the
1627 : * soft assumption that any LP_DEAD items encountered here will
1628 : * become LP_UNUSED later on, before count_nondeletable_pages is
1629 : * reached. If we don't make this assumption then rel truncation
1630 : * will only happen every other VACUUM, at most. Besides, VACUUM
1631 : * must treat hastup/nonempty_pages as provisional no matter how
1632 : * LP_DEAD items are handled (handled here, or handled later on).
1633 : *
1634 : * Also deliberately delay unsetting all_visible until just before
1635 : * we return to lazy_scan_heap caller, as explained in full below.
1636 : * (This is another case where it's useful to anticipate that any
1637 : * LP_DEAD items will become LP_UNUSED during the ongoing VACUUM.)
1638 : */
733 pg 1639 CBC 820905 : deadoffsets[lpdead_items++] = offnum;
733 pg 1640 GIC 820905 : continue;
1641 : }
1642 :
1643 9141116 : Assert(ItemIdIsNormal(itemid));
1644 :
1645 9141116 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
1646 9141116 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
733 pg 1647 CBC 9141116 : tuple.t_len = ItemIdGetLength(itemid);
733 pg 1648 GIC 9141116 : tuple.t_tableOid = RelationGetRelid(rel);
1649 :
733 pg 1650 ECB : /*
733 pg 1651 EUB : * DEAD tuples are almost always pruned into LP_DEAD line pointers by
1652 : * heap_page_prune(), but it's possible that the tuple state changed
1653 : * since heap_page_prune() looked. Handle that here by restarting.
1654 : * (See comments at the top of function for a full explanation.)
1655 : */
108 pg 1656 GNC 9141116 : res = HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
1657 : buf);
1658 :
733 pg 1659 GIC 9141116 : if (unlikely(res == HEAPTUPLE_DEAD))
733 pg 1660 UIC 0 : goto retry;
1661 :
1662 : /*
1663 : * The criteria for counting a tuple as live in this block need to
1664 : * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
1665 : * and ANALYZE may produce wildly different reltuples values, e.g.
1666 : * when there are many recently-dead tuples.
1667 : *
1668 : * The logic here is a bit simpler than acquire_sample_rows(), as
1669 : * VACUUM can't run inside a transaction block, which makes some cases
1670 : * impossible (e.g. in-progress insert from the same transaction).
1671 : *
1672 : * We treat LP_DEAD items (which are the closest thing to DEAD tuples
498 pg 1673 ECB : * that might be seen here) differently, too: we assume that they'll
1674 : * become LP_UNUSED before VACUUM finishes. This difference is only
1675 : * superficial. VACUUM effectively agrees with ANALYZE about DEAD
1676 : * items, in the end. VACUUM won't remember LP_DEAD items, but only
1677 : * because they're not supposed to be left behind when it is done.
1678 : * (Cases where we bypass index vacuuming will violate this optimistic
1679 : * assumption, but the overall impact of that should be negligible.)
1680 : */
733 pg 1681 CBC 9141116 : switch (res)
1682 : {
733 pg 1683 GIC 8821538 : case HEAPTUPLE_LIVE:
1684 :
1685 : /*
1686 : * Count it as live. Not only is this natural, but it's also
1687 : * what acquire_sample_rows() does.
1688 : */
1689 8821538 : live_tuples++;
1690 :
733 pg 1691 ECB : /*
1692 : * Is the tuple definitely visible to all transactions?
1693 : *
1694 : * NB: Like with per-tuple hint bits, we can't set the
1695 : * PD_ALL_VISIBLE flag if the inserter committed
1696 : * asynchronously. See SetHintBits for more info. Check that
1697 : * the tuple is hinted xmin-committed because of that.
1698 : */
733 pg 1699 GIC 8821538 : if (prunestate->all_visible)
1700 : {
1701 : TransactionId xmin;
1702 :
1703 8435183 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
1704 : {
733 pg 1705 CBC 34 : prunestate->all_visible = false;
1706 34 : break;
1707 : }
1708 :
733 pg 1709 ECB : /*
1710 : * The inserter definitely committed. But is it old enough
1711 : * that everyone sees it as committed?
1712 : */
733 pg 1713 GIC 8435149 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
108 pg 1714 GNC 8435149 : if (!TransactionIdPrecedes(xmin,
1715 : vacrel->cutoffs.OldestXmin))
1716 : {
733 pg 1717 CBC 3129 : prunestate->all_visible = false;
733 pg 1718 GIC 3129 : break;
733 pg 1719 ECB : }
1720 :
1721 : /* Track newest xmin on page. */
97 pg 1722 GNC 8432020 : if (TransactionIdFollows(xmin, prunestate->visibility_cutoff_xid) &&
1723 : TransactionIdIsNormal(xmin))
733 pg 1724 GIC 483725 : prunestate->visibility_cutoff_xid = xmin;
1725 : }
1726 8818375 : break;
1727 319145 : case HEAPTUPLE_RECENTLY_DEAD:
733 pg 1728 ECB :
1729 : /*
422 1730 : * If tuple is recently dead then we must not remove it from
1731 : * the relation. (We only remove items that are LP_DEAD from
1732 : * pruning.)
1733 : */
422 pg 1734 GIC 319145 : recently_dead_tuples++;
733 1735 319145 : prunestate->all_visible = false;
1736 319145 : break;
1737 429 : case HEAPTUPLE_INSERT_IN_PROGRESS:
1738 :
1739 : /*
733 pg 1740 ECB : * We do not count these rows as live, because we expect the
1741 : * inserting transaction to update the counters at commit, and
1742 : * we assume that will happen only after we report our
1743 : * results. This assumption is a bit shaky, but it is what
1744 : * acquire_sample_rows() does, so be consistent.
1745 : */
733 pg 1746 GIC 429 : prunestate->all_visible = false;
1747 429 : break;
1748 4 : case HEAPTUPLE_DELETE_IN_PROGRESS:
1749 : /* This is an expected case during concurrent vacuum */
1750 4 : prunestate->all_visible = false;
733 pg 1751 ECB :
1752 : /*
733 pg 1753 EUB : * Count such rows as live. As above, we assume the deleting
1754 : * transaction will commit and update the counters after we
1755 : * report.
1756 : */
733 pg 1757 GIC 4 : live_tuples++;
733 pg 1758 CBC 4 : break;
733 pg 1759 UIC 0 : default:
1760 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
733 pg 1761 ECB : break;
1762 : }
1763 :
367 pg 1764 GIC 9141116 : prunestate->hastup = true; /* page makes rel truncation unsafe */
1765 :
1766 : /* Tuple with storage -- consider need to freeze */
102 pg 1767 GNC 9141116 : if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz,
1768 : &frozen[tuples_frozen], &totally_frozen))
1769 : {
1770 : /* Save prepared freeze plan for later */
213 1771 4474824 : frozen[tuples_frozen++].offset = offnum;
1772 : }
1773 :
733 pg 1774 ECB : /*
1775 : * If any tuple isn't either totally frozen already or eligible to
1776 : * become totally frozen (according to its freeze plan), then the page
1777 : * definitely cannot be set all-frozen in the visibility map later on
1778 : */
108 pg 1779 GNC 9141116 : if (!totally_frozen)
733 pg 1780 GIC 706764 : prunestate->all_frozen = false;
1781 : }
1782 :
733 pg 1783 ECB : /*
1784 : * We have now divided every item on the page into either an LP_DEAD item
 1785 :      * that will need to be vacuumed in indexes later, or an LP_NORMAL tuple
1786 : * that remains and needs to be considered for freezing now (LP_UNUSED and
1787 : * LP_REDIRECT items also remain, but are of no further interest to us).
1788 : */
102 pg 1789 GNC 165089 : vacrel->offnum = InvalidOffsetNumber;
1790 :
733 pg 1791 ECB : /*
1792 : * Freeze the page when heap_prepare_freeze_tuple indicates that at least
1793 : * one XID/MXID from before FreezeLimit/MultiXactCutoff is present. Also
1794 : * freeze when pruning generated an FPI, if doing so means that we set the
1795 : * page all-frozen afterwards (might not happen until final heap pass).
1796 : */
102 pg 1797 GNC 165089 : if (pagefrz.freeze_required || tuples_frozen == 0 ||
1798 13782 : (prunestate->all_visible && prunestate->all_frozen &&
74 1799 13553 : fpi_before != pgWalUsage.wal_fpi))
1800 : {
1801 : /*
1802 : * We're freezing the page. Our final NewRelfrozenXid doesn't need to
1803 : * be affected by the XIDs that are just about to be frozen anyway.
1804 : */
102 1805 151486 : vacrel->NewRelfrozenXid = pagefrz.FreezePageRelfrozenXid;
1806 151486 : vacrel->NewRelminMxid = pagefrz.FreezePageRelminMxid;
1807 :
1808 151486 : if (tuples_frozen == 0)
1809 : {
1810 : /*
1811 : * We have no freeze plans to execute, so there's no added cost
1812 : * from following the freeze path. That's why it was chosen.
1813 : * This is important in the case where the page only contains
1814 : * totally frozen tuples at this point (perhaps only following
1815 : * pruning). Such pages can be marked all-frozen in the VM by our
 1816 :              * caller, even though none of their tuples were newly frozen here
1817 : * (note that the "no freeze" path never sets pages all-frozen).
1818 : *
1819 : * We never increment the frozen_pages instrumentation counter
1820 : * here, since it only counts pages with newly frozen tuples
1821 : * (don't confuse that with pages newly set all-frozen in VM).
1822 : */
1823 : }
1824 : else
1825 : {
1826 : TransactionId snapshotConflictHorizon;
1827 :
1828 80091 : vacrel->frozen_pages++;
1829 :
1830 : /*
1831 : * We can use visibility_cutoff_xid as our cutoff for conflicts
1832 : * when the whole page is eligible to become all-frozen in the VM
1833 : * once we're done with it. Otherwise we generate a conservative
1834 : * cutoff by stepping back from OldestXmin.
1835 : */
1836 80091 : if (prunestate->all_visible && prunestate->all_frozen)
1837 : {
1838 : /* Using same cutoff when setting VM is now unnecessary */
1839 80067 : snapshotConflictHorizon = prunestate->visibility_cutoff_xid;
83 1840 80067 : prunestate->visibility_cutoff_xid = InvalidTransactionId;
1841 : }
1842 : else
1843 : {
1844 : /* Avoids false conflicts when hot_standby_feedback in use */
102 1845 24 : snapshotConflictHorizon = vacrel->cutoffs.OldestXmin;
1846 24 : TransactionIdRetreat(snapshotConflictHorizon);
1847 : }
1848 :
1849 : /* Execute all freeze plans for page as a single atomic action */
1850 80091 : heap_freeze_execute_prepared(vacrel->rel, buf,
1851 : snapshotConflictHorizon,
1852 : frozen, tuples_frozen);
1853 : }
1854 : }
1855 : else
1856 : {
1857 : /*
1858 : * Page requires "no freeze" processing. It might be set all-visible
1859 : * in the visibility map, but it can never be set all-frozen.
1860 : */
1861 13603 : vacrel->NewRelfrozenXid = pagefrz.NoFreezePageRelfrozenXid;
1862 13603 : vacrel->NewRelminMxid = pagefrz.NoFreezePageRelminMxid;
1863 13603 : prunestate->all_frozen = false;
1864 13603 : tuples_frozen = 0; /* avoid miscounts in instrumentation */
733 pg 1865 ECB : }
1866 :
1867 : /*
1868 : * VACUUM will call heap_page_is_all_visible() during the second pass over
1869 : * the heap to determine all_visible and all_frozen for the page -- this
1870 : * is a specialized version of the logic from this function. Now that
1871 : * we've finished pruning and freezing, make sure that we're in total
1872 : * agreement with heap_page_is_all_visible() using an assertion.
1873 : */
1874 : #ifdef USE_ASSERT_CHECKING
1875 : /* Note that all_frozen value does not matter when !all_visible */
102 pg 1876 GNC 165089 : if (prunestate->all_visible && lpdead_items == 0)
1877 : {
1878 : TransactionId cutoff;
1879 : bool all_frozen;
1880 :
733 pg 1881 GIC 133830 : if (!heap_page_is_all_visible(vacrel, buf, &cutoff, &all_frozen))
733 pg 1882 UIC 0 : Assert(false);
1883 :
97 pg 1884 GNC 133830 : Assert(!TransactionIdIsValid(cutoff) ||
1885 : cutoff == prunestate->visibility_cutoff_xid);
1886 : }
1887 : #endif
1888 :
1889 : /*
1890 : * Now save details of the LP_DEAD items from the page in vacrel
1891 : */
733 pg 1892 CBC 165089 : if (lpdead_items > 0)
1893 : {
473 akapila 1894 25629 : VacDeadItems *dead_items = vacrel->dead_items;
1895 : ItemPointerData tmp;
1896 :
733 pg 1897 25629 : vacrel->lpdead_item_pages++;
102 pg 1898 GNC 25629 : prunestate->has_lpdead_items = true;
1899 :
733 pg 1900 CBC 25629 : ItemPointerSetBlockNumber(&tmp, blkno);
1901 :
1902 846534 : for (int i = 0; i < lpdead_items; i++)
733 pg 1903 ECB : {
733 pg 1904 GIC 820905 : ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
496 1905 820905 : dead_items->items[dead_items->num_items++] = tmp;
733 pg 1906 ECB : }
1907 :
496 pg 1908 CBC 25629 : Assert(dead_items->num_items <= dead_items->max_items);
733 pg 1909 GIC 25629 : pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
496 1910 25629 : dead_items->num_items);
1911 :
1912 : /*
1913 : * It was convenient to ignore LP_DEAD items in all_visible earlier on
1914 : * to make the choice of whether or not to freeze the page unaffected
1915 : * by the short-term presence of LP_DEAD items. These LP_DEAD items
1916 : * were effectively assumed to be LP_UNUSED items in the making. It
1917 : * doesn't matter which heap pass (initial pass or final pass) ends up
1918 : * setting the page all-frozen, as long as the ongoing VACUUM does it.
1919 : *
1920 : * Now that freezing has been finalized, unset all_visible. It needs
1921 : * to reflect the present state of things, as expected by our caller.
1922 : */
102 pg 1923 GNC 25629 : prunestate->all_visible = false;
1924 : }
1925 :
1926 : /* Finally, add page-local counts to whole-VACUUM counts */
720 pg 1927 GIC 165089 : vacrel->tuples_deleted += tuples_deleted;
213 pg 1928 GNC 165089 : vacrel->tuples_frozen += tuples_frozen;
720 pg 1929 GIC 165089 : vacrel->lpdead_items += lpdead_items;
1930 165089 : vacrel->live_tuples += live_tuples;
409 1931 165089 : vacrel->recently_dead_tuples += recently_dead_tuples;
733 1932 165089 : }
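Editor's sketch: the page-level freeze decision made partway through lazy_scan_prune (freeze when the cutoffs require it, when there is nothing to freeze anyway, or when pruning already emitted an FPI and freezing lets the page become all-frozen) can be condensed into a predicate. The parameter names below are illustrative, not the function's actual code.

#include <stdbool.h>
#include <stdint.h>

/*
 * Sketch of the freeze-the-page test:
 *   freeze_required    - heap_prepare_freeze_tuple demanded freezing
 *   tuples_frozen      - number of prepared freeze plans for the page
 *   all_visible/frozen - page would be all-visible and all-frozen afterwards
 *   fpi_before/after   - WAL FPI counter before and after pruning
 */
static bool
should_freeze_page(bool freeze_required, int tuples_frozen,
                   bool all_visible, bool all_frozen,
                   int64_t fpi_before, int64_t fpi_after)
{
    if (freeze_required)
        return true;            /* no choice: cutoff XIDs/MXIDs present */
    if (tuples_frozen == 0)
        return true;            /* freeze path costs nothing, so take it */

    /* Opportunistic case: pruning emitted an FPI and freezing now lets the
     * page be marked all-frozen in the visibility map. */
    return all_visible && all_frozen && fpi_after != fpi_before;
}

int
main(void)
{
    /* Pruning emitted an FPI and the page would be all-frozen: freeze it. */
    return should_freeze_page(false, 3, true, true, 10, 11) ? 0 : 1;
}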
1933 :
1934 : /*
422 pg 1935 ECB : * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
1936 : *
1937 : * Caller need only hold a pin and share lock on the buffer, unlike
1938 : * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
1939 : * performed here, it's quite possible that an earlier opportunistic pruning
1940 : * operation left LP_DEAD items behind. We'll at least collect any such items
1941 : * in the dead_items array for removal from indexes.
1942 : *
1943 : * For aggressive VACUUM callers, we may return false to indicate that a full
1944 : * cleanup lock is required for processing by lazy_scan_prune. This is only
1945 : * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
1946 : * one or more tuples on the page. We always return true for non-aggressive
1947 : * callers.
1948 : *
1949 : * See lazy_scan_prune for an explanation of hastup return flag.
1950 : * recordfreespace flag instructs caller on whether or not it should do
1951 : * generic FSM processing for page.
1952 : */
1953 : static bool
422 pg 1954 GIC 4 : lazy_scan_noprune(LVRelState *vacrel,
1955 : Buffer buf,
1956 : BlockNumber blkno,
1957 : Page page,
1958 : bool *hastup,
1959 : bool *recordfreespace)
1960 : {
1961 : OffsetNumber offnum,
1962 : maxoff;
1963 : int lpdead_items,
1964 : live_tuples,
1965 : recently_dead_tuples,
422 pg 1966 ECB : missed_dead_tuples;
1967 : HeapTupleHeader tupleheader;
102 pg 1968 GNC 4 : TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1969 4 : MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
1970 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1971 :
422 pg 1972 GIC 4 : Assert(BufferGetBlockNumber(buf) == blkno);
1973 :
1974 4 : *hastup = false; /* for now */
1975 4 : *recordfreespace = false; /* for now */
1976 :
1977 4 : lpdead_items = 0;
1978 4 : live_tuples = 0;
1979 4 : recently_dead_tuples = 0;
422 pg 1980 CBC 4 : missed_dead_tuples = 0;
422 pg 1981 ECB :
422 pg 1982 GIC 4 : maxoff = PageGetMaxOffsetNumber(page);
1983 4 : for (offnum = FirstOffsetNumber;
422 pg 1984 CBC 90 : offnum <= maxoff;
422 pg 1985 GIC 86 : offnum = OffsetNumberNext(offnum))
422 pg 1986 ECB : {
1987 : ItemId itemid;
1988 : HeapTupleData tuple;
1989 :
422 pg 1990 CBC 86 : vacrel->offnum = offnum;
1991 86 : itemid = PageGetItemId(page, offnum);
422 pg 1992 ECB :
422 pg 1993 GIC 86 : if (!ItemIdIsUsed(itemid))
422 pg 1994 LBC 0 : continue;
422 pg 1995 ECB :
422 pg 1996 CBC 86 : if (ItemIdIsRedirected(itemid))
422 pg 1997 ECB : {
422 pg 1998 UIC 0 : *hastup = true;
1999 0 : continue;
2000 : }
2001 :
422 pg 2002 CBC 86 : if (ItemIdIsDead(itemid))
422 pg 2003 ECB : {
2004 : /*
2005 : * Deliberately don't set hastup=true here. See same point in
422 pg 2006 EUB : * lazy_scan_prune for an explanation.
2007 : */
422 pg 2008 LBC 0 : deadoffsets[lpdead_items++] = offnum;
422 pg 2009 UIC 0 : continue;
422 pg 2010 EUB : }
2011 :
422 pg 2012 GIC 86 : *hastup = true; /* page prevents rel truncation */
2013 86 : tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
102 pg 2014 GNC 86 : if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2015 : &NoFreezePageRelfrozenXid,
2016 : &NoFreezePageRelminMxid))
2017 : {
2018 : /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
422 pg 2019 GBC 64 : if (vacrel->aggressive)
422 pg 2020 EUB : {
2021 : /*
2022 : * Aggressive VACUUMs must always be able to advance rel's
371 pg 2023 ECB : * relfrozenxid to a value >= FreezeLimit (and be able to
2024 : * advance rel's relminmxid to a value >= MultiXactCutoff).
2025 : * The ongoing aggressive VACUUM won't be able to do that
2026 : * unless it can freeze an XID (or MXID) from this tuple now.
2027 : *
2028 : * The only safe option is to have caller perform processing
2029 : * of this page using lazy_scan_prune. Caller might have to
2030 : * wait a while for a cleanup lock, but it can't be helped.
2031 : */
422 pg 2032 UIC 0 : vacrel->offnum = InvalidOffsetNumber;
2033 0 : return false;
2034 : }
2035 :
2036 : /*
2037 : * Non-aggressive VACUUMs are under no obligation to advance
2038 : * relfrozenxid (even by one XID). We can be much laxer here.
2039 : *
2040 : * Currently we always just accept an older final relfrozenxid
2041 : * and/or relminmxid value. We never make caller wait or work a
2042 : * little harder, even when it likely makes sense to do so.
422 pg 2043 EUB : */
2044 : }
2045 :
422 pg 2046 GIC 86 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
2047 86 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2048 86 : tuple.t_len = ItemIdGetLength(itemid);
2049 86 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2050 :
108 pg 2051 GNC 86 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2052 : buf))
2053 : {
422 pg 2054 GIC 83 : case HEAPTUPLE_DELETE_IN_PROGRESS:
2055 : case HEAPTUPLE_LIVE:
2056 :
2057 : /*
422 pg 2058 ECB : * Count both cases as live, just like lazy_scan_prune
2059 : */
422 pg 2060 CBC 83 : live_tuples++;
422 pg 2061 ECB :
422 pg 2062 GIC 83 : break;
422 pg 2063 CBC 1 : case HEAPTUPLE_DEAD:
2064 :
2065 : /*
422 pg 2066 ECB : * There is some useful work for pruning to do, that won't be
2067 : * done due to failure to get a cleanup lock.
2068 : */
422 pg 2069 GIC 1 : missed_dead_tuples++;
2070 1 : break;
2071 2 : case HEAPTUPLE_RECENTLY_DEAD:
422 pg 2072 ECB :
2073 : /*
2074 : * Count in recently_dead_tuples, just like lazy_scan_prune
2075 : */
422 pg 2076 GIC 2 : recently_dead_tuples++;
2077 2 : break;
422 pg 2078 UIC 0 : case HEAPTUPLE_INSERT_IN_PROGRESS:
2079 :
2080 : /*
422 pg 2081 ECB : * Do not count these rows as live, just like lazy_scan_prune
2082 : */
422 pg 2083 LBC 0 : break;
422 pg 2084 UIC 0 : default:
2085 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2086 : break;
2087 : }
422 pg 2088 ECB : }
2089 :
422 pg 2090 GBC 4 : vacrel->offnum = InvalidOffsetNumber;
2091 :
2092 : /*
2093 : * By here we know for sure that caller can put off freezing and pruning
2094 : * this particular page until the next VACUUM. Remember its details now.
371 pg 2095 EUB : * (lazy_scan_prune expects a clean slate, so we have to do this last.)
422 2096 : */
102 pg 2097 GNC 4 : vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2098 4 : vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2099 :
2100 : /* Save any LP_DEAD items found on the page in dead_items array */
422 pg 2101 GIC 4 : if (vacrel->nindexes == 0)
422 pg 2102 ECB : {
2103 : /* Using one-pass strategy (since table has no indexes) */
422 pg 2104 UIC 0 : if (lpdead_items > 0)
2105 : {
2106 : /*
2107 : * Perfunctory handling for the corner case where a single pass
2108 : * strategy VACUUM cannot get a cleanup lock, and it turns out
 422 pg 2109 ECB :              * that there are one or more LP_DEAD items: just count the LP_DEAD
2110 : * items as missed_dead_tuples instead. (This is a bit dishonest,
2111 : * but it beats having to maintain specialized heap vacuuming code
2112 : * forever, for vanishingly little benefit.)
2113 : */
422 pg 2114 UIC 0 : *hastup = true;
2115 0 : missed_dead_tuples += lpdead_items;
422 pg 2116 EUB : }
2117 :
422 pg 2118 UIC 0 : *recordfreespace = true;
2119 : }
422 pg 2120 GIC 4 : else if (lpdead_items == 0)
2121 : {
2122 : /*
2123 : * Won't be vacuuming this page later, so record page's freespace in
2124 : * the FSM now
2125 : */
422 pg 2126 GBC 4 : *recordfreespace = true;
422 pg 2127 EUB : }
2128 : else
2129 : {
422 pg 2130 UBC 0 : VacDeadItems *dead_items = vacrel->dead_items;
2131 : ItemPointerData tmp;
422 pg 2132 ECB :
2133 : /*
2134 : * Page has LP_DEAD items, and so any references/TIDs that remain in
2135 : * indexes will be deleted during index vacuuming (and then marked
2136 : * LP_UNUSED in the heap)
2137 : */
422 pg 2138 LBC 0 : vacrel->lpdead_item_pages++;
2139 :
422 pg 2140 UIC 0 : ItemPointerSetBlockNumber(&tmp, blkno);
2141 :
422 pg 2142 UBC 0 : for (int i = 0; i < lpdead_items; i++)
2143 : {
422 pg 2144 UIC 0 : ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
2145 0 : dead_items->items[dead_items->num_items++] = tmp;
2146 : }
2147 :
2148 0 : Assert(dead_items->num_items <= dead_items->max_items);
2149 0 : pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
422 pg 2150 UBC 0 : dead_items->num_items);
2151 :
2152 0 : vacrel->lpdead_items += lpdead_items;
2153 :
422 pg 2154 EUB : /*
2155 : * Assume that we'll go on to vacuum this heap page during final pass
2156 : * over the heap. Don't record free space until then.
2157 : */
422 pg 2158 UIC 0 : *recordfreespace = false;
2159 : }
422 pg 2160 EUB :
2161 : /*
2162 : * Finally, add relevant page-local counts to whole-VACUUM counts
2163 : */
409 pg 2164 GBC 4 : vacrel->live_tuples += live_tuples;
422 pg 2165 GIC 4 : vacrel->recently_dead_tuples += recently_dead_tuples;
2166 4 : vacrel->missed_dead_tuples += missed_dead_tuples;
2167 4 : if (missed_dead_tuples > 0)
2168 1 : vacrel->missed_dead_pages++;
2169 :
422 pg 2170 EUB : /* Caller won't need to call lazy_scan_prune with same page */
422 pg 2171 GIC 4 : return true;
2172 : }
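Editor's sketch: the return-value contract described in the header comment (aggressive VACUUMs give up and retry with a cleanup lock when a tuple must be frozen; non-aggressive VACUUMs always accept the page) reduces to a simple predicate. Names below are illustrative only.

#include <stdbool.h>

/* Sketch: can a share-locked page be fully handled without pruning?
 * Returns false only when the caller is an aggressive VACUUM and some tuple
 * on the page carries an XID/MXID old enough that it must be frozen now. */
static bool
page_ok_without_cleanup_lock(bool aggressive, bool tuple_must_be_frozen)
{
    return !(aggressive && tuple_must_be_frozen);
}

int
main(void)
{
    return page_ok_without_cleanup_lock(true, true) ? 1 : 0;
}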
2173 :
2174 : /*
2175 : * Main entry point for index vacuuming and heap vacuuming.
452 pg 2176 ECB : *
2177 : * Removes items collected in dead_items from table's indexes, then marks the
2178 : * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2179 : * for full details.
2180 : *
2181 : * Also empties dead_items, freeing up space for later TIDs.
2182 : *
732 2183 : * We may choose to bypass index vacuuming at this point, though only when the
2184 : * ongoing VACUUM operation will definitely only have one index scan/round of
2185 : * index vacuuming.
2186 : */
2187 : static void
660 pg 2188 GIC 1765 : lazy_vacuum(LVRelState *vacrel)
2189 : {
2190 : bool bypass;
2191 :
2192 : /* Should not end up here with no indexes */
733 2193 1765 : Assert(vacrel->nindexes > 0);
2194 1765 : Assert(vacrel->lpdead_item_pages > 0);
2195 :
2196 1765 : if (!vacrel->do_index_vacuuming)
2197 : {
2198 9 : Assert(!vacrel->do_index_cleanup);
496 2199 9 : vacrel->dead_items->num_items = 0;
733 pg 2200 CBC 9 : return;
2201 : }
2202 :
2203 : /*
2204 : * Consider bypassing index vacuuming (and heap vacuuming) entirely.
732 pg 2205 ECB : *
2206 : * We currently only do this in cases where the number of LP_DEAD items
2207 : * for the entire VACUUM operation is close to zero. This avoids sharp
2208 : * discontinuities in the duration and overhead of successive VACUUM
2209 : * operations that run against the same table with a fixed workload.
2210 : * Ideally, successive VACUUM operations will behave as if there are
2211 : * exactly zero LP_DEAD items in cases where there are close to zero.
2212 : *
2213 : * This is likely to be helpful with a table that is continually affected
2214 : * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2215 : * have small aberrations that lead to just a few heap pages retaining
2216 : * only one or two LP_DEAD items. This is pretty common; even when the
2217 : * DBA goes out of their way to make UPDATEs use HOT, it is practically
2218 : * impossible to predict whether HOT will be applied in 100% of cases.
2219 : * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2220 : * HOT through careful tuning.
2221 : */
660 pg 2222 GIC 1756 : bypass = false;
2223 1756 : if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2224 : {
2225 : BlockNumber threshold;
2226 :
732 2227 1745 : Assert(vacrel->num_index_scans == 0);
496 2228 1745 : Assert(vacrel->lpdead_items == vacrel->dead_items->num_items);
732 2229 1745 : Assert(vacrel->do_index_vacuuming);
2230 1745 : Assert(vacrel->do_index_cleanup);
2231 :
2232 : /*
2233 : * This crossover point at which we'll start to do index vacuuming is
732 pg 2234 ECB : * expressed as a percentage of the total number of heap pages in the
2235 : * table that are known to have at least one LP_DEAD item. This is
2236 : * much more important than the total number of LP_DEAD items, since
2237 : * it's a proxy for the number of heap pages whose visibility map bits
2238 : * cannot be set on account of bypassing index and heap vacuuming.
2239 : *
2240 : * We apply one further precautionary test: the space currently used
2241 : * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2242 : * not exceed 32MB. This limits the risk that we will bypass index
2243 : * vacuuming again and again until eventually there is a VACUUM whose
2244 : * dead_items space is not CPU cache resident.
2245 : *
2246 : * We don't take any special steps to remember the LP_DEAD items (such
2247 : * as counting them in our final update to the stats system) when the
2248 : * optimization is applied. Though the accounting used in analyze.c's
2249 : * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2250 : * rows in its own stats report, that's okay. The discrepancy should
2251 : * be negligible. If this optimization is ever expanded to cover more
2252 : * cases then this may need to be reconsidered.
2253 : */
732 pg 2254 GIC 1745 : threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
660 2255 1747 : bypass = (vacrel->lpdead_item_pages < threshold &&
496 2256 2 : vacrel->lpdead_items < MAXDEADITEMS(32L * 1024L * 1024L));
2257 : }
2258 :
660 2259 1756 : if (bypass)
2260 : {
2261 : /*
2262 : * There are almost zero TIDs. Behave as if there were precisely
2263 : * zero: bypass index vacuuming, but do index cleanup.
2264 : *
2265 : * We expect that the ongoing VACUUM operation will finish very
732 pg 2266 ECB : * quickly, so there is no point in considering speeding up as a
2267 : * failsafe against wraparound failure. (Index cleanup is expected to
2268 : * finish very quickly in cases where there were no ambulkdelete()
2269 : * calls.)
2270 : */
732 pg 2271 CBC 2 : vacrel->do_index_vacuuming = false;
2272 : }
732 pg 2273 GIC 1754 : else if (lazy_vacuum_all_indexes(vacrel))
2274 : {
2275 : /*
2276 : * We successfully completed a round of index vacuuming. Do related
2277 : * heap vacuuming now.
2278 : */
2279 1754 : lazy_vacuum_heap_rel(vacrel);
2280 : }
2281 : else
2282 : {
732 pg 2283 ECB : /*
2284 : * Failsafe case.
2285 : *
2286 : * We attempted index vacuuming, but didn't finish a full round/full
2287 : * index scan. This happens when relfrozenxid or relminmxid is too
2288 : * far in the past.
2289 : *
2290 : * From this point on the VACUUM operation will do no further index
726 2291 : * vacuuming or heap vacuuming. This VACUUM operation won't end up
2292 : * back here again.
2293 : */
2 dgustafsson 2294 UNC 0 : Assert(VacuumFailsafeActive);
2295 : }
2296 :
2297 : /*
2298 : * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2299 : * vacuum)
2300 : */
496 pg 2301 GIC 1756 : vacrel->dead_items->num_items = 0;
2302 : }
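Editor's sketch: the bypass test above compares the number of pages with LP_DEAD items against a small fraction of the table and caps the dead-item memory at 32MB. The fraction and the MAXDEADITEMS() stand-in used below are assumed values for illustration; the real BYPASS_THRESHOLD_PAGES and MAXDEADITEMS() are defined elsewhere.

#include <stdbool.h>

#define EXAMPLE_BYPASS_THRESHOLD_PAGES 0.02     /* assumed fraction of rel_pages */
#define EXAMPLE_ITEMPOINTER_SIZE 6              /* assumed bytes per stored TID */
#define EXAMPLE_MAXDEADITEMS(avail) ((avail) / EXAMPLE_ITEMPOINTER_SIZE)

static bool
should_bypass_index_vacuuming(unsigned rel_pages,
                              unsigned lpdead_item_pages,
                              long lpdead_items)
{
    double      threshold = (double) rel_pages * EXAMPLE_BYPASS_THRESHOLD_PAGES;

    return lpdead_item_pages < threshold &&
        lpdead_items < EXAMPLE_MAXDEADITEMS(32L * 1024L * 1024L);
}

int
main(void)
{
    /* Tiny amount of garbage in a big table: skip this round of index
     * vacuuming and let index cleanup run on its own later. */
    return should_bypass_index_vacuuming(100000, 3, 40) ? 0 : 1;
}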
2303 :
2304 : /*
2305 : * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
732 pg 2306 EUB : *
2307 : * Returns true in the common case when all indexes were successfully
2308 : * vacuumed. Returns false in rare cases where we determined that the ongoing
2309 : * VACUUM operation is at risk of taking too long to finish, leading to
2310 : * wraparound failure.
2311 : */
2312 : static bool
734 pg 2313 CBC 1754 : lazy_vacuum_all_indexes(LVRelState *vacrel)
2314 : {
732 pg 2315 GIC 1754 : bool allindexes = true;
108 pg 2316 GNC 1754 : double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2317 :
734 pg 2318 GIC 1754 : Assert(vacrel->nindexes > 0);
733 2319 1754 : Assert(vacrel->do_index_vacuuming);
2320 1754 : Assert(vacrel->do_index_cleanup);
2321 :
2322 : /* Precheck for XID wraparound emergencies */
732 2323 1754 : if (lazy_check_wraparound_failsafe(vacrel))
2324 : {
2325 : /* Wraparound emergency -- don't even start an index scan */
732 pg 2326 LBC 0 : return false;
2327 : }
732 pg 2328 ECB :
1200 michael 2329 : /* Report that we are now vacuuming indexes */
1200 michael 2330 GIC 1754 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
1200 michael 2331 ECB : PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
2332 :
734 pg 2333 CBC 1754 : if (!ParallelVacuumIsActive(vacrel))
2334 : {
734 pg 2335 GIC 5531 : for (int idx = 0; idx < vacrel->nindexes; idx++)
734 pg 2336 ECB : {
734 pg 2337 GIC 3782 : Relation indrel = vacrel->indrels[idx];
2338 3782 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
1175 akapila 2339 EUB :
108 pg 2340 GNC 3782 : vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2341 : old_live_tuples,
2342 : vacrel);
732 pg 2343 ECB :
732 pg 2344 GIC 3782 : if (lazy_check_wraparound_failsafe(vacrel))
2345 : {
732 pg 2346 ECB : /* Wraparound emergency -- end current index scan */
732 pg 2347 UIC 0 : allindexes = false;
732 pg 2348 LBC 0 : break;
2349 : }
734 pg 2350 ECB : }
1175 akapila 2351 : }
2352 : else
2353 : {
2354 : /* Outsource everything to parallel variant */
108 pg 2355 GNC 5 : parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2356 : vacrel->num_index_scans);
732 pg 2357 ECB :
2358 : /*
2359 : * Do a postcheck to consider applying wraparound failsafe now. Note
732 pg 2360 EUB : * that parallel VACUUM only gets the precheck and this postcheck.
2361 : */
732 pg 2362 GIC 5 : if (lazy_check_wraparound_failsafe(vacrel))
732 pg 2363 UIC 0 : allindexes = false;
2364 : }
2365 :
2366 : /*
2367 : * We delete all LP_DEAD items from the first heap pass in all indexes on
732 pg 2368 ECB : * each call here (except calls where we choose to do the failsafe). This
2369 : * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2370 : * of the failsafe triggering, which prevents the next call from taking
2371 : * place).
2372 : */
733 pg 2373 GIC 1754 : Assert(vacrel->num_index_scans > 0 ||
2374 : vacrel->dead_items->num_items == vacrel->lpdead_items);
2 dgustafsson 2375 GNC 1754 : Assert(allindexes || VacuumFailsafeActive);
733 pg 2376 EUB :
2377 : /*
2378 : * Increase and report the number of index scans.
2379 : *
2380 : * We deliberately include the case where we started a round of bulk
2381 : * deletes that we weren't able to finish due to the failsafe triggering.
2382 : */
734 pg 2383 GIC 1754 : vacrel->num_index_scans++;
1200 michael 2384 1754 : pgstat_progress_update_param(PROGRESS_VACUUM_NUM_INDEX_VACUUMS,
734 pg 2385 1754 : vacrel->num_index_scans);
732 pg 2386 ECB :
732 pg 2387 GIC 1754 : return allindexes;
1200 michael 2388 ECB : }
2389 :
2390 : /*
2391 : * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2392 : *
2393 : * This routine marks LP_DEAD items in vacrel->dead_items array as LP_UNUSED.
2394 : * Pages that never had lazy_scan_prune record LP_DEAD items are not visited
2395 : * at all.
7940 tgl 2396 : *
732 pg 2397 : * We may also be able to truncate the line pointer array of the heap pages we
2398 : * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2399 : * array, it can be reclaimed as free space. These LP_UNUSED items usually
2400 : * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2401 : * each page to LP_UNUSED, and then consider if it's possible to truncate the
2402 : * page's line pointer array).
2403 : *
2404 : * Note: the reason for doing this as a second pass is we cannot remove the
2405 : * tuples until we've removed their index entries, and we want to process
2406 : * index entry removal in batches as large as possible.
2407 : */
2408 : static void
734 pg 2409 GIC 1754 : lazy_vacuum_heap_rel(LVRelState *vacrel)
2410 : {
88 pg 2411 GNC 1754 : int index = 0;
2412 1754 : BlockNumber vacuumed_pages = 0;
3707 heikki.linnakangas 2413 GIC 1754 : Buffer vmbuffer = InvalidBuffer;
2414 : LVSavedErrInfo saved_err_info;
2415 :
733 pg 2416 1754 : Assert(vacrel->do_index_vacuuming);
2417 1754 : Assert(vacrel->do_index_cleanup);
2418 1754 : Assert(vacrel->num_index_scans > 0);
2419 :
2420 : /* Report that we are now vacuuming the heap */
1200 michael 2421 1754 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
1200 michael 2422 ECB : PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2423 :
1105 akapila 2424 : /* Update error traceback information */
734 pg 2425 CBC 1754 : update_vacuum_error_info(vacrel, &saved_err_info,
734 pg 2426 ECB : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2427 : InvalidBlockNumber, InvalidOffsetNumber);
2428 :
496 pg 2429 GIC 26951 : while (index < vacrel->dead_items->num_items)
2430 : {
2431 : BlockNumber blkno;
2432 : Buffer buf;
2433 : Page page;
2434 : Size freespace;
7940 tgl 2435 ECB :
6998 tgl 2436 GIC 25197 : vacuum_delay_point();
2437 :
88 pg 2438 GNC 25197 : blkno = ItemPointerGetBlockNumber(&vacrel->dead_items->items[index]);
2439 25197 : vacrel->blkno = blkno;
2440 :
2441 : /*
2442 : * Pin the visibility map page in case we need to mark the page
2443 : * all-visible. In most cases this will be very cheap, because we'll
2444 : * already have the correct page pinned anyway.
2445 : */
83 2446 25197 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2447 :
2448 : /* We need a non-cleanup exclusive lock to mark dead_items unused */
88 2449 25197 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2450 : vacrel->bstrategy);
733 pg 2451 GIC 25197 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
83 pg 2452 GNC 25197 : index = lazy_vacuum_heap_page(vacrel, blkno, buf, index, vmbuffer);
2453 :
2454 : /* Now that we've vacuumed the page, record its available space */
2545 kgrittn 2455 CBC 25197 : page = BufferGetPage(buf);
5304 heikki.linnakangas 2456 GIC 25197 : freespace = PageGetHeapFreeSpace(page);
5304 heikki.linnakangas 2457 ECB :
6218 tgl 2458 CBC 25197 : UnlockReleaseBuffer(buf);
88 pg 2459 GNC 25197 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
734 pg 2460 GIC 25197 : vacuumed_pages++;
2461 : }
2462 :
2463 1754 : vacrel->blkno = InvalidBlockNumber;
3707 heikki.linnakangas 2464 1754 : if (BufferIsValid(vmbuffer))
3707 heikki.linnakangas 2465 CBC 1754 : ReleaseBuffer(vmbuffer);
3707 heikki.linnakangas 2466 ECB :
2467 : /*
2468 : * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
733 pg 2469 : * the second heap pass. No more, no less.
2470 : */
496 pg 2471 GIC 1754 : Assert(index > 0);
733 pg 2472 CBC 1754 : Assert(vacrel->num_index_scans > 1 ||
496 pg 2473 ECB : (index == vacrel->lpdead_items &&
733 2474 : vacuumed_pages == vacrel->lpdead_item_pages));
2475 :
450 pg 2476 GIC 1754 : ereport(DEBUG2,
610 peter 2477 ECB : (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
450 pg 2478 : vacrel->relname, (long long) index, vacuumed_pages)));
1105 akapila 2479 :
2480 : /* Revert to the previous phase information for error traceback */
734 pg 2481 GIC 1754 : restore_vacuum_error_info(vacrel, &saved_err_info);
7940 tgl 2482 1754 : }
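Editor's sketch: the outer loop above walks the sorted dead_items array and hands each block's slice to lazy_vacuum_heap_page, which advances the index past every TID on that block. The fragment below shows that per-block batching over a sorted TID array using plain stand-in types rather than VacDeadItems.

#include <stdio.h>

/* Sketch: a TID reduced to a (block, offset) pair. */
typedef struct
{
    unsigned    block;
    unsigned    offset;
} ExampleTid;

/* Consume all TIDs for the block at dead[index] and return the index of the
 * first TID belonging to a later block (mirrors the return value of
 * lazy_vacuum_heap_page). */
static int
vacuum_one_block(const ExampleTid *dead, int ndead, int index)
{
    unsigned    block = dead[index].block;

    while (index < ndead && dead[index].block == block)
    {
        printf("mark (%u,%u) LP_UNUSED\n", dead[index].block, dead[index].offset);
        index++;
    }
    return index;
}

int
main(void)
{
    ExampleTid  dead[] = {{3, 1}, {3, 7}, {9, 2}, {9, 5}, {9, 6}};
    int         ndead = 5;
    int         index = 0;

    while (index < ndead)
        index = vacuum_one_block(dead, ndead, index);
    return 0;
}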
2483 :
2484 : /*
733 pg 2485 ECB : * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
496 2486 : * vacrel->dead_items array.
2487 : *
2488 : * Caller must have an exclusive buffer lock on the buffer (though a full
2489 : * cleanup lock is also acceptable). vmbuffer must be valid and already have
2490 : * a pin on blkno's visibility map page.
7940 tgl 2491 : *
2492 : * index is an offset into the vacrel->dead_items array for the first listed
2493 : * LP_DEAD item on the page. The return value is the first index immediately
2494 : * after all LP_DEAD items for the same page in the array.
2495 : */
2496 : static int
734 pg 2497 CBC 25554 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2498 : int index, Buffer vmbuffer)
2499 : {
473 akapila 2500 GIC 25554 : VacDeadItems *dead_items = vacrel->dead_items;
2545 kgrittn 2501 25554 : Page page = BufferGetPage(buffer);
2502 : OffsetNumber unused[MaxHeapTuplesPerPage];
88 pg 2503 GNC 25554 : int nunused = 0;
2504 : TransactionId visibility_cutoff_xid;
2505 : bool all_frozen;
2506 : LVSavedErrInfo saved_err_info;
2507 :
733 pg 2508 GIC 25554 : Assert(vacrel->nindexes == 0 || vacrel->do_index_vacuuming);
2509 :
2581 rhaas 2510 25554 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2511 :
1105 akapila 2512 ECB : /* Update error traceback information */
734 pg 2513 GIC 25554 : update_vacuum_error_info(vacrel, &saved_err_info,
2514 : VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
734 pg 2515 ECB : InvalidOffsetNumber);
1105 akapila 2516 :
7940 tgl 2517 GIC 25554 : START_CRIT_SECTION();
6218 tgl 2518 ECB :
496 pg 2519 GIC 845958 : for (; index < dead_items->num_items; index++)
2520 : {
2521 : BlockNumber tblk;
2522 : OffsetNumber toff;
5680 tgl 2523 ECB : ItemId itemid;
2524 :
496 pg 2525 CBC 843847 : tblk = ItemPointerGetBlockNumber(&dead_items->items[index]);
7940 tgl 2526 GIC 843847 : if (tblk != blkno)
2527 23443 : break; /* past end of tuples for this block */
496 pg 2528 CBC 820404 : toff = ItemPointerGetOffsetNumber(&dead_items->items[index]);
7940 tgl 2529 GIC 820404 : itemid = PageGetItemId(page, toff);
2530 :
733 pg 2531 820404 : Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
5688 tgl 2532 CBC 820404 : ItemIdSetUnused(itemid);
88 pg 2533 GNC 820404 : unused[nunused++] = toff;
7940 tgl 2534 ECB : }
2535 :
88 pg 2536 GNC 25554 : Assert(nunused > 0);
2537 :
2538 : /* Attempt to truncate line pointer array now */
732 pg 2539 GIC 25554 : PageTruncateLinePointerArray(page);
7940 tgl 2540 ECB :
3631 simon 2541 : /*
2542 : * Mark buffer dirty before we write WAL.
2543 : */
3631 simon 2544 CBC 25554 : MarkBufferDirty(buffer);
2545 :
3404 heikki.linnakangas 2546 ECB : /* XLOG stuff */
734 pg 2547 CBC 25554 : if (RelationNeedsWAL(vacrel->rel))
3404 heikki.linnakangas 2548 ECB : {
2549 : xl_heap_vacuum xlrec;
2550 : XLogRecPtr recptr;
2551 :
88 pg 2552 GNC 24803 : xlrec.nunused = nunused;
2553 :
733 pg 2554 CBC 24803 : XLogBeginInsert();
733 pg 2555 GIC 24803 : XLogRegisterData((char *) &xlrec, SizeOfHeapVacuum);
2556 :
2557 24803 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
88 pg 2558 GNC 24803 : XLogRegisterBufData(0, (char *) unused, nunused * sizeof(OffsetNumber));
733 pg 2559 ECB :
733 pg 2560 GIC 24803 : recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VACUUM);
2561 :
3404 heikki.linnakangas 2562 CBC 24803 : PageSetLSN(page, recptr);
2563 : }
2564 :
2565 : /*
2566 : * End critical section, so we safely can do visibility tests (which
3215 andres 2567 ECB : * possibly need to perform IO and allocate memory!). If we crash now the
2568 : * page (including the corresponding vm bit) might not be marked all
2569 : * visible, but that's fine. A later vacuum will fix that.
2570 : */
3215 andres 2571 GIC 25554 : END_CRIT_SECTION();
3215 andres 2572 ECB :
2573 : /*
2574 : * Now that we have removed the LD_DEAD items from the page, once again
 2575 :      * Now that we have removed the LP_DEAD items from the page, once again
2576 : * dirty, exclusively locked, and, if needed, a full page image has been
733 pg 2577 : * emitted.
2578 : */
83 pg 2579 GNC 25554 : Assert(!PageIsAllVisible(page));
734 pg 2580 GIC 25554 : if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2581 : &all_frozen))
2582 : {
83 pg 2583 GNC 25360 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
2584 :
2585 25360 : if (all_frozen)
2586 : {
2587 20658 : Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2595 rhaas 2588 CBC 20658 : flags |= VISIBILITYMAP_ALL_FROZEN;
2589 : }
2590 :
83 pg 2591 GNC 25360 : PageSetAllVisible(page);
2592 25360 : visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
2593 : vmbuffer, visibility_cutoff_xid, flags);
2594 : }
2595 :
1105 akapila 2596 ECB : /* Revert to the previous phase information for error traceback */
734 pg 2597 CBC 25554 : restore_vacuum_error_info(vacrel, &saved_err_info);
496 pg 2598 GIC 25554 : return index;
2599 : }
2600 :
2601 : /*
732 pg 2602 ECB : * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2603 : * relfrozenxid and/or relminmxid that is dangerously far in the past.
2604 : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2605 : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2606 : *
2607 : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2608 : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2609 : * that it started out with.
2610 : *
2611 : * Returns true when failsafe has been triggered.
2612 : */
2613 : static bool
732 pg 2614 GIC 42280 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
2615 : {
732 pg 2616 ECB : /* Don't warn more than once per VACUUM */
2 dgustafsson 2617 GNC 42280 : if (VacuumFailsafeActive)
732 pg 2618 UIC 0 : return true;
732 pg 2619 ECB :
108 pg 2620 GNC 42280 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
732 pg 2621 ECB : {
2 dgustafsson 2622 UNC 0 : VacuumFailsafeActive = true;
2623 :
2624 : /*
2625 : * Abandon use of a buffer access strategy to allow use of all of
2626 : * shared buffers. We assume the caller who allocated the memory for
2627 : * the BufferAccessStrategy will free it.
2628 : */
6 drowley 2629 0 : vacrel->bstrategy = NULL;
6 drowley 2630 EUB :
2631 : /* Disable index vacuuming, index cleanup, and heap rel truncation */
732 pg 2632 UIC 0 : vacrel->do_index_vacuuming = false;
2633 0 : vacrel->do_index_cleanup = false;
660 2634 0 : vacrel->do_rel_truncate = false;
2635 :
732 2636 0 : ereport(WARNING,
685 pg 2637 EUB : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2638 : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
732 2639 : vacrel->num_index_scans),
610 peter 2640 : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2641 : errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
732 pg 2642 : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2643 :
2644 : /* Stop applying cost limits from this point on */
732 pg 2645 UIC 0 : VacuumCostActive = false;
2646 0 : VacuumCostBalance = 0;
2647 :
2648 0 : return true;
2649 : }
2650 :
732 pg 2651 GBC 42280 : return false;
732 pg 2652 EUB : }
2653 :
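/*
 * Illustrative sketch, not part of vacuumlazy.c: how a call site might
 * consume the failsafe check.  All of the state changes happen inside
 * lazy_check_wraparound_failsafe() itself (it clears do_index_vacuuming,
 * do_index_cleanup and do_rel_truncate), so callers only need the boolean
 * result.  The function name and the VACUUMLAZY_EXAMPLES guard macro are
 * hypothetical and exist only for this example.
 */
#ifdef VACUUMLAZY_EXAMPLES
static void
failsafe_check_example(LVRelState *vacrel)
{
	if (lazy_check_wraparound_failsafe(vacrel))
	{
		/*
		 * Failsafe fired: the rest of this VACUUM only prunes the heap and
		 * records dead items; index vacuuming, index cleanup and heap
		 * truncation are bypassed, and cost-based delays no longer apply.
		 */
	}
}
#endif
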
1175 akapila 2654 : /*
2655 : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2656 : */
1175 akapila 2657 ECB : static void
734 pg 2658 GIC 34805 : lazy_cleanup_all_indexes(LVRelState *vacrel)
2659 : {
393 2660 34805 : double reltuples = vacrel->new_rel_tuples;
2661 34805 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2662 :
2663 34805 : Assert(vacrel->do_index_cleanup);
734 pg 2664 CBC 34805 : Assert(vacrel->nindexes > 0);
2665 :
1175 akapila 2666 ECB : /* Report that we are now cleaning up indexes */
1175 akapila 2667 CBC 34805 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2668 : PROGRESS_VACUUM_PHASE_INDEX_CLEANUP);
1175 akapila 2669 ECB :
734 pg 2670 CBC 34805 : if (!ParallelVacuumIsActive(vacrel))
2671 : {
734 pg 2672 GIC 89389 : for (int idx = 0; idx < vacrel->nindexes; idx++)
734 pg 2673 ECB : {
734 pg 2674 GIC 54593 : Relation indrel = vacrel->indrels[idx];
2675 54593 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
1175 akapila 2676 ECB :
734 pg 2677 GIC 54593 : vacrel->indstats[idx] =
734 pg 2678 CBC 54593 : lazy_cleanup_one_index(indrel, istat, reltuples,
2679 : estimated_count, vacrel);
734 pg 2680 ECB : }
1175 akapila 2681 : }
2682 : else
2683 : {
734 pg 2684 : /* Outsource everything to parallel variant */
393 pg 2685 GIC 9 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
2686 : vacrel->num_index_scans,
2687 : estimated_count);
2688 : }
1175 akapila 2689 34805 : }
2690 :
7935 tgl 2691 ECB : /*
2692 : * lazy_vacuum_one_index() -- vacuum index relation.
2693 : *
2694 : * Delete all the index tuples containing a TID collected in
496 pg 2695 : * vacrel->dead_items array. Also update running statistics.
2696 : * Exact details depend on index AM's ambulkdelete routine.
2697 : *
2698 : * reltuples is the number of heap tuples to be passed to the
2699 : * bulkdelete callback. It's always assumed to be estimated.
2700 : * See indexam.sgml for more info.
2701 : *
2702 : * Returns bulk delete stats derived from input stats
2703 : */
2704 : static IndexBulkDeleteResult *
734 pg 2705 GIC 3782 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
2706 : double reltuples, LVRelState *vacrel)
2707 : {
2708 : IndexVacuumInfo ivinfo;
2709 : LVSavedErrInfo saved_err_info;
2710 :
6186 tgl 2711 CBC 3782 : ivinfo.index = indrel;
6 pg 2712 GNC 3782 : ivinfo.heaprel = vacrel->rel;
5129 tgl 2713 GIC 3782 : ivinfo.analyze_only = false;
1467 alvherre 2714 3782 : ivinfo.report_progress = false;
5055 tgl 2715 3782 : ivinfo.estimated_count = true;
450 pg 2716 3782 : ivinfo.message_level = DEBUG2;
1175 akapila 2717 3782 : ivinfo.num_heap_tuples = reltuples;
734 pg 2718 CBC 3782 : ivinfo.strategy = vacrel->bstrategy;
7351 tgl 2719 ECB :
1012 akapila 2720 : /*
2721 : * Update error traceback information.
2722 : *
2723 : * The index name is saved during this phase and restored immediately
2724 : * after this phase. See vacuum_error_callback.
2725 : */
734 pg 2726 GIC 3782 : Assert(vacrel->indname == NULL);
2727 3782 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2728 3782 : update_vacuum_error_info(vacrel, &saved_err_info,
2729 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
2730 : InvalidBlockNumber, InvalidOffsetNumber);
2731 :
2732 : /* Do bulk deletion */
473 akapila 2733 CBC 3782 : istat = vac_bulkdel_one_index(&ivinfo, istat, (void *) vacrel->dead_items);
1105 akapila 2734 ECB :
2735 : /* Revert to the previous phase information for error traceback */
734 pg 2736 GIC 3782 : restore_vacuum_error_info(vacrel, &saved_err_info);
2737 3782 : pfree(vacrel->indname);
2738 3782 : vacrel->indname = NULL;
2739 :
734 pg 2740 CBC 3782 : return istat;
2741 : }
2742 :
7940 tgl 2743 ECB : /*
734 pg 2744 : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
1175 akapila 2745 : *
2746 : * Calls index AM's amvacuumcleanup routine. reltuples is the number
496 pg 2747 : * of heap tuples and estimated_count is true if reltuples is an
2748 : * estimated value. See indexam.sgml for more info.
2749 : *
2750 : * Returns bulk delete stats derived from input stats
2751 : */
2752 : static IndexBulkDeleteResult *
734 pg 2753 GIC 54593 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
2754 : double reltuples, bool estimated_count,
2755 : LVRelState *vacrel)
2756 : {
2757 : IndexVacuumInfo ivinfo;
2758 : LVSavedErrInfo saved_err_info;
2759 :
6186 tgl 2760 CBC 54593 : ivinfo.index = indrel;
6 pg 2761 GNC 54593 : ivinfo.heaprel = vacrel->rel;
5129 tgl 2762 GIC 54593 : ivinfo.analyze_only = false;
1467 alvherre 2763 54593 : ivinfo.report_progress = false;
1175 akapila 2764 54593 : ivinfo.estimated_count = estimated_count;
450 pg 2765 54593 : ivinfo.message_level = DEBUG2;
2766 :
1175 akapila 2767 54593 : ivinfo.num_heap_tuples = reltuples;
734 pg 2768 CBC 54593 : ivinfo.strategy = vacrel->bstrategy;
7351 tgl 2769 ECB :
1012 akapila 2770 : /*
2771 : * Update error traceback information.
2772 : *
2773 : * The index name is saved during this phase and restored immediately
2774 : * after this phase. See vacuum_error_callback.
2775 : */
734 pg 2776 CBC 54593 : Assert(vacrel->indname == NULL);
734 pg 2777 GIC 54593 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2778 54593 : update_vacuum_error_info(vacrel, &saved_err_info,
2779 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
2780 : InvalidBlockNumber, InvalidOffsetNumber);
2781 :
473 akapila 2782 54593 : istat = vac_cleanup_one_index(&ivinfo, istat);
2783 :
956 akapila 2784 ECB : /* Revert to the previous phase information for error traceback */
734 pg 2785 CBC 54593 : restore_vacuum_error_info(vacrel, &saved_err_info);
2786 54593 : pfree(vacrel->indname);
734 pg 2787 GIC 54593 : vacrel->indname = NULL;
2788 :
2789 54593 : return istat;
7940 tgl 2790 ECB : }
2791 :
2792 : /*
2657 2793 : * should_attempt_truncation - should we attempt to truncate the heap?
2794 : *
2795 : * Don't even think about it unless we have a shot at releasing a goodly
2796 : * number of pages. Otherwise, the time taken isn't worth it, mainly because
422 pg 2797 : * an AccessExclusive lock must be replayed on any hot standby, where it can
2798 : * be particularly disruptive.
2799 : *
2800 : * Also don't attempt it if wraparound failsafe is in effect. The entire
2801 : * system might be refusing to allocate new XIDs at this point. The system
2802 : * definitely won't return to normal unless and until VACUUM actually advances
2803 : * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
2804 : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
2805 : * truncate the table under these circumstances, an XID exhaustion error might
2806 : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
2807 : * There is very little chance of truncation working out when the failsafe is
2808 : * in effect in any case. lazy_scan_prune makes the optimistic assumption
2809 : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
2810 : * we're called.
2811 : *
2812 : * Also don't attempt it if we are doing early pruning/vacuuming, because a
2813 : * scan which cannot find a truncated heap page cannot determine that the
2814 : * snapshot is too old to read that page.
2815 : */
2816 : static bool
660 pg 2817 GIC 36739 : should_attempt_truncation(LVRelState *vacrel)
2818 : {
2819 : BlockNumber possibly_freeable;
2820 :
2 dgustafsson 2821 GNC 36739 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive ||
393 pg 2822 GIC 36619 : old_snapshot_threshold >= 0)
726 2823 123 : return false;
2824 :
734 pg 2825 CBC 36616 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
2657 tgl 2826 GIC 36616 : if (possibly_freeable > 0 &&
2827 107 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
393 pg 2828 107 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
2657 tgl 2829 CBC 95 : return true;
393 pg 2830 ECB :
393 pg 2831 CBC 36521 : return false;
2832 : }
2657 tgl 2833 ECB :
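/*
 * Worked example for the threshold above (illustrative only): truncation is
 * attempted once the trailing run of possibly-freeable pages reaches
 * Min(REL_TRUNCATE_MINIMUM, rel_pages / REL_TRUNCATE_FRACTION).  With the
 * values currently defined at the top of this file, a 160-page heap with 20
 * empty trailing pages qualifies via the fractional test (20 >= 160 / 16),
 * while a very large heap qualifies as soon as 1000 trailing pages look
 * freeable, regardless of its total size.
 */
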
7940 2834 : /*
2835 : * lazy_truncate_heap - try to truncate off any empty pages at the end
2836 : */
4807 2837 : static void
734 pg 2838 GIC 95 : lazy_truncate_heap(LVRelState *vacrel)
7940 tgl 2839 ECB : {
586 pg 2840 GIC 95 : BlockNumber orig_rel_pages = vacrel->rel_pages;
2841 : BlockNumber new_rel_pages;
2842 : bool lock_waiter_detected;
2843 : int lock_retry;
2844 :
2845 : /* Report that we are now truncating */
2581 rhaas 2846 CBC 95 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2847 : PROGRESS_VACUUM_PHASE_TRUNCATE);
2581 rhaas 2848 ECB :
2849 : /* Update error traceback information one last time */
393 pg 2850 GIC 95 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
2851 : vacrel->nonempty_pages, InvalidOffsetNumber);
2852 :
2853 : /*
3771 kgrittn 2854 ECB : * Loop until no more truncating can be done.
2855 : */
2856 : do
2857 : {
4332 tgl 2858 : /*
2859 : * We need full exclusive lock on the relation in order to do
2860 : * truncation. If we can't get it, give up rather than waiting --- we
2861 : * don't want to block other backends, and we don't want to deadlock
2862 : * (which is quite possible considering we already hold a lower-grade
2863 : * lock).
2864 : */
663 pg 2865 GIC 95 : lock_waiter_detected = false;
3771 kgrittn 2866 95 : lock_retry = 0;
2867 : while (true)
2868 : {
734 pg 2869 295 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
3771 kgrittn 2870 93 : break;
2871 :
2872 : /*
3771 kgrittn 2873 ECB : * Check for interrupts while trying to (re-)acquire the exclusive
2874 : * lock.
2875 : */
3771 kgrittn 2876 GIC 202 : CHECK_FOR_INTERRUPTS();
7940 tgl 2877 ECB :
3632 kgrittn 2878 CBC 202 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2879 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
2880 : {
2881 : /*
2882 : * We failed to establish the lock in the specified number of
2883 : * retries. This means we give up truncating.
3771 kgrittn 2884 ECB : */
450 pg 2885 GIC 2 : ereport(vacrel->verbose ? INFO : DEBUG2,
3632 kgrittn 2886 ECB : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2887 : vacrel->relname)));
3771 kgrittn 2888 GIC 3 : return;
2889 : }
2890 :
646 michael 2891 200 : (void) WaitLatch(MyLatch,
2892 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
646 michael 2893 ECB : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
2894 : WAIT_EVENT_VACUUM_TRUNCATE);
646 michael 2895 GIC 200 : ResetLatch(MyLatch);
3771 kgrittn 2896 ECB : }
2897 :
2898 : /*
2899 : * Now that we have exclusive lock, look to see if the rel has grown
2900 : * whilst we were vacuuming with non-exclusive lock. If so, give up;
2901 : * the newly added pages presumably contain non-deletable tuples.
2902 : */
734 pg 2903 CBC 93 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
586 pg 2904 GIC 93 : if (new_rel_pages != orig_rel_pages)
2905 : {
2906 : /*
2907 : * Note: we intentionally don't update vacrel->rel_pages with the
2908 : * new rel size here. If we did, it would amount to assuming that
2909 : * the new pages are empty, which is unlikely. Leaving the numbers
2910 : * alone amounts to assuming that the new pages have the same
734 pg 2911 ECB : * tuple density as existing ones, which is less unlikely.
3771 kgrittn 2912 : */
734 pg 2913 UIC 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3771 kgrittn 2914 0 : return;
2915 : }
2916 :
2917 : /*
2918 : * Scan backwards from the end to verify that the end pages actually
2919 : * contain no tuples. This is *necessary*, not optional, because
2920 : * other backends could have added tuples to these pages whilst we
3771 kgrittn 2921 EUB : * were vacuuming.
2922 : */
663 pg 2923 GIC 93 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
734 2924 93 : vacrel->blkno = new_rel_pages;
2925 :
586 2926 93 : if (new_rel_pages >= orig_rel_pages)
2927 : {
2928 : /* can't do anything after all */
734 2929 1 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3771 kgrittn 2930 1 : return;
3771 kgrittn 2931 ECB : }
2932 :
2933 : /*
2934 : * Okay to truncate.
2935 : */
734 pg 2936 GIC 92 : RelationTruncate(vacrel->rel, new_rel_pages);
3771 kgrittn 2937 ECB :
2938 : /*
2939 : * We can release the exclusive lock as soon as we have truncated.
2940 : * Other backends can't safely access the relation until they have
2941 : * processed the smgr invalidation that smgrtruncate sent out ... but
2942 : * that should happen as part of standard invalidation processing once
2943 : * they acquire lock on the relation.
2944 : */
734 pg 2945 GIC 92 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
2946 :
2947 : /*
2948 : * Update statistics. Here, it *is* correct to adjust rel_pages
2949 : * without also touching reltuples, since the tuple count wasn't
2950 : * changed by the truncation.
2951 : */
436 2952 92 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
734 pg 2953 CBC 92 : vacrel->rel_pages = new_rel_pages;
2954 :
450 pg 2955 GIC 92 : ereport(vacrel->verbose ? INFO : DEBUG2,
2956 : (errmsg("table \"%s\": truncated %u to %u pages",
2957 : vacrel->relname,
2958 : orig_rel_pages, new_rel_pages)));
586 2959 92 : orig_rel_pages = new_rel_pages;
663 pg 2960 CBC 92 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
7940 tgl 2961 ECB : }
2962 :
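/*
 * Illustrative note on the retry loop above: ConditionalLockRelation() is
 * attempted up to VACUUM_TRUNCATE_LOCK_TIMEOUT /
 * VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL times, with a
 * VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL sleep on the process latch between
 * attempts.  An uncooperative lock holder therefore delays VACUUM by
 * roughly VACUUM_TRUNCATE_LOCK_TIMEOUT milliseconds at most before
 * truncation is abandoned with the "stopping truncate" message.
 */
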
2963 : /*
2964 : * Rescan end pages to verify that they are (still) empty of tuples.
2965 : *
2966 : * Returns number of nondeletable pages (last nonempty page + 1).
2967 : */
2968 : static BlockNumber
663 pg 2969 GIC 93 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
2970 : {
2971 : BlockNumber blkno;
2972 : BlockNumber prefetchedUntil;
2973 : instr_time starttime;
2974 :
2975 : /* Initialize the starttime if we check for conflicting lock requests */
3771 kgrittn 2976 93 : INSTR_TIME_SET_CURRENT(starttime);
7940 tgl 2977 ECB :
2978 : /*
2979 : * Start checking blocks at what we believe relation end to be and move
2980 : * backwards. (Strange coding of loop control is needed because blkno is
2981 : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2982 : * in forward direction, so that OS-level readahead can kick in.
2983 : */
734 pg 2984 CBC 93 : blkno = vacrel->rel_pages;
2985 : StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
2986 : "prefetch size must be power of 2");
2267 alvherre 2987 GIC 93 : prefetchedUntil = InvalidBlockNumber;
734 pg 2988 1409 : while (blkno > vacrel->nonempty_pages)
2989 : {
2990 : Buffer buf;
2991 : Page page;
7940 tgl 2992 ECB : OffsetNumber offnum,
2993 : maxoff;
2994 : bool hastup;
2995 :
3771 kgrittn 2996 : /*
2997 : * Check if another process requests a lock on our relation. We are
2998 : * holding an AccessExclusiveLock here, so they will be waiting. We
2999 : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3000 : * only check if that interval has elapsed once every 32 blocks to
3001 : * keep the number of system calls and actual shared lock table
3002 : * lookups to a minimum.
3003 : */
3771 kgrittn 3004 GIC 1320 : if ((blkno % 32) == 0)
3005 : {
3006 : instr_time currenttime;
3007 : instr_time elapsed;
3008 :
3009 32 : INSTR_TIME_SET_CURRENT(currenttime);
3010 32 : elapsed = currenttime;
3011 32 : INSTR_TIME_SUBTRACT(elapsed, starttime);
3771 kgrittn 3012 CBC 32 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3013 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3014 : {
734 pg 3015 UIC 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3016 : {
450 pg 3017 LBC 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
586 pg 3018 ECB : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
734 3019 : vacrel->relname)));
3771 kgrittn 3020 :
663 pg 3021 UIC 0 : *lock_waiter_detected = true;
3771 kgrittn 3022 0 : return blkno;
3771 kgrittn 3023 EUB : }
3771 kgrittn 3024 UIC 0 : starttime = currenttime;
3771 kgrittn 3025 EUB : }
3026 : }
3027 :
3028 : /*
5690 alvherre 3029 : * We don't insert a vacuum delay point here, because we have an
5624 bruce 3030 : * exclusive lock on the table which we want to hold for as short a
3031 : * time as possible. We still need to check for interrupts however.
5690 alvherre 3032 : */
5688 alvherre 3033 GIC 1320 : CHECK_FOR_INTERRUPTS();
3034 :
7940 tgl 3035 1320 : blkno--;
3036 :
3037 : /* If we haven't prefetched this lot yet, do so now. */
2267 alvherre 3038 1320 : if (prefetchedUntil > blkno)
3039 : {
3040 : BlockNumber prefetchStart;
2153 bruce 3041 ECB : BlockNumber pblkno;
3042 :
2267 alvherre 3043 CBC 125 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
2267 alvherre 3044 GIC 1610 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3045 : {
734 pg 3046 CBC 1485 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
2267 alvherre 3047 GIC 1485 : CHECK_FOR_INTERRUPTS();
3048 : }
3049 125 : prefetchedUntil = prefetchStart;
3050 : }
2267 alvherre 3051 ECB :
734 pg 3052 CBC 1320 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3053 : vacrel->bstrategy);
7940 tgl 3054 ECB :
3055 : /* In this phase we only need shared access to the buffer */
7940 tgl 3056 GIC 1320 : LockBuffer(buf, BUFFER_LOCK_SHARE);
7940 tgl 3057 ECB :
2545 kgrittn 3058 GIC 1320 : page = BufferGetPage(buf);
3059 :
7940 tgl 3060 CBC 1320 : if (PageIsNew(page) || PageIsEmpty(page))
3061 : {
6218 tgl 3062 UIC 0 : UnlockReleaseBuffer(buf);
7940 3063 0 : continue;
7940 tgl 3064 ECB : }
3065 :
7940 tgl 3066 CBC 1320 : hastup = false;
7940 tgl 3067 GIC 1320 : maxoff = PageGetMaxOffsetNumber(page);
7940 tgl 3068 CBC 1320 : for (offnum = FirstOffsetNumber;
7940 tgl 3069 GIC 2636 : offnum <= maxoff;
7940 tgl 3070 GBC 1316 : offnum = OffsetNumberNext(offnum))
7940 tgl 3071 EUB : {
3072 : ItemId itemid;
3073 :
7940 tgl 3074 CBC 1320 : itemid = PageGetItemId(page, offnum);
7940 tgl 3075 ECB :
5684 3076 : /*
3077 : * Note: any non-unused item should be taken as a reason to keep
660 pg 3078 : * this page. Even an LP_DEAD item makes truncation unsafe, since
3079 : * we must not have cleaned out its index entries.
3080 : */
5684 tgl 3081 GIC 1320 : if (ItemIdIsUsed(itemid))
7940 tgl 3082 ECB : {
7940 tgl 3083 GIC 4 : hastup = true;
3084 4 : break; /* can stop scanning */
3085 : }
3086 : } /* scan along page */
3087 :
6218 3088 1320 : UnlockReleaseBuffer(buf);
7940 tgl 3089 ECB :
3090 : /* Done scanning if we found a tuple here */
7940 tgl 3091 CBC 1320 : if (hastup)
3092 4 : return blkno + 1;
3093 : }
3094 :
3095 : /*
7940 tgl 3096 ECB : * If we fall out of the loop, all the previously-thought-to-be-empty
3097 : * pages still are; we need not bother to look at the last known-nonempty
3098 : * page.
3099 : */
734 pg 3100 CBC 89 : return vacrel->nonempty_pages;
3101 : }
3102 :
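/*
 * Worked example for the prefetch logic above (illustrative; assumes the
 * power-of-two PREFETCH_SIZE defined earlier in this file, 32 at the time
 * of writing): when the backwards scan reaches blkno = 1000,
 * prefetchStart = blkno & ~(PREFETCH_SIZE - 1) = 1000 & ~31 = 992, so
 * blocks 992..1000 are prefetched in forward order even though they are
 * then read back-to-front, which lets OS-level readahead kick in for the
 * reverse scan.
 */
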
3103 : /*
3104 : * Returns the number of dead TIDs that VACUUM should allocate space to
3105 : * store, given a heap rel of size vacrel->rel_pages, and given current
3106 : * maintenance_work_mem setting (or current autovacuum_work_mem setting,
3107 : * when applicable).
496 pg 3108 ECB : *
3109 : * See the comments at the head of this file for rationale.
3110 : */
3111 : static int
496 pg 3112 GIC 36739 : dead_items_max_items(LVRelState *vacrel)
3113 : {
3114 : int64 max_items;
3260 bruce 3115 36739 : int vac_work_mem = IsAutoVacuumWorkerProcess() &&
3116 75 : autovacuum_work_mem != -1 ?
3117 36814 : autovacuum_work_mem : maintenance_work_mem;
3118 :
496 pg 3119 36739 : if (vacrel->nindexes > 0)
6052 tgl 3120 ECB : {
496 pg 3121 GIC 34933 : BlockNumber rel_pages = vacrel->rel_pages;
3122 :
496 pg 3123 CBC 34933 : max_items = MAXDEADITEMS(vac_work_mem * 1024L);
3124 34933 : max_items = Min(max_items, INT_MAX);
3125 34933 : max_items = Min(max_items, MAXDEADITEMS(MaxAllocSize));
3126 :
5676 alvherre 3127 ECB : /* curious coding here to ensure the multiplication can't overflow */
496 pg 3128 GIC 34933 : if ((BlockNumber) (max_items / MaxHeapTuplesPerPage) > rel_pages)
496 pg 3129 CBC 34933 : max_items = rel_pages * MaxHeapTuplesPerPage;
3130 :
6061 bruce 3131 ECB : /* stay sane if small maintenance_work_mem */
496 pg 3132 CBC 34933 : max_items = Max(max_items, MaxHeapTuplesPerPage);
6052 tgl 3133 ECB : }
3134 : else
3135 : {
496 pg 3136 : /* One-pass case only stores a single heap page's TIDs at a time */
496 pg 3137 CBC 1806 : max_items = MaxHeapTuplesPerPage;
3138 : }
3139 :
3140 36739 : return (int) max_items;
3141 : }
3142 :
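/*
 * Worked example (illustrative, approximate): each dead TID is a 6-byte
 * ItemPointerData, so with maintenance_work_mem = 64MB the MAXDEADITEMS()
 * bound comes out to roughly 64 * 1024 * 1024 / 6, i.e. on the order of
 * 11 million TIDs.  The rel_pages clamp matters for small tables: a
 * 100-page heap can never yield more than 100 * MaxHeapTuplesPerPage dead
 * items, so only that much memory is reserved for it.
 */
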
3143 : /*
3144 : * Allocate dead_items (either using palloc, or in dynamic shared memory).
496 pg 3145 ECB : * Sets dead_items in vacrel for caller.
3146 : *
3147 : * Also handles parallel initialization as part of allocating dead_items in
3148 : * DSM when required.
3149 : */
3150 : static void
496 pg 3151 GIC 36739 : dead_items_alloc(LVRelState *vacrel, int nworkers)
3152 : {
3153 : VacDeadItems *dead_items;
3154 : int max_items;
3155 :
472 akapila 3156 36739 : max_items = dead_items_max_items(vacrel);
3157 36739 : Assert(max_items >= MaxHeapTuplesPerPage);
3158 :
734 pg 3159 ECB : /*
3160 : * Initialize state for a parallel vacuum. As of now, only one worker can
3161 : * be used for an index, so we invoke parallelism only if there are at
3162 : * least two indexes on a table.
3163 : */
732 pg 3164 CBC 36739 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
734 pg 3165 ECB : {
3166 : /*
3167 : * Since parallel workers cannot access data in temporary tables, we
3168 : * can't perform parallel vacuum on them.
3169 : */
734 pg 3170 GIC 15787 : if (RelationUsesLocalBuffers(vacrel->rel))
3171 : {
734 pg 3172 ECB : /*
3173 : * Give warning only if the user explicitly tries to perform a
3174 : * parallel vacuum on the temporary table.
3175 : */
734 pg 3176 GIC 3 : if (nworkers > 0)
3177 3 : ereport(WARNING,
734 pg 3178 ECB : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3179 : vacrel->relname)));
3180 : }
3181 : else
472 akapila 3182 GIC 15784 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3183 : vacrel->nindexes, nworkers,
450 pg 3184 ECB : max_items,
450 pg 3185 CBC 15784 : vacrel->verbose ? INFO : DEBUG2,
3186 : vacrel->bstrategy);
3187 :
3188 : /* If parallel mode started, dead_items space is allocated in DSM */
734 pg 3189 GIC 15787 : if (ParallelVacuumIsActive(vacrel))
472 akapila 3190 ECB : {
472 akapila 3191 GIC 9 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs);
734 pg 3192 9 : return;
472 akapila 3193 ECB : }
3194 : }
3195 :
3196 : /* Serial VACUUM case */
473 akapila 3197 CBC 36730 : dead_items = (VacDeadItems *) palloc(vac_max_items_to_alloc_size(max_items));
496 pg 3198 GIC 36730 : dead_items->max_items = max_items;
496 pg 3199 CBC 36730 : dead_items->num_items = 0;
1175 akapila 3200 ECB :
496 pg 3201 GIC 36730 : vacrel->dead_items = dead_items;
3202 : }
3203 :
3204 : /*
496 pg 3205 ECB : * Perform cleanup for resources allocated in dead_items_alloc
734 3206 : */
3207 : static void
496 pg 3208 GIC 36739 : dead_items_cleanup(LVRelState *vacrel)
734 pg 3209 ECB : {
734 pg 3210 GIC 36739 : if (!ParallelVacuumIsActive(vacrel))
3211 : {
3212 : /* Don't bother with pfree here */
3213 36730 : return;
3214 : }
3215 :
472 akapila 3216 ECB : /* End parallel mode */
472 akapila 3217 GIC 9 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
472 akapila 3218 CBC 9 : vacrel->pvs = NULL;
3219 : }
3220 :
3707 heikki.linnakangas 3221 ECB : /*
3222 : * Check if every tuple in the given page is visible to all current and future
3223 : * transactions. Also return the visibility_cutoff_xid which is the highest
3224 : * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
2595 rhaas 3225 : * on this page is frozen.
514 pg 3226 : *
3227 : * This is a stripped down version of lazy_scan_prune(). If you change
3228 : * anything here, make sure that everything stays in sync. Note that an
3229 : * assertion calls us to verify that everybody still agrees. Be sure to avoid
3230 : * introducing new side-effects here.
3231 : */
3232 : static bool
734 pg 3233 GIC 159384 : heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
3234 : TransactionId *visibility_cutoff_xid,
3235 : bool *all_frozen)
3236 : {
2545 kgrittn 3237 159384 : Page page = BufferGetPage(buf);
2878 bruce 3238 159384 : BlockNumber blockno = BufferGetBlockNumber(buf);
3239 : OffsetNumber offnum,
3240 : maxoff;
3602 bruce 3241 CBC 159384 : bool all_visible = true;
3242 :
3707 heikki.linnakangas 3243 GIC 159384 : *visibility_cutoff_xid = InvalidTransactionId;
2595 rhaas 3244 159384 : *all_frozen = true;
3707 heikki.linnakangas 3245 ECB :
3707 heikki.linnakangas 3246 CBC 159384 : maxoff = PageGetMaxOffsetNumber(page);
3707 heikki.linnakangas 3247 GIC 159384 : for (offnum = FirstOffsetNumber;
3602 bruce 3248 9021375 : offnum <= maxoff && all_visible;
3602 bruce 3249 CBC 8861991 : offnum = OffsetNumberNext(offnum))
3250 : {
3602 bruce 3251 ECB : ItemId itemid;
3252 : HeapTupleData tuple;
3253 :
956 akapila 3254 : /*
3255 : * Set the offset number so that we can display it along with any
3256 : * error that occurred while processing this tuple.
3257 : */
734 pg 3258 GIC 8861992 : vacrel->offnum = offnum;
3707 heikki.linnakangas 3259 8861992 : itemid = PageGetItemId(page, offnum);
3260 :
3261 : /* Unused or redirect line pointers are of no interest */
3262 8861992 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3263 431347 : continue;
3264 :
3068 andres 3265 8430645 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
3707 heikki.linnakangas 3266 ECB :
3267 : /*
3268 : * Dead line pointers can have index pointers pointing to them. So
3269 : * they can't be treated as visible
3270 : */
3707 heikki.linnakangas 3271 CBC 8430645 : if (ItemIdIsDead(itemid))
3272 : {
3273 1 : all_visible = false;
2501 rhaas 3274 GIC 1 : *all_frozen = false;
3707 heikki.linnakangas 3275 1 : break;
3276 : }
3277 :
3278 8430644 : Assert(ItemIdIsNormal(itemid));
3707 heikki.linnakangas 3279 ECB :
3707 heikki.linnakangas 3280 GIC 8430644 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3548 rhaas 3281 CBC 8430644 : tuple.t_len = ItemIdGetLength(itemid);
734 pg 3282 8430644 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
3707 heikki.linnakangas 3283 ECB :
108 pg 3284 GNC 8430644 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
3285 : buf))
3286 : {
3707 heikki.linnakangas 3287 CBC 8430508 : case HEAPTUPLE_LIVE:
3288 : {
3707 heikki.linnakangas 3289 ECB : TransactionId xmin;
3290 :
520 pg 3291 : /* Check comments in lazy_scan_prune. */
3395 rhaas 3292 GIC 8430508 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3707 heikki.linnakangas 3293 ECB : {
3707 heikki.linnakangas 3294 UIC 0 : all_visible = false;
2501 rhaas 3295 0 : *all_frozen = false;
3707 heikki.linnakangas 3296 LBC 0 : break;
3297 : }
3298 :
3299 : /*
3300 : * The inserter definitely committed. But is it old enough
3602 bruce 3301 ECB : * that everyone sees it as committed?
3302 : */
3707 heikki.linnakangas 3303 GBC 8430508 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
108 pg 3304 GNC 8430508 : if (!TransactionIdPrecedes(xmin,
3305 : vacrel->cutoffs.OldestXmin))
3707 heikki.linnakangas 3306 EUB : {
3707 heikki.linnakangas 3307 GIC 57 : all_visible = false;
2501 rhaas 3308 57 : *all_frozen = false;
3707 heikki.linnakangas 3309 57 : break;
3310 : }
3311 :
3312 : /* Track newest xmin on page. */
97 pg 3313 GNC 8430451 : if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3314 : TransactionIdIsNormal(xmin))
3707 heikki.linnakangas 3315 CBC 24367 : *visibility_cutoff_xid = xmin;
3316 :
3317 : /* Check whether this tuple is already frozen or not */
2595 rhaas 3318 15458194 : if (all_visible && *all_frozen &&
3319 7027743 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3320 13382 : *all_frozen = false;
3321 : }
3707 heikki.linnakangas 3322 GIC 8430451 : break;
3323 :
3707 heikki.linnakangas 3324 CBC 136 : case HEAPTUPLE_DEAD:
3325 : case HEAPTUPLE_RECENTLY_DEAD:
3707 heikki.linnakangas 3326 ECB : case HEAPTUPLE_INSERT_IN_PROGRESS:
3327 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3328 : {
2501 rhaas 3329 CBC 136 : all_visible = false;
3330 136 : *all_frozen = false;
3331 136 : break;
3332 : }
3707 heikki.linnakangas 3333 LBC 0 : default:
3707 heikki.linnakangas 3334 UIC 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3707 heikki.linnakangas 3335 ECB : break;
3336 : }
3337 : } /* scan along page */
3338 :
3339 : /* Clear the offset information once we have processed the given page. */
734 pg 3340 CBC 159384 : vacrel->offnum = InvalidOffsetNumber;
956 akapila 3341 ECB :
3707 heikki.linnakangas 3342 CBC 159384 : return all_visible;
3343 : }
1175 akapila 3344 EUB :
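/*
 * Hedged sketch of the caller pattern, mirroring lazy_vacuum_heap_page()
 * earlier in this file: when the page proves all-visible the caller sets
 * the page-level flag and the visibility map bit together, upgrading to
 * all-frozen when every tuple is frozen as well.  The function name and
 * VACUUMLAZY_EXAMPLES guard are hypothetical.
 */
#ifdef VACUUMLAZY_EXAMPLES
static void
set_vm_after_vacuum_example(LVRelState *vacrel, Buffer buf, Buffer vmbuffer,
							BlockNumber blkno, Page page)
{
	TransactionId visibility_cutoff_xid;
	bool		all_frozen;

	if (heap_page_is_all_visible(vacrel, buf, &visibility_cutoff_xid,
								 &all_frozen))
	{
		uint8		flags = VISIBILITYMAP_ALL_VISIBLE;

		if (all_frozen)
			flags |= VISIBILITYMAP_ALL_FROZEN;

		PageSetAllVisible(page);
		visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
						  vmbuffer, visibility_cutoff_xid, flags);
	}
}
#endif
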
3345 : /*
3346 : * Update index statistics in pg_class if the statistics are accurate.
3347 : */
3348 : static void
393 pg 3349 GIC 36607 : update_relstats_all_indexes(LVRelState *vacrel)
3350 : {
734 pg 3351 CBC 36607 : Relation *indrels = vacrel->indrels;
734 pg 3352 GIC 36607 : int nindexes = vacrel->nindexes;
734 pg 3353 CBC 36607 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3354 :
393 pg 3355 GIC 36607 : Assert(vacrel->do_index_cleanup);
3356 :
734 3357 91245 : for (int idx = 0; idx < nindexes; idx++)
3358 : {
3359 54638 : Relation indrel = indrels[idx];
734 pg 3360 CBC 54638 : IndexBulkDeleteResult *istat = indstats[idx];
3361 :
3362 54638 : if (istat == NULL || istat->estimated_count)
1175 akapila 3363 50724 : continue;
1175 akapila 3364 ECB :
3365 : /* Update index statistics */
734 pg 3366 CBC 3914 : vac_update_relstats(indrel,
3367 : istat->num_pages,
734 pg 3368 ECB : istat->num_index_tuples,
3369 : 0,
1175 akapila 3370 : false,
3371 : InvalidTransactionId,
3372 : InvalidMultiXactId,
422 pg 3373 : NULL, NULL, false);
1175 akapila 3374 : }
1175 akapila 3375 GIC 36607 : }
3376 :
1175 akapila 3377 ECB : /*
3378 : * Error context callback for errors occurring during vacuum. The error
3379 : * context messages for index phases should match the messages set in parallel
3380 : * vacuum. If you change this function for those phases, change
3381 : * parallel_vacuum_error_callback() as well.
3382 : */
3383 : static void
1105 akapila 3384 GIC 22 : vacuum_error_callback(void *arg)
3385 : {
734 pg 3386 CBC 22 : LVRelState *errinfo = arg;
3387 :
1105 akapila 3388 GIC 22 : switch (errinfo->phase)
3389 : {
1105 akapila 3390 UIC 0 : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3391 0 : if (BlockNumberIsValid(errinfo->blkno))
3392 : {
956 3393 0 : if (OffsetNumberIsValid(errinfo->offnum))
610 peter 3394 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
956 akapila 3395 LBC 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3396 : else
3397 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3398 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
956 akapila 3399 ECB : }
3400 : else
958 akapila 3401 UBC 0 : errcontext("while scanning relation \"%s.%s\"",
958 akapila 3402 EUB : errinfo->relnamespace, errinfo->relname);
1105 akapila 3403 UIC 0 : break;
1105 akapila 3404 EUB :
1105 akapila 3405 GBC 2 : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3406 2 : if (BlockNumberIsValid(errinfo->blkno))
3407 : {
956 akapila 3408 UBC 0 : if (OffsetNumberIsValid(errinfo->offnum))
610 peter 3409 UIC 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
956 akapila 3410 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3411 : else
956 akapila 3412 UBC 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3413 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
956 akapila 3414 EUB : }
3415 : else
958 akapila 3416 CBC 2 : errcontext("while vacuuming relation \"%s.%s\"",
958 akapila 3417 ECB : errinfo->relnamespace, errinfo->relname);
1105 akapila 3418 GIC 2 : break;
1105 akapila 3419 EUB :
1105 akapila 3420 GBC 5 : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3421 5 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3422 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3423 5 : break;
3424 :
1105 akapila 3425 GIC 5 : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3426 5 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
1105 akapila 3427 ECB : errinfo->indname, errinfo->relnamespace, errinfo->relname);
1105 akapila 3428 GIC 5 : break;
1105 akapila 3429 ECB :
1105 akapila 3430 GIC 3 : case VACUUM_ERRCB_PHASE_TRUNCATE:
1105 akapila 3431 CBC 3 : if (BlockNumberIsValid(errinfo->blkno))
3432 3 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3433 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3434 3 : break;
3435 :
3436 7 : case VACUUM_ERRCB_PHASE_UNKNOWN:
1105 akapila 3437 ECB : default:
1105 akapila 3438 GIC 7 : return; /* do nothing; the errinfo may not be
1105 akapila 3439 ECB : * initialized */
3440 : }
3441 : }
3442 :
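/*
 * Illustrative sketch (not a quote of heap_vacuum_rel()): a callback like
 * vacuum_error_callback() is installed by pushing an ErrorContextCallback
 * onto error_context_stack for the duration of the operation and popping
 * it afterwards.  The function name and VACUUMLAZY_EXAMPLES guard are
 * hypothetical.
 */
#ifdef VACUUMLAZY_EXAMPLES
static void
error_callback_registration_example(LVRelState *vacrel)
{
	ErrorContextCallback errcallback;

	errcallback.callback = vacuum_error_callback;
	errcallback.arg = vacrel;
	errcallback.previous = error_context_stack;
	error_context_stack = &errcallback;

	/* ... work that may ereport(ERROR) runs here ... */

	/* Pop the error context stack on the way out */
	error_context_stack = errcallback.previous;
}
#endif
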
1012 3443 : /*
3444 : * Updates the information required for vacuum error callback. This also saves
3445 : * the current information which can be later restored via restore_vacuum_error_info.
3446 : */
1105 3447 : static void
734 pg 3448 GIC 250871 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
734 pg 3449 ECB : int phase, BlockNumber blkno, OffsetNumber offnum)
3450 : {
734 pg 3451 GIC 250871 : if (saved_vacrel)
3452 : {
3453 85683 : saved_vacrel->offnum = vacrel->offnum;
3454 85683 : saved_vacrel->blkno = vacrel->blkno;
3455 85683 : saved_vacrel->phase = vacrel->phase;
3456 : }
3457 :
3458 250871 : vacrel->blkno = blkno;
734 pg 3459 CBC 250871 : vacrel->offnum = offnum;
734 pg 3460 GIC 250871 : vacrel->phase = phase;
1012 akapila 3461 250871 : }
1105 akapila 3462 ECB :
3463 : /*
1012 3464 : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3465 : */
3466 : static void
734 pg 3467 GIC 85683 : restore_vacuum_error_info(LVRelState *vacrel,
3468 : const LVSavedErrInfo *saved_vacrel)
1012 akapila 3469 ECB : {
734 pg 3470 CBC 85683 : vacrel->blkno = saved_vacrel->blkno;
3471 85683 : vacrel->offnum = saved_vacrel->offnum;
3472 85683 : vacrel->phase = saved_vacrel->phase;
1105 akapila 3473 GIC 85683 : }
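
/*
 * Hedged usage sketch for the pair above, following the pattern already
 * visible in lazy_vacuum_one_index() and lazy_cleanup_one_index(): save the
 * current error-context information, switch to the phase-specific values,
 * do the work, then restore.  The function name and VACUUMLAZY_EXAMPLES
 * guard are hypothetical.
 */
#ifdef VACUUMLAZY_EXAMPLES
static void
error_info_save_restore_example(LVRelState *vacrel)
{
	LVSavedErrInfo saved_err_info;

	update_vacuum_error_info(vacrel, &saved_err_info,
							 VACUUM_ERRCB_PHASE_VACUUM_INDEX,
							 InvalidBlockNumber, InvalidOffsetNumber);

	/* ... phase-specific work, e.g. an ambulkdelete call ... */

	restore_vacuum_error_info(vacrel, &saved_err_info);
}
#endif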