LCOV - differential code coverage report

Current view:   top level - src/backend/access/heap - vacuumlazy.c (source / functions)
Current:        Differential Code Coverage HEAD vs 15
Current Date:   2023-04-08 15:15:32
Baseline:       15
Baseline Date:  2023-04-08 15:09:40

            Coverage    Total   Hit   UNC   LBC   UIC   UBC   GBC   GIC   GNC   CBC   EUB   ECB   DUB   DCB
Lines:      86.6 %        982   850     8    19    94    11    21   497   113   219    92   531     8    81
Functions:  100.0 %        25    25   (per-category counts as reported: 25, 25)

Legend: Lines: hit / not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * vacuumlazy.c
       4                 :  *    Concurrent ("lazy") vacuuming.
       5                 :  *
       6                 :  * The major space usage for vacuuming is storage for the array of dead TIDs
       7                 :  * that are to be removed from indexes.  We want to ensure we can vacuum even
       8                 :  * the very largest relations with finite memory space usage.  To do that, we
       9                 :  * set upper bounds on the number of TIDs we can keep track of at once.
      10                 :  *
      11                 :  * We are willing to use at most maintenance_work_mem (or perhaps
      12                 :  * autovacuum_work_mem) memory space to keep track of dead TIDs.  We initially
      13                 :  * allocate an array of TIDs of that size, with an upper limit that depends on
      14                 :  * table size (this limit ensures we don't allocate a huge area uselessly for
      15                 :  * vacuuming small tables).  If the array threatens to overflow, we must call
      16                 :  * lazy_vacuum to vacuum indexes (and to vacuum the pages that we've pruned).
      17                 :  * This frees up the memory space dedicated to storing dead TIDs.
      18                 :  *
      19                 :  * In practice VACUUM will often complete its initial pass over the target
      20                 :  * heap relation without ever running out of space to store TIDs.  This means
      21                 :  * that there only needs to be one call to lazy_vacuum, after the initial pass
      22                 :  * completes.
      23                 :  *
      24                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      25                 :  * Portions Copyright (c) 1994, Regents of the University of California
      26                 :  *
      27                 :  *
      28                 :  * IDENTIFICATION
      29                 :  *    src/backend/access/heap/vacuumlazy.c
      30                 :  *
      31                 :  *-------------------------------------------------------------------------
      32                 :  */
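
A minimal sketch of the overflow policy described in the header comment, as
applied per page during the initial heap pass (simplified from the test in
lazy_scan_heap; dead_items, max_items, num_items and lazy_vacuum are the
names used later in this file):

    /* Pause and vacuum if one more page's worth of TIDs might not fit */
    if (vacrel->dead_items->max_items - vacrel->dead_items->num_items <
        MaxHeapTuplesPerPage)
    {
        /* Vacuum indexes, then heap; this empties the dead_items array */
        lazy_vacuum(vacrel);
    }
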
      33                 : #include "postgres.h"
      34                 : 
      35                 : #include <math.h>
      36                 : 
      37                 : #include "access/amapi.h"
      38                 : #include "access/genam.h"
      39                 : #include "access/heapam.h"
      40                 : #include "access/heapam_xlog.h"
      41                 : #include "access/htup_details.h"
      42                 : #include "access/multixact.h"
      43                 : #include "access/transam.h"
      44                 : #include "access/visibilitymap.h"
      45                 : #include "access/xact.h"
      46                 : #include "access/xlog.h"
      47                 : #include "access/xloginsert.h"
      48                 : #include "catalog/index.h"
      49                 : #include "catalog/storage.h"
      50                 : #include "commands/dbcommands.h"
      51                 : #include "commands/progress.h"
      52                 : #include "commands/vacuum.h"
      53                 : #include "executor/instrument.h"
      54                 : #include "miscadmin.h"
      55                 : #include "optimizer/paths.h"
      56                 : #include "pgstat.h"
      57                 : #include "portability/instr_time.h"
      58                 : #include "postmaster/autovacuum.h"
      59                 : #include "storage/bufmgr.h"
      60                 : #include "storage/freespace.h"
      61                 : #include "storage/lmgr.h"
      62                 : #include "tcop/tcopprot.h"
      63                 : #include "utils/lsyscache.h"
      64                 : #include "utils/memutils.h"
      65                 : #include "utils/pg_rusage.h"
      66                 : #include "utils/timestamp.h"
      67                 : 
      68                 : 
      69                 : /*
      70                 :  * Space/time tradeoff parameters: do these need to be user-tunable?
      71                 :  *
      72                 :  * To consider truncating the relation, we want there to be at least
      73                 :  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
      74                 :  * is less) potentially-freeable pages.
      75                 :  */
      76                 : #define REL_TRUNCATE_MINIMUM    1000
      77                 : #define REL_TRUNCATE_FRACTION   16
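
Concretely, should_attempt_truncation (prototyped below) applies this test
roughly as follows (a sketch; the real function also checks that truncation
is enabled and that the wraparound failsafe has not triggered):

    BlockNumber possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;

    /* e.g. a 64000-page table needs Min(1000, 64000/16) = 1000 such pages */
    if (possibly_freeable > 0 &&
        (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
         possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
        /* worth trying lazy_truncate_heap() */;
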
      78                 : 
      79                 : /*
      80                 :  * Timing parameters for truncate locking heuristics.
      81                 :  *
      82                 :  * These were not exposed as user tunable GUC values because it didn't seem
      83                 :  * that the potential for improvement was great enough to merit the cost of
      84                 :  * supporting them.
      85                 :  */
      86                 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL     20  /* ms */
      87                 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL      50  /* ms */
      88                 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT            5000    /* ms */
      89                 : 
      90                 : /*
      91                 :  * Threshold that controls whether we bypass index vacuuming and heap
       92                 :  * vacuuming as an optimization.
      93                 :  */
      94                 : #define BYPASS_THRESHOLD_PAGES  0.02    /* i.e. 2% of rel_pages */
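
lazy_vacuum applies this threshold roughly as follows (a sketch; the real
test also requires that no index scan pass has happened yet and caps the
absolute number of dead items):

    BlockNumber threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
    bool        bypass = (vacrel->lpdead_item_pages < threshold &&
                          vacrel->lpdead_items < MAXDEADITEMS(32L * 1024L * 1024L));
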
      95                 : 
      96                 : /*
      97                 :  * Perform a failsafe check each time we scan another 4GB of pages.
      98                 :  * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
      99                 :  */
     100                 : #define FAILSAFE_EVERY_PAGES \
     101                 :     ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
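
(With the default BLCKSZ of 8192, this works out to (4 * 2^30) / 2^13 =
2^19 = 524,288 pages, the power-of-two figure mentioned above.)
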
     102                 : 
     103                 : /*
     104                 :  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
      105                 :  * (it won't be exact because we only vacuum the FSM after processing a heap
      106                 :  * page that has some removable tuples).  When there are indexes, this is
      107                 :  * ignored, and we vacuum the FSM after each index/heap cleaning pass.
     108                 :  */
     109                 : #define VACUUM_FSM_EVERY_PAGES \
     110                 :     ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
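
(Again assuming the default 8KB BLCKSZ, this is (8 * 2^30) / 2^13 = 2^20 =
1,048,576 heap pages between FSM vacuuming passes.)
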
     111                 : 
     112                 : /*
      113                 :  * Before we consider skipping a page that's marked as clean in the
      114                 :  * visibility map, we must've seen at least this many clean pages.
     115                 :  */
     116                 : #define SKIP_PAGES_THRESHOLD    ((BlockNumber) 32)
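
Skipping very short all-visible runs would defeat the OS readahead that an
otherwise-sequential scan enjoys, hence the threshold.  lazy_scan_skip uses
it roughly like this (a sketch; nskippable_blocks is the length of the
all-visible run just found):

    if (!vacrel->skipwithvm || nskippable_blocks < SKIP_PAGES_THRESHOLD)
        *skipping_current_range = false;    /* read the pages anyway */
    else
        *skipping_current_range = true;     /* long enough to be worth skipping */
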
     117                 : 
     118                 : /*
     119                 :  * Size of the prefetch window for lazy vacuum backwards truncation scan.
     120                 :  * Needs to be a power of 2.
     121                 :  */
     122                 : #define PREFETCH_SIZE           ((BlockNumber) 32)
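
The power-of-2 requirement lets the backward truncation scan align its
prefetch window with a simple bitmask; count_nondeletable_pages does
essentially this (sketch):

    /* Round blkno down to the start of its PREFETCH_SIZE-aligned window */
    BlockNumber prefetchStart = blkno & ~(PREFETCH_SIZE - 1);

    for (BlockNumber pblkno = prefetchStart; pblkno <= blkno; pblkno++)
        PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
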
     123                 : 
     124                 : /*
      125                 :  * Macro to check if we are in a parallel vacuum.  If true, we are in
      126                 :  * parallel mode and the DSM segment is initialized.
     127                 :  */
     128                 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
     129                 : 
     130                 : /* Phases of vacuum during which we report error context. */
     131                 : typedef enum
     132                 : {
     133                 :     VACUUM_ERRCB_PHASE_UNKNOWN,
     134                 :     VACUUM_ERRCB_PHASE_SCAN_HEAP,
     135                 :     VACUUM_ERRCB_PHASE_VACUUM_INDEX,
     136                 :     VACUUM_ERRCB_PHASE_VACUUM_HEAP,
     137                 :     VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
     138                 :     VACUUM_ERRCB_PHASE_TRUNCATE
     139                 : } VacErrPhase;
     140                 : 
     141                 : typedef struct LVRelState
     142                 : {
     143                 :     /* Target heap relation and its indexes */
     144                 :     Relation    rel;
     145                 :     Relation   *indrels;
     146                 :     int         nindexes;
     147                 : 
     148                 :     /* Buffer access strategy and parallel vacuum state */
     149                 :     BufferAccessStrategy bstrategy;
     150                 :     ParallelVacuumState *pvs;
     151                 : 
     152                 :     /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
     153                 :     bool        aggressive;
     154                 :     /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
     155                 :     bool        skipwithvm;
     156                 :     /* Consider index vacuuming bypass optimization? */
     157                 :     bool        consider_bypass_optimization;
     158                 : 
     159                 :     /* Doing index vacuuming, index cleanup, rel truncation? */
     160                 :     bool        do_index_vacuuming;
     161                 :     bool        do_index_cleanup;
     162                 :     bool        do_rel_truncate;
     163                 : 
     164                 :     /* VACUUM operation's cutoffs for freezing and pruning */
     165                 :     struct VacuumCutoffs cutoffs;
     166                 :     GlobalVisState *vistest;
     167                 :     /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
     168                 :     TransactionId NewRelfrozenXid;
     169                 :     MultiXactId NewRelminMxid;
     170                 :     bool        skippedallvis;
     171                 : 
     172                 :     /* Error reporting state */
     173                 :     char       *dbname;
     174                 :     char       *relnamespace;
     175                 :     char       *relname;
     176                 :     char       *indname;        /* Current index name */
     177                 :     BlockNumber blkno;          /* used only for heap operations */
     178                 :     OffsetNumber offnum;        /* used only for heap operations */
     179                 :     VacErrPhase phase;
     180                 :     bool        verbose;        /* VACUUM VERBOSE? */
     181                 : 
     182                 :     /*
     183                 :      * dead_items stores TIDs whose index tuples are deleted by index
     184                 :      * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
     185                 :      * that has been processed by lazy_scan_prune.  Also needed by
     186                 :      * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
      187                 :      * LP_UNUSED during the second heap pass.
     188                 :      */
     189                 :     VacDeadItems *dead_items;   /* TIDs whose index tuples we'll delete */
     190                 :     BlockNumber rel_pages;      /* total number of pages */
     191                 :     BlockNumber scanned_pages;  /* # pages examined (not skipped via VM) */
     192                 :     BlockNumber removed_pages;  /* # pages removed by relation truncation */
     193                 :     BlockNumber frozen_pages;   /* # pages with newly frozen tuples */
     194                 :     BlockNumber lpdead_item_pages;  /* # pages with LP_DEAD items */
     195                 :     BlockNumber missed_dead_pages;  /* # pages with missed dead tuples */
     196                 :     BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
     197                 : 
     198                 :     /* Statistics output by us, for table */
     199                 :     double      new_rel_tuples; /* new estimated total # of tuples */
     200                 :     double      new_live_tuples;    /* new estimated total # of live tuples */
     201                 :     /* Statistics output by index AMs */
     202                 :     IndexBulkDeleteResult **indstats;
     203                 : 
     204                 :     /* Instrumentation counters */
     205                 :     int         num_index_scans;
     206                 :     /* Counters that follow are only for scanned_pages */
     207                 :     int64       tuples_deleted; /* # deleted from table */
     208                 :     int64       tuples_frozen;  /* # newly frozen */
     209                 :     int64       lpdead_items;   /* # deleted from indexes */
     210                 :     int64       live_tuples;    /* # live tuples remaining */
     211                 :     int64       recently_dead_tuples;   /* # dead, but not yet removable */
     212                 :     int64       missed_dead_tuples; /* # removable, but not removed */
     213                 : } LVRelState;
     214                 : 
     215                 : /*
     216                 :  * State returned by lazy_scan_prune()
     217                 :  */
     218                 : typedef struct LVPagePruneState
     219                 : {
     220                 :     bool        hastup;         /* Page prevents rel truncation? */
     221                 :     bool        has_lpdead_items;   /* includes existing LP_DEAD items */
     222                 : 
     223                 :     /*
      224                 :      * This state describes the proper VM bit states to set for the page
      225                 :      * following pruning and freezing.  all_visible implies !has_lpdead_items,
      226                 :      * but don't trust the all_frozen result unless all_visible is also set.
     227                 :      */
     228                 :     bool        all_visible;    /* Every item visible to all? */
     229                 :     bool        all_frozen;     /* provided all_visible is also true */
     230                 :     TransactionId visibility_cutoff_xid;    /* For recovery conflicts */
     231                 : } LVPagePruneState;
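
A sketch of how callers are expected to translate these flags into
visibility map bits (following the rule above that all_frozen is only
meaningful when all_visible is set):

    uint8   flags = 0;

    if (prunestate.all_visible)
    {
        flags |= VISIBILITYMAP_ALL_VISIBLE;
        if (prunestate.all_frozen)      /* trusted only with all_visible */
            flags |= VISIBILITYMAP_ALL_FROZEN;
    }
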
     232                 : 
     233                 : /* Struct for saving and restoring vacuum error information. */
     234                 : typedef struct LVSavedErrInfo
     235                 : {
     236                 :     BlockNumber blkno;
     237                 :     OffsetNumber offnum;
     238                 :     VacErrPhase phase;
     239                 : } LVSavedErrInfo;
     240                 : 
     241                 : 
     242                 : /* non-export function prototypes */
     243                 : static void lazy_scan_heap(LVRelState *vacrel);
     244                 : static BlockNumber lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer,
     245                 :                                   BlockNumber next_block,
     246                 :                                   bool *next_unskippable_allvis,
     247                 :                                   bool *skipping_current_range);
     248                 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
     249                 :                                    BlockNumber blkno, Page page,
     250                 :                                    bool sharelock, Buffer vmbuffer);
     251                 : static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
     252                 :                             BlockNumber blkno, Page page,
     253                 :                             LVPagePruneState *prunestate);
     254                 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
     255                 :                               BlockNumber blkno, Page page,
     256                 :                               bool *hastup, bool *recordfreespace);
     257                 : static void lazy_vacuum(LVRelState *vacrel);
     258                 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
     259                 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
     260                 : static int  lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
     261                 :                                   Buffer buffer, int index, Buffer vmbuffer);
     262                 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
     263                 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
     264                 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
     265                 :                                                     IndexBulkDeleteResult *istat,
     266                 :                                                     double reltuples,
     267                 :                                                     LVRelState *vacrel);
     268                 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
     269                 :                                                      IndexBulkDeleteResult *istat,
     270                 :                                                      double reltuples,
     271                 :                                                      bool estimated_count,
     272                 :                                                      LVRelState *vacrel);
     273                 : static bool should_attempt_truncation(LVRelState *vacrel);
     274                 : static void lazy_truncate_heap(LVRelState *vacrel);
     275                 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
     276                 :                                             bool *lock_waiter_detected);
     277                 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
     278                 : static void dead_items_cleanup(LVRelState *vacrel);
     279                 : static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
     280                 :                                      TransactionId *visibility_cutoff_xid, bool *all_frozen);
     281                 : static void update_relstats_all_indexes(LVRelState *vacrel);
     282                 : static void vacuum_error_callback(void *arg);
     283                 : static void update_vacuum_error_info(LVRelState *vacrel,
     284                 :                                      LVSavedErrInfo *saved_vacrel,
     285                 :                                      int phase, BlockNumber blkno,
     286                 :                                      OffsetNumber offnum);
     287                 : static void restore_vacuum_error_info(LVRelState *vacrel,
     288                 :                                       const LVSavedErrInfo *saved_vacrel);
     289                 : 
     290                 : 
     291                 : /*
     292                 :  *  heap_vacuum_rel() -- perform VACUUM for one heap relation
     293                 :  *
     294                 :  *      This routine sets things up for and then calls lazy_scan_heap, where
      295                 :  *      almost all work actually takes place.  Finalizes everything after the
      296                 :  *      call returns by managing relation truncation and updating rel's pg_class
     297 ECB             :  *      entry. (Also updates pg_class entries for any indexes that need it.)
     298                 :  *
     299                 :  *      At entry, we have already established a transaction and opened
     300                 :  *      and locked the relation.
     301                 :  */
     302                 : void
     303 GIC       36739 : heap_vacuum_rel(Relation rel, VacuumParams *params,
     304                 :                 BufferAccessStrategy bstrategy)
     305                 : {
     306                 :     LVRelState *vacrel;
     307                 :     bool        verbose,
     308                 :                 instrument,
     309 ECB             :                 skipwithvm,
     310                 :                 frozenxid_updated,
     311                 :                 minmulti_updated;
     312                 :     BlockNumber orig_rel_pages,
     313                 :                 new_rel_pages,
     314                 :                 new_rel_allvisible;
     315                 :     PGRUsage    ru0;
     316 CBC       36739 :     TimestampTz starttime = 0;
     317           36739 :     PgStat_Counter startreadtime = 0,
     318           36739 :                 startwritetime = 0;
     319 GIC       36739 :     WalUsage    startwalusage = pgWalUsage;
     320 CBC       36739 :     int64       StartPageHit = VacuumPageHit,
     321           36739 :                 StartPageMiss = VacuumPageMiss,
     322           36739 :                 StartPageDirty = VacuumPageDirty;
     323                 :     ErrorContextCallback errcallback;
     324 GBC       36739 :     char      **indnames = NULL;
     325 EUB             : 
     326 GIC       36739 :     verbose = (params->options & VACOPT_VERBOSE) != 0;
     327           36814 :     instrument = (verbose || (IsAutoVacuumWorkerProcess() &&
     328              75 :                               params->log_min_duration >= 0));
     329 CBC       36739 :     if (instrument)
     330                 :     {
     331 GIC          79 :         pg_rusage_init(&ru0);
     332              79 :         starttime = GetCurrentTimestamp();
     333              79 :         if (track_io_timing)
     334                 :         {
     335 UIC           0 :             startreadtime = pgStatBlockReadTime;
     336               0 :             startwritetime = pgStatBlockWriteTime;
     337                 :         }
     338                 :     }
     339                 : 
     340 GIC       36739 :     pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
     341                 :                                   RelationGetRelid(rel));
     342                 : 
     343                 :     /*
     344                 :      * Setup error traceback support for ereport() first.  The idea is to set
     345                 :      * up an error context callback to display additional information on any
     346                 :      * error during a vacuum.  During different phases of vacuum, we update
      347                 :      * the state so that the error context callback always displays current
     348                 :      * information.
     349                 :      *
      350                 :      * Copy the names of the heap rel into local memory for error reporting
     351                 :      * purposes, too.  It isn't always safe to assume that we can get the name
     352 ECB             :      * of each rel.  It's convenient for code in lazy_scan_heap to always use
     353                 :      * these temp copies.
     354                 :      */
     355 CBC       36739 :     vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
     356 GNC       36739 :     vacrel->dbname = get_database_name(MyDatabaseId);
     357 CBC       36739 :     vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
     358           36739 :     vacrel->relname = pstrdup(RelationGetRelationName(rel));
     359           36739 :     vacrel->indname = NULL;
     360           36739 :     vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
     361           36739 :     vacrel->verbose = verbose;
     362 GIC       36739 :     errcallback.callback = vacuum_error_callback;
     363           36739 :     errcallback.arg = vacrel;
     364 CBC       36739 :     errcallback.previous = error_context_stack;
     365           36739 :     error_context_stack = &errcallback;
     366 ECB             : 
     367                 :     /* Set up high level stuff about rel and its indexes */
     368 CBC       36739 :     vacrel->rel = rel;
     369           36739 :     vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
     370 ECB             :                      &vacrel->indrels);
     371 GNC       36739 :     vacrel->bstrategy = bstrategy;
     372 GIC       36739 :     if (instrument && vacrel->nindexes > 0)
     373                 :     {
     374                 :         /* Copy index names used by instrumentation (not error reporting) */
     375 CBC          71 :         indnames = palloc(sizeof(char *) * vacrel->nindexes);
     376             213 :         for (int i = 0; i < vacrel->nindexes; i++)
     377             142 :             indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
     378 ECB             :     }
     379                 : 
     380                 :     /*
     381                 :      * The index_cleanup param either disables index vacuuming and cleanup or
     382                 :      * forces it to go ahead when we would otherwise apply the index bypass
     383                 :      * optimization.  The default is 'auto', which leaves the final decision
     384                 :      * up to lazy_vacuum().
     385                 :      *
      386                 :      * The truncate param allows the user to avoid attempting relation truncation,
     387                 :      * though it can't force truncation to happen.
     388                 :      */
     389 GIC       36739 :     Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
     390           36739 :     Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
     391                 :            params->truncate != VACOPTVALUE_AUTO);
     392 GNC       36739 :     Assert(!VacuumFailsafeActive);
     393 GIC       36739 :     vacrel->consider_bypass_optimization = true;
     394           36739 :     vacrel->do_index_vacuuming = true;
     395           36739 :     vacrel->do_index_cleanup = true;
     396           36739 :     vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
     397           36739 :     if (params->index_cleanup == VACOPTVALUE_DISABLED)
     398                 :     {
     399                 :         /* Force disable index vacuuming up-front */
     400             132 :         vacrel->do_index_vacuuming = false;
     401             132 :         vacrel->do_index_cleanup = false;
     402 ECB             :     }
     403 CBC       36607 :     else if (params->index_cleanup == VACOPTVALUE_ENABLED)
     404 ECB             :     {
     405                 :         /* Force index vacuuming.  Note that failsafe can still bypass. */
     406 CBC          15 :         vacrel->consider_bypass_optimization = false;
     407 ECB             :     }
     408                 :     else
     409                 :     {
     410                 :         /* Default/auto, make all decisions dynamically */
     411 GIC       36592 :         Assert(params->index_cleanup == VACOPTVALUE_AUTO);
     412                 :     }
     413                 : 
     414                 :     /* Initialize page counters explicitly (be tidy) */
     415 CBC       36739 :     vacrel->scanned_pages = 0;
     416 GIC       36739 :     vacrel->removed_pages = 0;
     417 GNC       36739 :     vacrel->frozen_pages = 0;
     418 CBC       36739 :     vacrel->lpdead_item_pages = 0;
     419 GIC       36739 :     vacrel->missed_dead_pages = 0;
     420 CBC       36739 :     vacrel->nonempty_pages = 0;
     421 EUB             :     /* dead_items_alloc allocates vacrel->dead_items later on */
     422                 : 
     423                 :     /* Allocate/initialize output statistics state */
     424 GIC       36739 :     vacrel->new_rel_tuples = 0;
     425           36739 :     vacrel->new_live_tuples = 0;
     426 CBC       36739 :     vacrel->indstats = (IndexBulkDeleteResult **)
     427 GIC       36739 :         palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
     428                 : 
     429                 :     /* Initialize remaining counters (be tidy) */
     430           36739 :     vacrel->num_index_scans = 0;
     431           36739 :     vacrel->tuples_deleted = 0;
     432 GNC       36739 :     vacrel->tuples_frozen = 0;
     433 GIC       36739 :     vacrel->lpdead_items = 0;
     434           36739 :     vacrel->live_tuples = 0;
     435           36739 :     vacrel->recently_dead_tuples = 0;
     436           36739 :     vacrel->missed_dead_tuples = 0;
     437                 : 
     438                 :     /*
     439                 :      * Get cutoffs that determine which deleted tuples are considered DEAD,
     440                 :      * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze.  Then determine
     441                 :      * the extent of the blocks that we'll scan in lazy_scan_heap.  It has to
     442                 :      * happen in this order to ensure that the OldestXmin cutoff field works
     443                 :      * as an upper bound on the XIDs stored in the pages we'll actually scan
     444                 :      * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
     445 ECB             :      *
     446                 :      * Next acquire vistest, a related cutoff that's used in heap_page_prune.
     447                 :      * We expect vistest will always make heap_page_prune remove any deleted
     448                 :      * tuple whose xmax is < OldestXmin.  lazy_scan_prune must never become
     449                 :      * confused about whether a tuple should be frozen or removed.  (In the
     450                 :      * future we might want to teach lazy_scan_prune to recompute vistest from
     451                 :      * time to time, to increase the number of dead tuples it can prune away.)
     452                 :      */
     453 GNC       36739 :     vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
     454 CBC       36739 :     vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
     455 GIC       36739 :     vacrel->vistest = GlobalVisTestFor(rel);
     456                 :     /* Initialize state used to track oldest extant XID/MXID */
     457 GNC       36739 :     vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
     458           36739 :     vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
     459 GIC       36739 :     vacrel->skippedallvis = false;
     460 GNC       36739 :     skipwithvm = true;
     461           36739 :     if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
     462                 :     {
     463                 :         /*
     464                 :          * Force aggressive mode, and disable skipping blocks using the
     465                 :          * visibility map (even those set all-frozen)
     466                 :          */
     467             146 :         vacrel->aggressive = true;
     468             146 :         skipwithvm = false;
     469                 :     }
     470                 : 
     471           36739 :     vacrel->skipwithvm = skipwithvm;
     472                 : 
     473           36739 :     if (verbose)
     474                 :     {
     475               4 :         if (vacrel->aggressive)
     476 UNC           0 :             ereport(INFO,
     477                 :                     (errmsg("aggressively vacuuming \"%s.%s.%s\"",
     478                 :                             vacrel->dbname, vacrel->relnamespace,
     479                 :                             vacrel->relname)));
     480                 :         else
     481 GNC           4 :             ereport(INFO,
     482                 :                     (errmsg("vacuuming \"%s.%s.%s\"",
     483                 :                             vacrel->dbname, vacrel->relnamespace,
     484                 :                             vacrel->relname)));
     485                 :     }
     486                 : 
     487 ECB             :     /*
     488                 :      * Allocate dead_items array memory using dead_items_alloc.  This handles
     489                 :      * parallel VACUUM initialization as part of allocating shared memory
     490                 :      * space used for dead_items.  (But do a failsafe precheck first, to
     491                 :      * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
     492                 :      * is already dangerously old.)
     493                 :      */
     494 CBC       36739 :     lazy_check_wraparound_failsafe(vacrel);
     495 GIC       36739 :     dead_items_alloc(vacrel, params->nworkers);
     496                 : 
     497 ECB             :     /*
     498                 :      * Call lazy_scan_heap to perform all required heap pruning, index
     499                 :      * vacuuming, and heap vacuuming (plus related processing)
     500                 :      */
     501 GIC       36739 :     lazy_scan_heap(vacrel);
     502                 : 
     503                 :     /*
     504                 :      * Free resources managed by dead_items_alloc.  This ends parallel mode in
     505                 :      * passing when necessary.
     506                 :      */
     507 CBC       36739 :     dead_items_cleanup(vacrel);
     508 GIC       36739 :     Assert(!IsInParallelMode());
     509                 : 
     510                 :     /*
     511 ECB             :      * Update pg_class entries for each of rel's indexes where appropriate.
     512                 :      *
     513                 :      * Unlike the later update to rel's pg_class entry, this is not critical.
     514                 :      * Maintains relpages/reltuples statistics used by the planner only.
     515                 :      */
     516 GIC       36739 :     if (vacrel->do_index_cleanup)
     517           36607 :         update_relstats_all_indexes(vacrel);
     518                 : 
     519                 :     /* Done with rel's indexes */
     520           36739 :     vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
     521                 : 
     522 ECB             :     /* Optionally truncate rel */
     523 CBC       36739 :     if (should_attempt_truncation(vacrel))
     524              95 :         lazy_truncate_heap(vacrel);
     525                 : 
     526                 :     /* Pop the error context stack */
     527 GIC       36739 :     error_context_stack = errcallback.previous;
     528                 : 
     529                 :     /* Report that we are now doing final cleanup */
     530           36739 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
     531 ECB             :                                  PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
     532                 : 
     533                 :     /*
     534 EUB             :      * Prepare to update rel's pg_class entry.
     535                 :      *
     536                 :      * Aggressive VACUUMs must always be able to advance relfrozenxid to a
     537                 :      * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
     538                 :      * Non-aggressive VACUUMs may advance them by any amount, or not at all.
     539                 :      */
     540 GNC       36739 :     Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
     541                 :            TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
     542                 :                                          vacrel->cutoffs.relfrozenxid,
     543 ECB             :                                          vacrel->NewRelfrozenXid));
     544 GNC       36739 :     Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
     545                 :            MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
     546                 :                                        vacrel->cutoffs.relminmxid,
     547                 :                                        vacrel->NewRelminMxid));
     548 GIC       36739 :     if (vacrel->skippedallvis)
     549                 :     {
     550                 :         /*
     551                 :          * Must keep original relfrozenxid in a non-aggressive VACUUM that
     552                 :          * chose to skip an all-visible page range.  The state that tracks new
     553                 :          * values will have missed unfrozen XIDs from the pages we skipped.
     554                 :          */
     555 GNC          22 :         Assert(!vacrel->aggressive);
     556 GIC          22 :         vacrel->NewRelfrozenXid = InvalidTransactionId;
     557              22 :         vacrel->NewRelminMxid = InvalidMultiXactId;
     558 ECB             :     }
     559                 : 
     560                 :     /*
     561                 :      * For safety, clamp relallvisible to be not more than what we're setting
     562                 :      * pg_class.relpages to
     563                 :      */
     564 GIC       36739 :     new_rel_pages = vacrel->rel_pages;   /* After possible rel truncation */
     565 CBC       36739 :     visibilitymap_count(rel, &new_rel_allvisible, NULL);
     566 GIC       36739 :     if (new_rel_allvisible > new_rel_pages)
     567 LBC           0 :         new_rel_allvisible = new_rel_pages;
     568                 : 
     569 ECB             :     /*
     570                 :      * Now actually update rel's pg_class entry.
     571                 :      *
     572                 :      * In principle new_live_tuples could be -1 indicating that we (still)
     573                 :      * don't know the tuple count.  In practice that can't happen, since we
     574                 :      * scan every page that isn't skipped using the visibility map.
     575                 :      */
     576 GIC       36739 :     vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
     577           36739 :                         new_rel_allvisible, vacrel->nindexes > 0,
     578                 :                         vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
     579 ECB             :                         &frozenxid_updated, &minmulti_updated, false);
     580                 : 
     581                 :     /*
     582                 :      * Report results to the cumulative stats system, too.
     583                 :      *
     584                 :      * Deliberately avoid telling the stats system about LP_DEAD items that
     585                 :      * remain in the table due to VACUUM bypassing index and heap vacuuming.
     586                 :      * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
     587                 :      * It seems like a good idea to err on the side of not vacuuming again too
     588                 :      * soon in cases where the failsafe prevented significant amounts of heap
     589                 :      * vacuuming.
     590                 :      */
     591 GIC       22099 :     pgstat_report_vacuum(RelationGetRelid(rel),
     592           36739 :                          rel->rd_rel->relisshared,
     593           14640 :                          Max(vacrel->new_live_tuples, 0),
     594           36739 :                          vacrel->recently_dead_tuples +
     595           36739 :                          vacrel->missed_dead_tuples);
     596 CBC       36739 :     pgstat_progress_end_command();
     597 ECB             : 
     598 GIC       36739 :     if (instrument)
     599 ECB             :     {
     600 GIC          79 :         TimestampTz endtime = GetCurrentTimestamp();
     601                 : 
     602             113 :         if (verbose || params->log_min_duration == 0 ||
     603              34 :             TimestampDifferenceExceeds(starttime, endtime,
     604                 :                                        params->log_min_duration))
     605                 :         {
     606                 :             long        secs_dur;
     607 EUB             :             int         usecs_dur;
     608                 :             WalUsage    walusage;
     609                 :             StringInfoData buf;
     610                 :             char       *msgfmt;
     611                 :             int32       diff;
     612 GIC          45 :             int64       PageHitOp = VacuumPageHit - StartPageHit,
     613              45 :                         PageMissOp = VacuumPageMiss - StartPageMiss,
     614 CBC          45 :                         PageDirtyOp = VacuumPageDirty - StartPageDirty;
     615              45 :             double      read_rate = 0,
     616 GIC          45 :                         write_rate = 0;
     617 ECB             : 
     618 GIC          45 :             TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
     619 CBC          45 :             memset(&walusage, 0, sizeof(WalUsage));
     620 GIC          45 :             WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
     621                 : 
     622              45 :             initStringInfo(&buf);
     623              45 :             if (verbose)
     624 ECB             :             {
     625                 :                 /*
     626                 :                  * Aggressiveness already reported earlier, in dedicated
     627                 :                  * VACUUM VERBOSE ereport
     628                 :                  */
     629 CBC           4 :                 Assert(!params->is_wraparound);
     630               4 :                 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
     631 ECB             :             }
     632 CBC          41 :             else if (params->is_wraparound)
     633 ECB             :             {
     634                 :                 /*
     635                 :                  * While it's possible for a VACUUM to be both is_wraparound
     636 EUB             :                  * and !aggressive, that's just a corner-case -- is_wraparound
     637                 :                  * implies aggressive.  Produce distinct output for the corner
     638                 :                  * case all the same, just in case.
     639                 :                  */
     640 UNC           0 :                 if (vacrel->aggressive)
     641 LBC           0 :                     msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
     642 ECB             :                 else
     643 LBC           0 :                     msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
     644                 :             }
     645 ECB             :             else
     646                 :             {
     647 GNC          41 :                 if (vacrel->aggressive)
     648 CBC           3 :                     msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
     649 ECB             :                 else
     650 CBC          38 :                     msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
     651                 :             }
     652 GIC          45 :             appendStringInfo(&buf, msgfmt,
     653                 :                              vacrel->dbname,
     654                 :                              vacrel->relnamespace,
     655 ECB             :                              vacrel->relname,
     656                 :                              vacrel->num_index_scans);
     657 CBC          90 :             appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
     658 ECB             :                              vacrel->removed_pages,
     659                 :                              new_rel_pages,
     660                 :                              vacrel->scanned_pages,
     661                 :                              orig_rel_pages == 0 ? 100.0 :
     662 GIC          45 :                              100.0 * vacrel->scanned_pages / orig_rel_pages);
     663              45 :             appendStringInfo(&buf,
     664 CBC          45 :                              _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
     665              45 :                              (long long) vacrel->tuples_deleted,
     666              45 :                              (long long) vacrel->new_rel_tuples,
     667 GIC          45 :                              (long long) vacrel->recently_dead_tuples);
     668 CBC          45 :             if (vacrel->missed_dead_tuples > 0)
     669 LBC           0 :                 appendStringInfo(&buf,
     670 UIC           0 :                                  _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
     671 LBC           0 :                                  (long long) vacrel->missed_dead_tuples,
     672                 :                                  vacrel->missed_dead_pages);
     673 GNC          45 :             diff = (int32) (ReadNextTransactionId() -
     674              45 :                             vacrel->cutoffs.OldestXmin);
     675 GIC          45 :             appendStringInfo(&buf,
     676              45 :                              _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
     677                 :                              vacrel->cutoffs.OldestXmin, diff);
     678 CBC          45 :             if (frozenxid_updated)
     679 ECB             :             {
     680 GNC          32 :                 diff = (int32) (vacrel->NewRelfrozenXid -
     681              32 :                                 vacrel->cutoffs.relfrozenxid);
     682 GBC          32 :                 appendStringInfo(&buf,
     683 GIC          32 :                                  _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
     684 ECB             :                                  vacrel->NewRelfrozenXid, diff);
     685                 :             }
     686 CBC          45 :             if (minmulti_updated)
     687                 :             {
     688 GNC          11 :                 diff = (int32) (vacrel->NewRelminMxid -
     689              11 :                                 vacrel->cutoffs.relminmxid);
     690 CBC          11 :                 appendStringInfo(&buf,
     691              11 :                                  _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
     692 ECB             :                                  vacrel->NewRelminMxid, diff);
     693                 :             }
     694 GNC          45 :             appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
     695                 :                              vacrel->frozen_pages,
     696                 :                              orig_rel_pages == 0 ? 100.0 :
     697              45 :                              100.0 * vacrel->frozen_pages / orig_rel_pages,
     698              45 :                              (long long) vacrel->tuples_frozen);
     699 CBC          45 :             if (vacrel->do_index_vacuuming)
     700                 :             {
     701              44 :                 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
     702              14 :                     appendStringInfoString(&buf, _("index scan not needed: "));
     703                 :                 else
     704              30 :                     appendStringInfoString(&buf, _("index scan needed: "));
     705 ECB             : 
     706 CBC          44 :                 msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
     707                 :             }
     708                 :             else
     709                 :             {
     710 GNC           1 :                 if (!VacuumFailsafeActive)
     711 GIC           1 :                     appendStringInfoString(&buf, _("index scan bypassed: "));
     712 ECB             :                 else
     713 UIC           0 :                     appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
     714 EUB             : 
     715 GBC           1 :                 msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
     716                 :             }
     717              45 :             appendStringInfo(&buf, msgfmt,
     718                 :                              vacrel->lpdead_item_pages,
     719                 :                              orig_rel_pages == 0 ? 100.0 :
     720 CBC          45 :                              100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
     721 GIC          45 :                              (long long) vacrel->lpdead_items);
     722 CBC         124 :             for (int i = 0; i < vacrel->nindexes; i++)
     723 ECB             :             {
     724 CBC          79 :                 IndexBulkDeleteResult *istat = vacrel->indstats[i];
     725 ECB             : 
     726 GIC          79 :                 if (!istat)
     727 CBC           5 :                     continue;
     728                 : 
     729              74 :                 appendStringInfo(&buf,
     730              74 :                                  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
     731 GIC          74 :                                  indnames[i],
     732                 :                                  istat->num_pages,
     733                 :                                  istat->pages_newly_deleted,
     734 ECB             :                                  istat->pages_deleted,
     735                 :                                  istat->pages_free);
     736                 :             }
     737 CBC          45 :             if (track_io_timing)
     738 ECB             :             {
     739 LBC           0 :                 double      read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
     740 UIC           0 :                 double      write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
     741 ECB             : 
     742 UIC           0 :                 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
     743 ECB             :                                  read_ms, write_ms);
     744                 :             }
     745 GIC          45 :             if (secs_dur > 0 || usecs_dur > 0)
     746                 :             {
     747              45 :                 read_rate = (double) BLCKSZ * PageMissOp / (1024 * 1024) /
     748 CBC          45 :                     (secs_dur + usecs_dur / 1000000.0);
     749 GIC          45 :                 write_rate = (double) BLCKSZ * PageDirtyOp / (1024 * 1024) /
     750 CBC          45 :                     (secs_dur + usecs_dur / 1000000.0);
     751 ECB             :             }
     752 GIC          45 :             appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
     753 ECB             :                              read_rate, write_rate);
     754 CBC          45 :             appendStringInfo(&buf,
     755 GIC          45 :                              _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
     756 ECB             :                              (long long) PageHitOp,
     757                 :                              (long long) PageMissOp,
     758                 :                              (long long) PageDirtyOp);
     759 GIC          45 :             appendStringInfo(&buf,
     760              45 :                              _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
     761              45 :                              (long long) walusage.wal_records,
     762              45 :                              (long long) walusage.wal_fpi,
     763              45 :                              (unsigned long long) walusage.wal_bytes);
     764              45 :             appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
     765                 : 
     766              45 :             ereport(verbose ? INFO : LOG,
     767                 :                     (errmsg_internal("%s", buf.data)));
     768              45 :             pfree(buf.data);
     769                 :         }
     770                 :     }
     771                 : 
     772                 :     /* Cleanup index statistics and index names */
     773           91592 :     for (int i = 0; i < vacrel->nindexes; i++)
     774                 :     {
     775           54853 :         if (vacrel->indstats[i])
     776            3919 :             pfree(vacrel->indstats[i]);
     777                 : 
     778           54853 :         if (instrument)
     779             142 :             pfree(indnames[i]);
     780                 :     }
     781           36739 : }
     782                 : 
     783                 : /*
     784                 :  *  lazy_scan_heap() -- workhorse function for VACUUM
     785                 :  *
     786                 :  *      This routine prunes each page in the heap, and considers the need to
     787                 :  *      freeze remaining tuples with storage (not including pages that can be
     788                 :  *      skipped using the visibility map).  Also performs related maintenance
     789                 :  *      of the FSM and visibility map.  These steps all take place during an
     790                 :  *      initial pass over the target heap relation.
     791                 :  *
     792                 :  *      Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
     793                 :  *      consists of deleting index tuples that point to LP_DEAD items left in
      794                 :  *      heap pages following pruning.  The earlier initial pass over the heap
      795 ECB             :  *      will have collected the TIDs whose index tuples need to be removed.
     796                 :  *
     797                 :  *      Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
     798                 :  *      largely consists of marking LP_DEAD items (from collected TID array)
     799                 :  *      as LP_UNUSED.  This has to happen in a second, final pass over the
     800                 :  *      heap, to preserve a basic invariant that all index AMs rely on: no
     801                 :  *      extant index tuple can ever be allowed to contain a TID that points to
     802                 :  *      an LP_UNUSED line pointer in the heap.  We must disallow premature
     803                 :  *      recycling of line pointers to avoid index scans that get confused
     804                 :  *      about which TID points to which tuple immediately after recycling.
      805                 :  *      (Actually, this isn't a concern when the target heap relation happens
      806                 :  *      to have no indexes, which allows us to safely apply the one-pass
      807                 :  *      strategy as an optimization).
     808                 :  *
     809                 :  *      In practice we often have enough space to fit all TIDs, and so won't
     810                 :  *      need to call lazy_vacuum more than once, after our initial pass over
     811                 :  *      the heap has totally finished.  Otherwise things are slightly more
     812                 :  *      complicated: our "initial pass" over the heap applies only to those
     813                 :  *      pages that were pruned before we needed to call lazy_vacuum, and our
     814                 :  *      "final pass" over the heap only vacuums these same heap pages.
     815                 :  *      However, we process indexes in full every time lazy_vacuum is called,
     816                 :  *      which makes index processing very inefficient when memory is in short
     817                 :  *      supply.
     818                 :  */
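                          : 
                          : /*
                          :  * Editorial sketch (not part of vacuumlazy.c): the control flow described
                          :  * above, reduced to a skeleton.  prune_one_page() and dead_items_low()
                          :  * are hypothetical stand-ins for work the real function performs inline.
                          :  *
                          :  *  static void
                          :  *  lazy_scan_heap_sketch(LVRelState *vacrel)
                          :  *  {
                          :  *      for (BlockNumber blkno = 0; blkno < vacrel->rel_pages; blkno++)
                          :  *      {
                          :  *          prune_one_page(vacrel, blkno);   (initial pass: collect TIDs)
                          :  *          if (dead_items_low(vacrel))      (TID array nearly full?)
                          :  *              lazy_vacuum(vacrel);         (indexes, then final pass)
                          :  *      }
                          :  *      if (vacrel->dead_items->num_items > 0)
                          :  *          lazy_vacuum(vacrel);             (common case: a single call)
                          :  *  }
                          :  */
                          : 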
     819                 : static void
     820 GIC       36739 : lazy_scan_heap(LVRelState *vacrel)
     821                 : {
     822 CBC       36739 :     BlockNumber rel_pages = vacrel->rel_pages,
     823                 :                 blkno,
     824                 :                 next_unskippable_block,
     825 GIC       36739 :                 next_fsm_block_to_vacuum = 0;
     826           36739 :     VacDeadItems *dead_items = vacrel->dead_items;
     827           36739 :     Buffer      vmbuffer = InvalidBuffer;
     828 ECB             :     bool        next_unskippable_allvis,
     829                 :                 skipping_current_range;
     830 GIC       36739 :     const int   initprog_index[] = {
     831                 :         PROGRESS_VACUUM_PHASE,
     832                 :         PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
     833                 :         PROGRESS_VACUUM_MAX_DEAD_TUPLES
     834 ECB             :     };
     835                 :     int64       initprog_val[3];
     836                 : 
     837                 :     /* Report that we're scanning the heap, advertising total # of blocks */
     838 GIC       36739 :     initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
     839           36739 :     initprog_val[1] = rel_pages;
     840 CBC       36739 :     initprog_val[2] = dead_items->max_items;
     841 GIC       36739 :     pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
     842                 : 
     843                 :     /* Set up an initial range of skippable blocks using the visibility map */
     844           36739 :     next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer, 0,
     845 ECB             :                                             &next_unskippable_allvis,
     846                 :                                             &skipping_current_range);
     847 CBC      210697 :     for (blkno = 0; blkno < rel_pages; blkno++)
     848 ECB             :     {
     849                 :         Buffer      buf;
     850                 :         Page        page;
     851                 :         bool        all_visible_according_to_vm;
     852                 :         LVPagePruneState prunestate;
     853                 : 
     854 CBC      173958 :         if (blkno == next_unskippable_block)
     855                 :         {
     856                 :             /*
     857 ECB             :              * Can't skip this page safely.  Must scan the page.  But
     858                 :              * determine the next skippable range after the page first.
     859                 :              */
     860 GIC      161858 :             all_visible_according_to_vm = next_unskippable_allvis;
     861 CBC      161858 :             next_unskippable_block = lazy_scan_skip(vacrel, &vmbuffer,
     862                 :                                                     blkno + 1,
     863                 :                                                     &next_unskippable_allvis,
     864                 :                                                     &skipping_current_range);
     865                 : 
     866 GIC      161858 :             Assert(next_unskippable_block >= blkno + 1);
     867                 :         }
     868                 :         else
     869                 :         {
     870                 :             /* Last page always scanned (may need to set nonempty_pages) */
     871           12100 :             Assert(blkno < rel_pages - 1);
     872 ECB             : 
     873 GBC       12100 :             if (skipping_current_range)
     874 GIC        9226 :                 continue;
     875                 : 
     876                 :             /* Current range is too small to skip -- just scan the page */
     877            3235 :             all_visible_according_to_vm = true;
     878                 :         }
     879                 : 
     880          165093 :         vacrel->scanned_pages++;
     881 ECB             : 
     882                 :         /* Report as block scanned, update error traceback information */
     883 GIC      165093 :         pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
     884          165093 :         update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
     885                 :                                  blkno, InvalidOffsetNumber);
     886                 : 
     887          165093 :         vacuum_delay_point();
     888                 : 
     889                 :         /*
      890 EUB             :          * Regularly check if the wraparound failsafe should trigger.
     891                 :          *
     892                 :          * There is a similar check inside lazy_vacuum_all_indexes(), but
     893                 :          * relfrozenxid might start to look dangerously old before we reach
     894                 :          * that point.  This check also provides failsafe coverage for the
     895                 :          * one-pass strategy, and the two-pass strategy with the index_cleanup
     896                 :          * param set to 'off'.
     897                 :          */
     898 GNC      165093 :         if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
     899 UIC           0 :             lazy_check_wraparound_failsafe(vacrel);
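                          : 
                          :         /*
                          :          * Editorial note: FAILSAFE_EVERY_PAGES is defined earlier in this
                          :          * file as 4GB worth of blocks (4GB / BLCKSZ, i.e. 524288 pages at
                          :          * the default 8kB block size), so this recheck adds no measurable
                          :          * cost to the scan.
                          :          */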
     900                 : 
     901 EUB             :         /*
      902                 :          * Consider whether we definitely have enough space to process the
      903                 :          * TIDs on this page already.  If we are close to overrunning the
      904                 :          * available space for dead_items TIDs, pause and do a cycle of
      905                 :          * vacuuming before we tackle this page.
     906                 :          */
     907 GIC      165093 :         Assert(dead_items->max_items >= MaxHeapTuplesPerPage);
     908          165093 :         if (dead_items->max_items - dead_items->num_items < MaxHeapTuplesPerPage)
     909                 :         {
     910                 :             /*
     911                 :              * Before beginning index vacuuming, we release any pin we may
     912                 :              * hold on the visibility map page.  This isn't necessary for
     913                 :              * correctness, but we do it anyway to avoid holding the pin
     914                 :              * across a lengthy, unrelated operation.
     915 ECB             :              */
     916 UIC           0 :             if (BufferIsValid(vmbuffer))
     917                 :             {
     918               0 :                 ReleaseBuffer(vmbuffer);
     919               0 :                 vmbuffer = InvalidBuffer;
     920                 :             }
     921                 : 
     922                 :             /* Perform a round of index and heap vacuuming */
     923 LBC           0 :             vacrel->consider_bypass_optimization = false;
     924 UIC           0 :             lazy_vacuum(vacrel);
     925 ECB             : 
     926                 :             /*
     927                 :              * Vacuum the Free Space Map to make newly-freed space visible on
     928                 :              * upper-level FSM pages.  Note we have not yet processed blkno.
     929                 :              */
     930 UIC           0 :             FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
     931 ECB             :                                     blkno);
     932 UIC           0 :             next_fsm_block_to_vacuum = blkno;
     933                 : 
     934 ECB             :             /* Report that we are once again scanning the heap */
     935 UIC           0 :             pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
     936                 :                                          PROGRESS_VACUUM_PHASE_SCAN_HEAP);
     937                 :         }
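                          : 
                          :         /*
                          :          * Editorial note on the guard above: keeping MaxHeapTuplesPerPage
                          :          * slots free (291 at the default 8kB block size) guarantees that
                          :          * even a page whose items are all LP_DEAD cannot overflow the
                          :          * dead_items array.  For scale, 64MB of maintenance_work_mem buys
                          :          * roughly 64MB / sizeof(ItemPointerData) = 64MB / 6 bytes, or about
                          :          * 11 million TIDs, so this mid-scan vacuuming cycle is reached only
                          :          * on tables with many millions of dead tuples.
                          :          */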
     938 ECB             : 
     939                 :         /*
     940                 :          * Pin the visibility map page in case we need to mark the page
     941                 :          * all-visible.  In most cases this will be very cheap, because we'll
     942                 :          * already have the correct page pinned anyway.
     943                 :          */
     944 CBC      165093 :         visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
     945 ECB             : 
     946                 :         /*
     947                 :          * We need a buffer cleanup lock to prune HOT chains and defragment
     948                 :          * the page in lazy_scan_prune.  But when it's not possible to acquire
     949                 :          * a cleanup lock right away, we may be able to settle for reduced
     950                 :          * processing using lazy_scan_noprune.
     951                 :          */
     952 GNC      165093 :         buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
     953                 :                                  vacrel->bstrategy);
     954          165093 :         page = BufferGetPage(buf);
     955 CBC      165093 :         if (!ConditionalLockBufferForCleanup(buf))
     956 ECB             :         {
     957                 :             bool        hastup,
     958                 :                         recordfreespace;
     959                 : 
     960 GIC           4 :             LockBuffer(buf, BUFFER_LOCK_SHARE);
     961                 : 
     962                 :             /* Check for new or empty pages before lazy_scan_noprune call */
     963               4 :             if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, true,
     964                 :                                        vmbuffer))
     965 EUB             :             {
     966                 :                 /* Processed as new/empty page (lock and pin released) */
     967 GBC           4 :                 continue;
     968                 :             }
     969                 : 
     970                 :             /* Collect LP_DEAD items in dead_items array, count tuples */
     971 CBC           4 :             if (lazy_scan_noprune(vacrel, buf, blkno, page, &hastup,
     972                 :                                   &recordfreespace))
     973 GIC           4 :             {
     974 GBC           4 :                 Size        freespace = 0;
     975                 : 
     976                 :                 /*
     977                 :                  * Processed page successfully (without cleanup lock) -- just
     978                 :                  * need to perform rel truncation and FSM steps, much like the
     979                 :                  * lazy_scan_prune case.  Don't bother trying to match its
     980                 :                  * visibility map setting steps, though.
     981                 :                  */
     982 GIC           4 :                 if (hastup)
     983               4 :                     vacrel->nonempty_pages = blkno + 1;
     984               4 :                 if (recordfreespace)
     985               4 :                     freespace = PageGetHeapFreeSpace(page);
     986 CBC           4 :                 UnlockReleaseBuffer(buf);
     987 GIC           4 :                 if (recordfreespace)
     988 CBC           4 :                     RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
     989 GIC           4 :                 continue;
     990                 :             }
     991 ECB             : 
     992                 :             /*
     993                 :              * lazy_scan_noprune could not do all required processing.  Wait
     994                 :              * for a cleanup lock, and call lazy_scan_prune in the usual way.
     995                 :              */
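                          :             /*
                          :              * Editorial note: only aggressive VACUUMs should get here, since
                          :              * lazy_scan_noprune reports failure only when tuples must be
                          :              * frozen to honor the caller's relfrozenxid/relminmxid targets,
                          :              * something non-aggressive VACUUMs never insist on.  The
                          :              * assertion below records that expectation.
                          :              */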
     996 UIC           0 :             Assert(vacrel->aggressive);
     997               0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     998               0 :             LockBufferForCleanup(buf);
     999                 :         }
    1000                 : 
    1001                 :         /* Check for new or empty pages before lazy_scan_prune call */
    1002 GIC      165089 :         if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, false, vmbuffer))
    1003                 :         {
    1004 ECB             :             /* Processed as new/empty page (lock and pin released) */
    1005 LBC           0 :             continue;
    1006                 :         }
    1007                 : 
    1008 ECB             :         /*
    1009                 :          * Prune, freeze, and count tuples.
    1010                 :          *
    1011                 :          * Accumulates details of remaining LP_DEAD line pointers on page in
    1012                 :          * dead_items array.  This includes LP_DEAD line pointers that we
    1013                 :          * pruned ourselves, as well as existing LP_DEAD line pointers that
    1014                 :          * were pruned some time earlier.  Also considers freezing XIDs in the
    1015                 :          * tuple headers of remaining items with storage.
    1016                 :          */
    1017 GIC      165089 :         lazy_scan_prune(vacrel, buf, blkno, page, &prunestate);
    1018 ECB             : 
    1019 GIC      165089 :         Assert(!prunestate.all_visible || !prunestate.has_lpdead_items);
    1020 EUB             : 
    1021                 :         /* Remember the location of the last page with nonremovable tuples */
    1022 GBC      165089 :         if (prunestate.hastup)
    1023 GIC      159788 :             vacrel->nonempty_pages = blkno + 1;
    1024                 : 
    1025          165089 :         if (vacrel->nindexes == 0)
    1026                 :         {
    1027                 :             /*
     1028                 :              * Now consider the need to do page-at-a-time heap vacuuming
     1029                 :              * when using the one-pass strategy.
    1030                 :              *
    1031                 :              * The one-pass strategy will never call lazy_vacuum().  The steps
    1032                 :              * performed here can be thought of as the one-pass equivalent of
    1033                 :              * a call to lazy_vacuum().
    1034 ECB             :              */
    1035 GIC        8078 :             if (prunestate.has_lpdead_items)
    1036 CBC         357 :             {
    1037 ECB             :                 Size        freespace;
    1038                 : 
    1039 GNC         357 :                 lazy_vacuum_heap_page(vacrel, blkno, buf, 0, vmbuffer);
    1040                 : 
    1041                 :                 /* Forget the LP_DEAD items that we just vacuumed */
    1042 GIC         357 :                 dead_items->num_items = 0;
    1043                 : 
    1044                 :                 /*
    1045                 :                  * Periodically perform FSM vacuuming to make newly-freed
    1046                 :                  * space visible on upper FSM pages.  Note we have not yet
    1047                 :                  * performed FSM processing for blkno.
    1048 ECB             :                  */
    1049 GIC         357 :                 if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
    1050                 :                 {
    1051 UIC           0 :                     FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
    1052                 :                                             blkno);
    1053               0 :                     next_fsm_block_to_vacuum = blkno;
    1054                 :                 }
    1055 ECB             : 
    1056                 :                 /*
    1057                 :                  * Now perform FSM processing for blkno, and move on to next
    1058                 :                  * page.
    1059                 :                  *
    1060                 :                  * Our call to lazy_vacuum_heap_page() will have considered if
    1061                 :                  * it's possible to set all_visible/all_frozen independently
    1062                 :                  * of lazy_scan_prune().  Note that prunestate was invalidated
     1063                 :                  * by the lazy_vacuum_heap_page() call.
    1064                 :                  */
    1065 GIC         357 :                 freespace = PageGetHeapFreeSpace(page);
    1066                 : 
    1067             357 :                 UnlockReleaseBuffer(buf);
    1068             357 :                 RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
    1069             357 :                 continue;
    1070                 :             }
    1071                 : 
    1072                 :             /*
    1073                 :              * There was no call to lazy_vacuum_heap_page() because pruning
    1074                 :              * didn't encounter/create any LP_DEAD items that needed to be
    1075                 :              * vacuumed.  Prune state has not been invalidated, so proceed
    1076                 :              * with prunestate-driven visibility map and FSM steps (just like
    1077                 :              * the two-pass strategy).
    1078 ECB             :              */
    1079 CBC        7721 :             Assert(dead_items->num_items == 0);
    1080 ECB             :         }
    1081                 : 
    1082                 :         /*
    1083                 :          * Handle setting visibility map bit based on information from the VM
    1084                 :          * (as of last lazy_scan_skip() call), and from prunestate
    1085                 :          */
    1086 GIC      164732 :         if (!all_visible_according_to_vm && prunestate.all_visible)
    1087          129786 :         {
    1088          129786 :             uint8       flags = VISIBILITYMAP_ALL_VISIBLE;
    1089                 : 
    1090          129786 :             if (prunestate.all_frozen)
    1091                 :             {
    1092 GNC      121554 :                 Assert(!TransactionIdIsValid(prunestate.visibility_cutoff_xid));
    1093 CBC      121554 :                 flags |= VISIBILITYMAP_ALL_FROZEN;
    1094                 :             }
    1095 EUB             : 
    1096                 :             /*
     1097                 :              * It should never be the case that the visibility map bit is set
    1098                 :              * while the page-level bit is clear, but the reverse is allowed
    1099                 :              * (if checksums are not enabled).  Regardless, set both bits so
    1100                 :              * that we get back in sync.
    1101                 :              *
    1102                 :              * NB: If the heap page is all-visible but the VM bit is not set,
    1103                 :              * we don't need to dirty the heap page.  However, if checksums
    1104                 :              * are enabled, we do need to make sure that the heap page is
    1105                 :              * dirtied before passing it to visibilitymap_set(), because it
    1106                 :              * may be logged.  Given that this situation should only happen in
    1107                 :              * rare cases after a crash, it is not worth optimizing.
    1108                 :              */
    1109 GIC      129786 :             PageSetAllVisible(page);
    1110          129786 :             MarkBufferDirty(buf);
    1111          129786 :             visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
    1112                 :                               vmbuffer, prunestate.visibility_cutoff_xid,
    1113                 :                               flags);
    1114                 :         }
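                          : 
                          :         /*
                          :          * Editorial note: flags is a bitmask; visibilitymap.h defines
                          :          * VISIBILITYMAP_ALL_VISIBLE as 0x01 and VISIBILITYMAP_ALL_FROZEN
                          :          * as 0x02, so an all-frozen page sets both VM bits in the single
                          :          * visibilitymap_set() call above.
                          :          */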
    1115                 : 
    1116                 :         /*
    1117                 :          * As of PostgreSQL 9.2, the visibility map bit should never be set if
    1118 ECB             :          * the page-level bit is clear.  However, it's possible that the bit
    1119                 :          * got cleared after lazy_scan_skip() was called, so we must recheck
    1120 EUB             :          * with buffer lock before concluding that the VM is corrupt.
    1121                 :          */
    1122 GNC       34946 :         else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
    1123 UNC           0 :                  visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
    1124 EUB             :         {
    1125 UIC           0 :             elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
    1126                 :                  vacrel->relname, blkno);
    1127               0 :             visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
    1128                 :                                 VISIBILITYMAP_VALID_BITS);
    1129                 :         }
    1130                 : 
    1131                 :         /*
    1132                 :          * It's possible for the value returned by
    1133 ECB             :          * GetOldestNonRemovableTransactionId() to move backwards, so it's not
    1134                 :          * wrong for us to see tuples that appear to not be visible to
    1135                 :          * everyone yet, while PD_ALL_VISIBLE is already set. The real safe
    1136                 :          * xmin value never moves backwards, but
    1137                 :          * GetOldestNonRemovableTransactionId() is conservative and sometimes
    1138                 :          * returns a value that's unnecessarily small, so if we see that
    1139                 :          * contradiction it just means that the tuples that we think are not
    1140                 :          * visible to everyone yet actually are, and the PD_ALL_VISIBLE flag
    1141                 :          * is correct.
    1142                 :          *
    1143                 :          * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE
    1144 EUB             :          * set, however.
    1145                 :          */
    1146 GIC       34946 :         else if (prunestate.has_lpdead_items && PageIsAllVisible(page))
    1147                 :         {
    1148 UIC           0 :             elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
    1149                 :                  vacrel->relname, blkno);
    1150               0 :             PageClearAllVisible(page);
    1151               0 :             MarkBufferDirty(buf);
    1152               0 :             visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
    1153                 :                                 VISIBILITYMAP_VALID_BITS);
    1154                 :         }
    1155 ECB             : 
    1156                 :         /*
    1157                 :          * If the all-visible page is all-frozen but not marked as such yet,
    1158                 :          * mark it as all-frozen.  Note that all_frozen is only valid if
    1159                 :          * all_visible is true, so we must check both prunestate fields.
    1160                 :          */
    1161 GIC       34946 :         else if (all_visible_according_to_vm && prunestate.all_visible &&
    1162            4044 :                  prunestate.all_frozen &&
    1163            3645 :                  !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
    1164                 :         {
    1165                 :             /*
    1166                 :              * Avoid relying on all_visible_according_to_vm as a proxy for the
    1167                 :              * page-level PD_ALL_VISIBLE bit being set, since it might have
    1168                 :              * become stale -- even when all_visible is set in prunestate
    1169                 :              */
    1170 GNC          11 :             if (!PageIsAllVisible(page))
    1171                 :             {
    1172 UNC           0 :                 PageSetAllVisible(page);
    1173               0 :                 MarkBufferDirty(buf);
    1174                 :             }
    1175                 : 
    1176                 :             /*
    1177                 :              * Set the page all-frozen (and all-visible) in the VM.
    1178                 :              *
    1179                 :              * We can pass InvalidTransactionId as our visibility_cutoff_xid,
    1180                 :              * since a snapshotConflictHorizon sufficient to make everything
    1181                 :              * safe for REDO was logged when the page's tuples were frozen.
    1182                 :              */
    1183 GNC          11 :             Assert(!TransactionIdIsValid(prunestate.visibility_cutoff_xid));
    1184 GIC          11 :             visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
    1185                 :                               vmbuffer, InvalidTransactionId,
    1186                 :                               VISIBILITYMAP_ALL_VISIBLE |
    1187                 :                               VISIBILITYMAP_ALL_FROZEN);
    1188                 :         }
    1189                 : 
    1190                 :         /*
    1191                 :          * Final steps for block: drop cleanup lock, record free space in the
    1192                 :          * FSM
    1193                 :          */
    1194          164732 :         if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
    1195                 :         {
    1196                 :             /*
    1197                 :              * Wait until lazy_vacuum_heap_rel() to save free space.  This
    1198                 :              * doesn't just save us some cycles; it also allows us to record
    1199                 :              * any additional free space that lazy_vacuum_heap_page() will
    1200                 :              * make available in cases where it's possible to truncate the
    1201 ECB             :              * page's line pointer array.
    1202                 :              *
    1203                 :              * Note: It's not in fact 100% certain that we really will call
    1204                 :              * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip
    1205                 :              * index vacuuming (and so must skip heap vacuuming).  This is
    1206                 :              * deemed okay because it only happens in emergencies, or when
    1207                 :              * there is very little free space anyway. (Besides, we start
    1208                 :              * recording free space in the FSM once index vacuuming has been
    1209                 :              * abandoned.)
    1210                 :              *
    1211                 :              * Note: The one-pass (no indexes) case is only supposed to make
    1212                 :              * it this far when there were no LP_DEAD items during pruning.
    1213                 :              */
    1214 CBC       25199 :             Assert(vacrel->nindexes > 0);
    1215           25199 :             UnlockReleaseBuffer(buf);
    1216                 :         }
    1217                 :         else
    1218 ECB             :         {
    1219 GIC      139533 :             Size        freespace = PageGetHeapFreeSpace(page);
    1220                 : 
    1221 CBC      139533 :             UnlockReleaseBuffer(buf);
    1222 GIC      139533 :             RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
    1223 ECB             :         }
    1224                 :     }
    1225                 : 
    1226 GIC       36739 :     vacrel->blkno = InvalidBlockNumber;
    1227           36739 :     if (BufferIsValid(vmbuffer))
    1228           14699 :         ReleaseBuffer(vmbuffer);
    1229 ECB             : 
    1230                 :     /* report that everything is now scanned */
    1231 CBC       36739 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
    1232                 : 
    1233                 :     /* now we can compute the new value for pg_class.reltuples */
    1234 GIC       73478 :     vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
    1235                 :                                                      vacrel->scanned_pages,
    1236           36739 :                                                      vacrel->live_tuples);
    1237 ECB             : 
    1238                 :     /*
    1239                 :      * Also compute the total number of surviving heap entries.  In the
    1240                 :      * (unlikely) scenario that new_live_tuples is -1, take it as zero.
    1241                 :      */
    1242 GIC       36739 :     vacrel->new_rel_tuples =
    1243           36739 :         Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
    1244 CBC       36739 :         vacrel->missed_dead_tuples;
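                          : 
                          :     /*
                          :      * Editorial note: reltuples == -1 is the sentinel for "still unknown"
                          :      * (a table that has never been vacuumed or analyzed), which is why
                          :      * the Max() above clamps new_live_tuples to zero before adding the
                          :      * recently-dead and missed-dead counts.
                          :      */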
    1245 ECB             : 
    1246                 :     /*
    1247                 :      * Do index vacuuming (call each index's ambulkdelete routine), then do
    1248                 :      * related heap vacuuming
    1249                 :      */
    1250 GIC       36739 :     if (dead_items->num_items > 0)
    1251 CBC        1765 :         lazy_vacuum(vacrel);
    1252 ECB             : 
    1253                 :     /*
    1254                 :      * Vacuum the remainder of the Free Space Map.  We must do this whether or
    1255                 :      * not there were indexes, and whether or not we bypassed index vacuuming.
    1256                 :      */
    1257 GIC       36739 :     if (blkno > next_fsm_block_to_vacuum)
    1258           14699 :         FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
    1259                 : 
    1260                 :     /* report all blocks vacuumed */
    1261           36739 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
    1262                 : 
    1263                 :     /* Do final index cleanup (call each index's amvacuumcleanup routine) */
    1264           36739 :     if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
    1265           34805 :         lazy_cleanup_all_indexes(vacrel);
    1266           36739 : }
    1267                 : 
    1268                 : /*
    1269                 :  *  lazy_scan_skip() -- set up range of skippable blocks using visibility map.
    1270                 :  *
    1271                 :  * lazy_scan_heap() calls here every time it needs to set up a new range of
    1272                 :  * blocks to skip via the visibility map.  Caller passes the next block in
    1273                 :  * line.  We return a next_unskippable_block for this range.  When there are
    1274                 :  * no skippable blocks we just return caller's next_block.  The all-visible
    1275                 :  * status of the returned block is set in *next_unskippable_allvis for caller,
     1276                 :  * too.  The block usually won't be all-visible (since it's unskippable), but
     1277                 :  * it can be during aggressive VACUUMs (as well as in certain edge cases).
    1278 ECB             :  *
    1279                 :  * Sets *skipping_current_range to indicate if caller should skip this range.
    1280                 :  * Costs and benefits drive our decision.  Very small ranges won't be skipped.
    1281                 :  *
    1282                 :  * Note: our opinion of which blocks can be skipped can go stale immediately.
    1283                 :  * It's okay if caller "misses" a page whose all-visible or all-frozen marking
    1284                 :  * was concurrently cleared, though.  All that matters is that caller scan all
    1285                 :  * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
    1286                 :  * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
    1287                 :  * older XIDs/MXIDs.  The vacrel->skippedallvis flag will be set here when the
    1288                 :  * choice to skip such a range is actually made, making everything safe.)
    1289                 :  */
    1290                 : static BlockNumber
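                          : 
                          : /*
                          :  * Editorial sketch (not part of vacuumlazy.c), with illustrative block
                          :  * numbers: suppose blocks 10..99 are all-visible in the VM and block 100
                          :  * is not.  Then a call like
                          :  *
                          :  *      next_unskippable = lazy_scan_skip(vacrel, &vmbuffer, 10,
                          :  *                                        &allvis, &skipping);
                          :  *
                          :  * returns 100, and because the 90-block range comfortably exceeds
                          :  * SKIP_PAGES_THRESHOLD (32 pages), *skipping_current_range is set and
                          :  * the caller jumps from block 10 straight to block 100.  A 5-block range
                          :  * would be scanned anyway; sequential reads are nearly free under OS
                          :  * readahead.
                          :  */
                          : 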
    1291 GIC      198597 : lazy_scan_skip(LVRelState *vacrel, Buffer *vmbuffer, BlockNumber next_block,
    1292                 :                bool *next_unskippable_allvis, bool *skipping_current_range)
    1293 ECB             : {
    1294 GIC      198597 :     BlockNumber rel_pages = vacrel->rel_pages,
    1295 CBC      198597 :                 next_unskippable_block = next_block,
    1296          198597 :                 nskippable_blocks = 0;
    1297          198597 :     bool        skipsallvis = false;
    1298                 : 
    1299 GIC      198597 :     *next_unskippable_allvis = true;
    1300          210697 :     while (next_unskippable_block < rel_pages)
    1301                 :     {
    1302          173958 :         uint8       mapbits = visibilitymap_get_status(vacrel->rel,
    1303                 :                                                        next_unskippable_block,
    1304                 :                                                        vmbuffer);
    1305                 : 
    1306          173958 :         if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
    1307                 :         {
    1308          160671 :             Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
    1309          160671 :             *next_unskippable_allvis = false;
    1310 CBC      160671 :             break;
    1311 ECB             :         }
    1312                 : 
    1313                 :         /*
    1314                 :          * Caller must scan the last page to determine whether it has tuples
    1315                 :          * (caller must have the opportunity to set vacrel->nonempty_pages).
     1316                 :          * This rule avoids having lazy_truncate_heap() take an access-exclusive
     1317                 :          * lock on the rel to attempt a truncation that fails anyway, just because
    1318                 :          * there are tuples on the last page (it is likely that there will be
    1319                 :          * tuples on other nearby pages as well, but those can be skipped).
    1320                 :          *
    1321                 :          * Implement this by always treating the last block as unsafe to skip.
    1322                 :          */
    1323 GIC       13287 :         if (next_unskippable_block == rel_pages - 1)
    1324             807 :             break;
    1325                 : 
    1326 ECB             :         /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
    1327 GIC       12480 :         if (!vacrel->skipwithvm)
    1328                 :         {
    1329                 :             /* Caller shouldn't rely on all_visible_according_to_vm */
    1330 GNC         378 :             *next_unskippable_allvis = false;
    1331 CBC         378 :             break;
    1332                 :         }
    1333 ECB             : 
    1334                 :         /*
    1335                 :          * Aggressive VACUUM caller can't skip pages just because they are
    1336                 :          * all-visible.  They may still skip all-frozen pages, which can't
    1337                 :          * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
    1338                 :          */
    1339 CBC       12102 :         if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
    1340                 :         {
    1341 GIC        3166 :             if (vacrel->aggressive)
    1342 CBC           2 :                 break;
    1343 ECB             : 
    1344                 :             /*
    1345                 :              * All-visible block is safe to skip in non-aggressive case.  But
    1346                 :              * remember that the final range contains such a block for later.
    1347                 :              */
    1348 GIC        3164 :             skipsallvis = true;
    1349                 :         }
    1350                 : 
    1351           12100 :         vacuum_delay_point();
    1352           12100 :         next_unskippable_block++;
    1353           12100 :         nskippable_blocks++;
    1354                 :     }
    1355                 : 
    1356                 :     /*
    1357 ECB             :      * We only skip a range with at least SKIP_PAGES_THRESHOLD consecutive
    1358                 :      * pages.  Since we're reading sequentially, the OS should be doing
    1359                 :      * readahead for us, so there's no gain in skipping a page now and then.
    1360                 :      * Skipping such a range might even discourage sequential detection.
    1361                 :      *
    1362                 :      * This test also enables more frequent relfrozenxid advancement during
    1363                 :      * non-aggressive VACUUMs.  If the range has any all-visible pages then
    1364                 :      * skipping makes updating relfrozenxid unsafe, which is a real downside.
    1365                 :      */
    1366 CBC      198597 :     if (nskippable_blocks < SKIP_PAGES_THRESHOLD)
    1367 GIC      198504 :         *skipping_current_range = false;
    1368                 :     else
    1369                 :     {
    1370              93 :         *skipping_current_range = true;
    1371              93 :         if (skipsallvis)
    1372              22 :             vacrel->skippedallvis = true;
    1373                 :     }
    1374                 : 
    1375          198597 :     return next_unskippable_block;
    1376                 : }
    1377                 : 
    1378                 : /*
    1379                 :  *  lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
    1380                 :  *
    1381                 :  * Must call here to handle both new and empty pages before calling
    1382                 :  * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
    1383                 :  * with new or empty pages.
    1384                 :  *
    1385                 :  * It's necessary to consider new pages as a special case, since the rules for
    1386                 :  * maintaining the visibility map and FSM with empty pages are a little
    1387                 :  * different (though new pages can be truncated away during rel truncation).
    1388                 :  *
    1389                 :  * Empty pages are not really a special case -- they're just heap pages that
    1390                 :  * have no allocated tuples (including even LP_UNUSED items).  You might
    1391                 :  * wonder why we need to handle them here all the same.  It's only necessary
    1392                 :  * because of a corner-case involving a hard crash during heap relation
    1393                 :  * extension.  If we ever make relation-extension crash safe, then it should
    1394                 :  * no longer be necessary to deal with empty pages here (or new pages, for
    1395                 :  * that matter).
    1396                 :  *
    1397                 :  * Caller must hold at least a shared lock.  We might need to escalate the
    1398 ECB             :  * lock in that case, so the type of lock caller holds needs to be specified
     1399                 :  * using the 'sharelock' argument.
    1400                 :  *
    1401                 :  * Returns false in common case where caller should go on to call
    1402                 :  * lazy_scan_prune (or lazy_scan_noprune).  Otherwise returns true, indicating
    1403                 :  * that lazy_scan_heap is done processing the page, releasing lock on caller's
    1404                 :  * behalf.
    1405                 :  */
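                          : 
                          : /*
                          :  * Editorial sketch of the calling convention described above, as it
                          :  * appears (twice) in lazy_scan_heap():
                          :  *
                          :  *      if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, sharelock,
                          :  *                                 vmbuffer))
                          :  *          continue;    (page was new/empty; lock and pin were released)
                          :  *
                          :  * with control otherwise falling through to lazy_scan_prune() or
                          :  * lazy_scan_noprune().
                          :  */
                          : 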
    1406                 : static bool
    1407 GIC      165093 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
    1408                 :                        Page page, bool sharelock, Buffer vmbuffer)
    1409                 : {
    1410                 :     Size        freespace;
    1411                 : 
    1412          165093 :     if (PageIsNew(page))
    1413                 :     {
    1414                 :         /*
    1415                 :          * All-zeroes pages can be left over if either a backend extends the
    1416                 :          * relation by a single page, but crashes before the newly initialized
    1417                 :          * page has been written out, or when bulk-extending the relation
    1418                 :          * (which creates a number of empty pages at the tail end of the
    1419                 :          * relation), and then enters them into the FSM.
    1420                 :          *
    1421                 :          * Note we do not enter the page into the visibilitymap. That has the
    1422                 :          * downside that we repeatedly visit this page in subsequent vacuums,
    1423                 :          * but otherwise we'll never discover the space on a promoted standby.
     1424 EUB             :          * The harm of repeated checking normally ought not to be too bad.
     1425                 :          * The space should usually be used at some point; otherwise there
    1426                 :          * wouldn't be any regular vacuums.
    1427                 :          *
    1428                 :          * Make sure these pages are in the FSM, to ensure they can be reused.
    1429                 :          * Do that by testing if there's any space recorded for the page. If
     1430                 :          * not, enter it. We do so after releasing the lock on the heap page;
    1431                 :          * the FSM is approximate, after all.
    1432                 :          */
    1433 UBC           0 :         UnlockReleaseBuffer(buf);
    1434                 : 
    1435 UIC           0 :         if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
    1436 ECB             :         {
    1437 UIC           0 :             freespace = BLCKSZ - SizeOfPageHeaderData;
    1438                 : 
    1439               0 :             RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
    1440                 :         }
    1441                 : 
    1442               0 :         return true;
    1443 EUB             :     }
    1444                 : 
    1445 GBC      165093 :     if (PageIsEmpty(page))
    1446 EUB             :     {
    1447                 :         /*
    1448                 :          * It seems likely that caller will always be able to get a cleanup
    1449                 :          * lock on an empty page.  But don't take any chances -- escalate to
    1450                 :          * an exclusive lock (still don't need a cleanup lock, though).
    1451                 :          */
    1452 UIC           0 :         if (sharelock)
    1453                 :         {
    1454               0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    1455               0 :             LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    1456                 : 
    1457               0 :             if (!PageIsEmpty(page))
    1458                 :             {
    1459                 :                 /* page isn't new or empty -- keep lock and pin for now */
    1460               0 :                 return false;
    1461                 :             }
    1462                 :         }
    1463 EUB             :         else
    1464                 :         {
    1465                 :             /* Already have a full cleanup lock (which is more than enough) */
    1466                 :         }
    1467                 : 
    1468                 :         /*
    1469                 :          * Unlike new pages, empty pages are always set all-visible and
    1470                 :          * all-frozen.
    1471                 :          */
    1472 UIC           0 :         if (!PageIsAllVisible(page))
    1473                 :         {
    1474               0 :             START_CRIT_SECTION();
    1475                 : 
    1476                 :             /* mark buffer dirty before writing a WAL record */
    1477               0 :             MarkBufferDirty(buf);
    1478                 : 
    1479 EUB             :             /*
    1480                 :              * It's possible that another backend has extended the heap,
    1481                 :              * initialized the page, and then failed to WAL-log the page due
    1482                 :              * to an ERROR.  Since heap extension is not WAL-logged, recovery
    1483                 :              * might try to replay our record setting the page all-visible and
    1484                 :              * find that the page isn't initialized, which will cause a PANIC.
    1485                 :              * To prevent that, check whether the page has been previously
    1486                 :              * WAL-logged, and if not, do that now.
    1487                 :              */
    1488 UIC           0 :             if (RelationNeedsWAL(vacrel->rel) &&
    1489               0 :                 PageGetLSN(page) == InvalidXLogRecPtr)
    1490 UBC           0 :                 log_newpage_buffer(buf, true);
    1491 EUB             : 
    1492 UBC           0 :             PageSetAllVisible(page);
    1493               0 :             visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
    1494                 :                               vmbuffer, InvalidTransactionId,
    1495                 :                               VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
    1496 UIC           0 :             END_CRIT_SECTION();
    1497 ECB             :         }
    1498                 : 
    1499 UIC           0 :         freespace = PageGetHeapFreeSpace(page);
    1500               0 :         UnlockReleaseBuffer(buf);
    1501               0 :         RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
    1502               0 :         return true;
    1503                 :     }
    1504                 : 
    1505                 :     /* page isn't new or empty -- keep lock and pin */
    1506 GIC      165093 :     return false;
    1507                 : }
    1508                 : 
    1509                 : /*
    1510                 :  *  lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
    1511                 :  *
    1512                 :  * Caller must hold pin and buffer cleanup lock on the buffer.
    1513                 :  *
    1514                 :  * Prior to PostgreSQL 14 there were very rare cases where heap_page_prune()
    1515                 :  * was allowed to disagree with our HeapTupleSatisfiesVacuum() call about
    1516                 :  * whether or not a tuple should be considered DEAD.  This happened when an
    1517                 :  * inserting transaction concurrently aborted (after our heap_page_prune()
    1518                 :  * call, before our HeapTupleSatisfiesVacuum() call).  There was rather a lot
    1519                 :  * of complexity just so we could deal with tuples that were DEAD to VACUUM,
    1520                 :  * but nevertheless were left with storage after pruning.
    1521 ECB             :  *
    1522                 :  * The approach we take now is to restart pruning when the race condition is
    1523                 :  * detected.  This allows heap_page_prune() to prune the tuples inserted by
    1524                 :  * the now-aborted transaction.  This is a little crude, but it guarantees
    1525                 :  * that any items that make it into the dead_items array are simple LP_DEAD
    1526                 :  * line pointers, and that every remaining item with tuple storage is
    1527                 :  * considered as a candidate for freezing.
    1528                 :  */
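                          : 
                          : /*
                          :  * Editorial sketch (not part of vacuumlazy.c) of the restart pattern
                          :  * described above:
                          :  *
                          :  *  retry:
                          :  *      heap_page_prune(...);
                          :  *      for (each remaining item with storage)
                          :  *          if (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf) ==
                          :  *              HEAPTUPLE_DEAD)
                          :  *              goto retry;    (inserter aborted concurrently; re-prune)
                          :  *
                          :  * The race window is tiny, so in practice the retry path is almost never
                          :  * taken; the unlikely() annotation on the check below reflects that.
                          :  */
                          : 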
    1529                 : static void
    1530 GIC      165089 : lazy_scan_prune(LVRelState *vacrel,
    1531                 :                 Buffer buf,
    1532                 :                 BlockNumber blkno,
    1533                 :                 Page page,
    1534                 :                 LVPagePruneState *prunestate)
    1535                 : {
    1536          165089 :     Relation    rel = vacrel->rel;
    1537                 :     OffsetNumber offnum,
    1538                 :                 maxoff;
    1539                 :     ItemId      itemid;
    1540 ECB             :     HeapTupleData tuple;
    1541                 :     HTSV_Result res;
    1542                 :     int         tuples_deleted,
    1543                 :                 tuples_frozen,
    1544                 :                 lpdead_items,
    1545                 :                 live_tuples,
    1546                 :                 recently_dead_tuples;
    1547                 :     int         nnewlpdead;
    1548                 :     HeapPageFreeze pagefrz;
    1549 GNC      165089 :     int64       fpi_before = pgWalUsage.wal_fpi;
    1550                 :     OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
    1551                 :     HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
    1552                 : 
    1553 CBC      165089 :     Assert(BufferGetBlockNumber(buf) == blkno);
    1554                 : 
    1555                 :     /*
    1556 ECB             :      * maxoff might be reduced following line pointer array truncation in
    1557                 :      * heap_page_prune.  That's safe for us to ignore, since the reclaimed
    1558                 :      * space will continue to look like LP_UNUSED items below.
    1559                 :      */
    1560 CBC      165089 :     maxoff = PageGetMaxOffsetNumber(page);
    1561 ECB             : 
    1562 CBC      165089 : retry:
    1563 ECB             : 
    1564                 :     /* Initialize (or reset) page-level state */
    1565 GNC      165089 :     pagefrz.freeze_required = false;
    1566          165089 :     pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
    1567          165089 :     pagefrz.FreezePageRelminMxid = vacrel->NewRelminMxid;
    1568          165089 :     pagefrz.NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
    1569          165089 :     pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid;
    1570 GIC      165089 :     tuples_deleted = 0;
    1571 GNC      165089 :     tuples_frozen = 0;
    1572 GIC      165089 :     lpdead_items = 0;
    1573          165089 :     live_tuples = 0;
    1574          165089 :     recently_dead_tuples = 0;
    1575                 : 
    1576                 :     /*
    1577                 :      * Prune all HOT-update chains in this page.
    1578                 :      *
    1579                 :      * We count tuples removed by the pruning step as tuples_deleted.  Its
    1580 ECB             :      * final value can be thought of as the number of tuples that have been
    1581                 :      * deleted from the table.  It should not be confused with lpdead_items;
    1582                 :      * lpdead_items's final value can be thought of as the number of tuples
    1583                 :      * that were deleted from indexes.
    1584                 :      */
    1585 GIC      165089 :     tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest,
    1586                 :                                      InvalidTransactionId, 0, &nnewlpdead,
    1587                 :                                      &vacrel->offnum);
    1588 ECB             : 
    1589                 :     /*
    1590                 :      * Now scan the page to collect LP_DEAD items and check for tuples
    1591                 :      * requiring freezing among remaining tuples with storage
    1592                 :      */
    1593 GIC      165089 :     prunestate->hastup = false;
    1594 CBC      165089 :     prunestate->has_lpdead_items = false;
    1595          165089 :     prunestate->all_visible = true;
    1596          165089 :     prunestate->all_frozen = true;
    1597 GIC      165089 :     prunestate->visibility_cutoff_xid = InvalidTransactionId;
    1598                 : 
    1599          165089 :     for (offnum = FirstOffsetNumber;
    1600        10322367 :          offnum <= maxoff;
    1601        10157278 :          offnum = OffsetNumberNext(offnum))
    1602                 :     {
    1603                 :         bool        totally_frozen;
    1604 ECB             : 
    1605                 :         /*
    1606                 :          * Set the offset number so that we can display it along with any
    1607                 :          * error that occurred while processing this tuple.
    1608                 :          */
    1609 GIC    10157278 :         vacrel->offnum = offnum;
    1610 CBC    10157278 :         itemid = PageGetItemId(page, offnum);
    1611                 : 
    1612 GIC    10157278 :         if (!ItemIdIsUsed(itemid))
    1613 CBC     1016162 :             continue;
    1614 ECB             : 
    1615                 :         /* Redirect items mustn't be touched */
    1616 GIC    10029656 :         if (ItemIdIsRedirected(itemid))
    1617 ECB             :         {
    1618                 :             /* page makes rel truncation unsafe */
    1619 GNC       67635 :             prunestate->hastup = true;
    1620 GIC       67635 :             continue;
    1621                 :         }
    1622                 : 
    1623         9962021 :         if (ItemIdIsDead(itemid))
    1624 ECB             :         {
    1625                 :             /*
    1626                 :              * Deliberately don't set hastup for LP_DEAD items.  We make the
    1627                 :              * soft assumption that any LP_DEAD items encountered here will
    1628                 :              * become LP_UNUSED later on, before count_nondeletable_pages is
    1629                 :              * reached.  If we don't make this assumption then rel truncation
    1630                 :              * will only happen every other VACUUM, at most.  Besides, VACUUM
    1631                 :              * must treat hastup/nonempty_pages as provisional no matter how
    1632                 :              * LP_DEAD items are handled (handled here, or handled later on).
    1633                 :              *
    1634                 :              * Also deliberately delay unsetting all_visible until just before
    1635                 :              * we return to lazy_scan_heap caller, as explained in full below.
    1636                 :              * (This is another case where it's useful to anticipate that any
    1637                 :              * LP_DEAD items will become LP_UNUSED during the ongoing VACUUM.)
    1638                 :              */
    1639 CBC      820905 :             deadoffsets[lpdead_items++] = offnum;
    1640 GIC      820905 :             continue;
    1641                 :         }
    1642                 : 
    1643         9141116 :         Assert(ItemIdIsNormal(itemid));
    1644                 : 
    1645         9141116 :         ItemPointerSet(&(tuple.t_self), blkno, offnum);
    1646         9141116 :         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
    1647 CBC     9141116 :         tuple.t_len = ItemIdGetLength(itemid);
    1648 GIC     9141116 :         tuple.t_tableOid = RelationGetRelid(rel);
    1649                 : 
    1650 ECB             :         /*
    1651 EUB             :          * DEAD tuples are almost always pruned into LP_DEAD line pointers by
    1652                 :          * heap_page_prune(), but it's possible that the tuple state changed
    1653                 :          * since heap_page_prune() looked.  Handle that here by restarting.
    1654                 :          * (See comments at the top of function for a full explanation.)
    1655                 :          */
    1656 GNC     9141116 :         res = HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
    1657                 :                                        buf);
    1658                 : 
    1659 GIC     9141116 :         if (unlikely(res == HEAPTUPLE_DEAD))
    1660 UIC           0 :             goto retry;
    1661                 : 
    1662                 :         /*
    1663                 :          * The criteria for counting a tuple as live in this block need to
    1664                 :          * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
    1665                 :          * and ANALYZE may produce wildly different reltuples values, e.g.
    1666                 :          * when there are many recently-dead tuples.
    1667                 :          *
    1668                 :          * The logic here is a bit simpler than acquire_sample_rows(), as
    1669                 :          * VACUUM can't run inside a transaction block, which makes some cases
    1670                 :          * impossible (e.g. in-progress insert from the same transaction).
    1671                 :          *
    1672                 :          * We treat LP_DEAD items (which are the closest thing to DEAD tuples
    1673 ECB             :          * that might be seen here) differently, too: we assume that they'll
    1674                 :          * become LP_UNUSED before VACUUM finishes.  This difference is only
    1675                 :          * superficial.  VACUUM effectively agrees with ANALYZE about DEAD
    1676                 :          * items, in the end.  VACUUM won't remember LP_DEAD items, but only
    1677                 :          * because they're not supposed to be left behind when it is done.
    1678                 :          * (Cases where we bypass index vacuuming will violate this optimistic
    1679                 :          * assumption, but the overall impact of that should be negligible.)
    1680                 :          */
    1681 CBC     9141116 :         switch (res)
    1682                 :         {
    1683 GIC     8821538 :             case HEAPTUPLE_LIVE:
    1684                 : 
    1685                 :                 /*
    1686                 :                  * Count it as live.  Not only is this natural, but it's also
    1687                 :                  * what acquire_sample_rows() does.
    1688                 :                  */
    1689         8821538 :                 live_tuples++;
    1690                 : 
    1691 ECB             :                 /*
    1692                 :                  * Is the tuple definitely visible to all transactions?
    1693                 :                  *
    1694                 :                  * NB: Like with per-tuple hint bits, we can't set the
    1695                 :                  * PD_ALL_VISIBLE flag if the inserter committed
    1696                 :                  * asynchronously. See SetHintBits for more info. Check that
    1697                 :                  * the tuple is hinted xmin-committed because of that.
    1698                 :                  */
    1699 GIC     8821538 :                 if (prunestate->all_visible)
    1700                 :                 {
    1701                 :                     TransactionId xmin;
    1702                 : 
    1703         8435183 :                     if (!HeapTupleHeaderXminCommitted(tuple.t_data))
    1704                 :                     {
    1705 CBC          34 :                         prunestate->all_visible = false;
    1706              34 :                         break;
    1707                 :                     }
    1708                 : 
    1709 ECB             :                     /*
    1710                 :                      * The inserter definitely committed. But is it old enough
    1711                 :                      * that everyone sees it as committed?
    1712                 :                      */
    1713 GIC     8435149 :                     xmin = HeapTupleHeaderGetXmin(tuple.t_data);
    1714 GNC     8435149 :                     if (!TransactionIdPrecedes(xmin,
    1715                 :                                                vacrel->cutoffs.OldestXmin))
    1716                 :                     {
    1717 CBC        3129 :                         prunestate->all_visible = false;
    1718 GIC        3129 :                         break;
    1719 ECB             :                     }
    1720                 : 
    1721                 :                     /* Track newest xmin on page. */
    1722 GNC     8432020 :                     if (TransactionIdFollows(xmin, prunestate->visibility_cutoff_xid) &&
    1723                 :                         TransactionIdIsNormal(xmin))
    1724 GIC      483725 :                         prunestate->visibility_cutoff_xid = xmin;
    1725                 :                 }
    1726         8818375 :                 break;
    1727          319145 :             case HEAPTUPLE_RECENTLY_DEAD:
    1728 ECB             : 
    1729                 :                 /*
    1730                 :                  * If tuple is recently dead then we must not remove it from
    1731                 :                  * the relation.  (We only remove items that are LP_DEAD from
    1732                 :                  * pruning.)
    1733                 :                  */
    1734 GIC      319145 :                 recently_dead_tuples++;
    1735          319145 :                 prunestate->all_visible = false;
    1736          319145 :                 break;
    1737             429 :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    1738                 : 
    1739                 :                 /*
    1740 ECB             :                  * We do not count these rows as live, because we expect the
    1741                 :                  * inserting transaction to update the counters at commit, and
    1742                 :                  * we assume that will happen only after we report our
    1743                 :                  * results.  This assumption is a bit shaky, but it is what
    1744                 :                  * acquire_sample_rows() does, so be consistent.
    1745                 :                  */
    1746 GIC         429 :                 prunestate->all_visible = false;
    1747             429 :                 break;
    1748               4 :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    1749                 :                 /* This is an expected case during concurrent vacuum */
    1750               4 :                 prunestate->all_visible = false;
    1751 ECB             : 
    1752                 :                 /*
    1753 EUB             :                  * Count such rows as live.  As above, we assume the deleting
    1754                 :                  * transaction will commit and update the counters after we
    1755                 :                  * report.
    1756                 :                  */
    1757 GIC           4 :                 live_tuples++;
    1758 CBC           4 :                 break;
    1759 UIC           0 :             default:
    1760               0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1761 ECB             :                 break;
    1762                 :         }
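                 : 
                 :         /*
                 :          * Editor's note -- the all-visible test applied to LIVE tuples in
                 :          * the switch above, restated as a hypothetical helper (a sketch
                 :          * only; no such function exists in this file).  The caller would
                 :          * additionally track the newest committed xmin to maintain
                 :          * visibility_cutoff_xid:
                 :          *
                 :          *     static inline bool
                 :          *     tuple_visible_to_all(HeapTupleHeader tup,
                 :          *                          TransactionId oldest_xmin)
                 :          *     {
                 :          *         if (!HeapTupleHeaderXminCommitted(tup))
                 :          *             return false;   -- inserter may have committed async
                 :          *         return TransactionIdPrecedes(HeapTupleHeaderGetXmin(tup),
                 :          *                                      oldest_xmin);
                 :          *     }
                 :          */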
    1763                 : 
    1764 GIC     9141116 :         prunestate->hastup = true;   /* page makes rel truncation unsafe */
    1765                 : 
    1766                 :         /* Tuple with storage -- consider need to freeze */
    1767 GNC     9141116 :         if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz,
    1768                 :                                       &frozen[tuples_frozen], &totally_frozen))
    1769                 :         {
    1770                 :             /* Save prepared freeze plan for later */
    1771         4474824 :             frozen[tuples_frozen++].offset = offnum;
    1772                 :         }
    1773                 : 
    1774 ECB             :         /*
    1775                 :          * If any tuple isn't either totally frozen already or eligible to
    1776                 :          * become totally frozen (according to its freeze plan), then the page
    1777                 :          * definitely cannot be set all-frozen in the visibility map later on
    1778                 :          */
    1779 GNC     9141116 :         if (!totally_frozen)
    1780 GIC      706764 :             prunestate->all_frozen = false;
    1781                 :     }
    1782                 : 
    1783 ECB             :     /*
    1784                 :      * We have now divided every item on the page into either an LP_DEAD item
     1785                 :      * that will need to be vacuumed in indexes later, or an LP_NORMAL tuple
    1786                 :      * that remains and needs to be considered for freezing now (LP_UNUSED and
    1787                 :      * LP_REDIRECT items also remain, but are of no further interest to us).
    1788                 :      */
    1789 GNC      165089 :     vacrel->offnum = InvalidOffsetNumber;
    1790                 : 
    1791 ECB             :     /*
    1792                 :      * Freeze the page when heap_prepare_freeze_tuple indicates that at least
    1793                 :      * one XID/MXID from before FreezeLimit/MultiXactCutoff is present.  Also
    1794                 :      * freeze when pruning generated an FPI, if doing so means that we set the
    1795                 :      * page all-frozen afterwards (might not happen until final heap pass).
    1796                 :      */
    1797 GNC      165089 :     if (pagefrz.freeze_required || tuples_frozen == 0 ||
    1798           13782 :         (prunestate->all_visible && prunestate->all_frozen &&
    1799           13553 :          fpi_before != pgWalUsage.wal_fpi))
    1800                 :     {
    1801                 :         /*
    1802                 :          * We're freezing the page.  Our final NewRelfrozenXid doesn't need to
    1803                 :          * be affected by the XIDs that are just about to be frozen anyway.
    1804                 :          */
    1805          151486 :         vacrel->NewRelfrozenXid = pagefrz.FreezePageRelfrozenXid;
    1806          151486 :         vacrel->NewRelminMxid = pagefrz.FreezePageRelminMxid;
    1807                 : 
    1808          151486 :         if (tuples_frozen == 0)
    1809                 :         {
    1810                 :             /*
    1811                 :              * We have no freeze plans to execute, so there's no added cost
    1812                 :              * from following the freeze path.  That's why it was chosen.
    1813                 :              * This is important in the case where the page only contains
    1814                 :              * totally frozen tuples at this point (perhaps only following
    1815                 :              * pruning).  Such pages can be marked all-frozen in the VM by our
    1816                 :              * caller, even though none of its tuples were newly frozen here
    1817                 :              * (note that the "no freeze" path never sets pages all-frozen).
    1818                 :              *
    1819                 :              * We never increment the frozen_pages instrumentation counter
    1820                 :              * here, since it only counts pages with newly frozen tuples
    1821                 :              * (don't confuse that with pages newly set all-frozen in VM).
    1822                 :              */
    1823                 :         }
    1824                 :         else
    1825                 :         {
    1826                 :             TransactionId snapshotConflictHorizon;
    1827                 : 
    1828           80091 :             vacrel->frozen_pages++;
    1829                 : 
    1830                 :             /*
    1831                 :              * We can use visibility_cutoff_xid as our cutoff for conflicts
    1832                 :              * when the whole page is eligible to become all-frozen in the VM
    1833                 :              * once we're done with it.  Otherwise we generate a conservative
    1834                 :              * cutoff by stepping back from OldestXmin.
    1835                 :              */
    1836           80091 :             if (prunestate->all_visible && prunestate->all_frozen)
    1837                 :             {
    1838                 :                 /* Using same cutoff when setting VM is now unnecessary */
    1839           80067 :                 snapshotConflictHorizon = prunestate->visibility_cutoff_xid;
    1840           80067 :                 prunestate->visibility_cutoff_xid = InvalidTransactionId;
    1841                 :             }
    1842                 :             else
    1843                 :             {
    1844                 :                 /* Avoids false conflicts when hot_standby_feedback in use */
    1845              24 :                 snapshotConflictHorizon = vacrel->cutoffs.OldestXmin;
    1846              24 :                 TransactionIdRetreat(snapshotConflictHorizon);
    1847                 :             }
    1848                 : 
    1849                 :             /* Execute all freeze plans for page as a single atomic action */
    1850           80091 :             heap_freeze_execute_prepared(vacrel->rel, buf,
    1851                 :                                          snapshotConflictHorizon,
    1852                 :                                          frozen, tuples_frozen);
    1853                 :         }
    1854                 :     }
    1855                 :     else
    1856                 :     {
    1857                 :         /*
    1858                 :          * Page requires "no freeze" processing.  It might be set all-visible
    1859                 :          * in the visibility map, but it can never be set all-frozen.
    1860                 :          */
    1861           13603 :         vacrel->NewRelfrozenXid = pagefrz.NoFreezePageRelfrozenXid;
    1862           13603 :         vacrel->NewRelminMxid = pagefrz.NoFreezePageRelminMxid;
    1863           13603 :         prunestate->all_frozen = false;
    1864           13603 :         tuples_frozen = 0;      /* avoid miscounts in instrumentation */
    1865 ECB             :     }
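                 : 
                 :     /*
                 :      * Editor's note -- the freeze-or-not decision above, restated as a
                 :      * hypothetical predicate (a sketch only; no such helper exists in
                 :      * this file).  pruning_emitted_fpi stands for the
                 :      * "fpi_before != pgWalUsage.wal_fpi" test:
                 :      *
                 :      *     static inline bool
                 :      *     should_freeze_page(const HeapPageFreeze *pagefrz,
                 :      *                        int tuples_frozen, bool all_visible,
                 :      *                        bool all_frozen, bool pruning_emitted_fpi)
                 :      *     {
                 :      *         return pagefrz->freeze_required ||  -- XIDs force freezing
                 :      *                tuples_frozen == 0 ||        -- freeze path is free
                 :      *                (all_visible && all_frozen && pruning_emitted_fpi);
                 :      *     }
                 :      */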
    1866                 : 
    1867                 :     /*
    1868                 :      * VACUUM will call heap_page_is_all_visible() during the second pass over
    1869                 :      * the heap to determine all_visible and all_frozen for the page -- this
    1870                 :      * is a specialized version of the logic from this function.  Now that
    1871                 :      * we've finished pruning and freezing, make sure that we're in total
    1872                 :      * agreement with heap_page_is_all_visible() using an assertion.
    1873                 :      */
    1874                 : #ifdef USE_ASSERT_CHECKING
     1875                 :     /* Note that the all_frozen value does not matter when !all_visible */
    1876 GNC      165089 :     if (prunestate->all_visible && lpdead_items == 0)
    1877                 :     {
    1878                 :         TransactionId cutoff;
    1879                 :         bool        all_frozen;
    1880                 : 
    1881 GIC      133830 :         if (!heap_page_is_all_visible(vacrel, buf, &cutoff, &all_frozen))
    1882 UIC           0 :             Assert(false);
    1883                 : 
    1884 GNC      133830 :         Assert(!TransactionIdIsValid(cutoff) ||
    1885                 :                cutoff == prunestate->visibility_cutoff_xid);
    1886                 :     }
    1887                 : #endif
    1888                 : 
    1889                 :     /*
    1890                 :      * Now save details of the LP_DEAD items from the page in vacrel
    1891                 :      */
    1892 CBC      165089 :     if (lpdead_items > 0)
    1893                 :     {
    1894           25629 :         VacDeadItems *dead_items = vacrel->dead_items;
    1895                 :         ItemPointerData tmp;
    1896                 : 
    1897           25629 :         vacrel->lpdead_item_pages++;
    1898 GNC       25629 :         prunestate->has_lpdead_items = true;
    1899                 : 
    1900 CBC       25629 :         ItemPointerSetBlockNumber(&tmp, blkno);
    1901                 : 
    1902          846534 :         for (int i = 0; i < lpdead_items; i++)
    1903 ECB             :         {
    1904 GIC      820905 :             ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
    1905          820905 :             dead_items->items[dead_items->num_items++] = tmp;
    1906 ECB             :         }
    1907                 : 
    1908 CBC       25629 :         Assert(dead_items->num_items <= dead_items->max_items);
    1909 GIC       25629 :         pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
    1910           25629 :                                      dead_items->num_items);
    1911                 : 
    1912                 :         /*
    1913                 :          * It was convenient to ignore LP_DEAD items in all_visible earlier on
    1914                 :          * to make the choice of whether or not to freeze the page unaffected
    1915                 :          * by the short-term presence of LP_DEAD items.  These LP_DEAD items
    1916                 :          * were effectively assumed to be LP_UNUSED items in the making.  It
    1917                 :          * doesn't matter which heap pass (initial pass or final pass) ends up
    1918                 :          * setting the page all-frozen, as long as the ongoing VACUUM does it.
    1919                 :          *
    1920                 :          * Now that freezing has been finalized, unset all_visible.  It needs
    1921                 :          * to reflect the present state of things, as expected by our caller.
    1922                 :          */
    1923 GNC       25629 :         prunestate->all_visible = false;
    1924                 :     }
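                 : 
                 :     /*
                 :      * Editor's note -- how each LP_DEAD offset becomes a 6-byte TID in
                 :      * the loop above (a standalone restatement of the same calls):
                 :      *
                 :      *     ItemPointerData tid;
                 :      *     ItemPointerSetBlockNumber(&tid, blkno);           -- heap page
                 :      *     ItemPointerSetOffsetNumber(&tid, deadoffsets[i]); -- line ptr
                 :      *     dead_items->items[dead_items->num_items++] = tid;
                 :      *
                 :      * Because blocks are processed in ascending order, the dead_items
                 :      * array ends up sorted in heap order, which is what the linear walk
                 :      * in lazy_vacuum_heap_rel relies on.
                 :      */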
    1925                 : 
    1926                 :     /* Finally, add page-local counts to whole-VACUUM counts */
    1927 GIC      165089 :     vacrel->tuples_deleted += tuples_deleted;
    1928 GNC      165089 :     vacrel->tuples_frozen += tuples_frozen;
    1929 GIC      165089 :     vacrel->lpdead_items += lpdead_items;
    1930          165089 :     vacrel->live_tuples += live_tuples;
    1931          165089 :     vacrel->recently_dead_tuples += recently_dead_tuples;
    1932          165089 : }
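                 : 
                 : /*
                 :  * Editor's note -- the per-page outputs lazy_scan_prune hands back to its
                 :  * caller through prunestate, summarized from the assignments above:
                 :  *
                 :  *     hastup                 -- page makes rel truncation unsafe
                 :  *     has_lpdead_items       -- page needs a second-pass visit
                 :  *     all_visible/all_frozen -- candidate visibility map bits
                 :  *     visibility_cutoff_xid  -- newest xmin among all-visible tuples
                 :  */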
    1933                 : 
    1934                 : /*
    1935 ECB             :  *  lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
    1936                 :  *
    1937                 :  * Caller need only hold a pin and share lock on the buffer, unlike
    1938                 :  * lazy_scan_prune, which requires a full cleanup lock.  While pruning isn't
    1939                 :  * performed here, it's quite possible that an earlier opportunistic pruning
    1940                 :  * operation left LP_DEAD items behind.  We'll at least collect any such items
    1941                 :  * in the dead_items array for removal from indexes.
    1942                 :  *
    1943                 :  * For aggressive VACUUM callers, we may return false to indicate that a full
    1944                 :  * cleanup lock is required for processing by lazy_scan_prune.  This is only
    1945                 :  * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
    1946                 :  * one or more tuples on the page.  We always return true for non-aggressive
    1947                 :  * callers.
    1948                 :  *
     1949                 :  * See lazy_scan_prune for an explanation of the hastup return flag.  The
     1950                 :  * recordfreespace flag instructs the caller on whether or not it should do
     1951                 :  * generic FSM processing for the page.
    1952                 :  */
    1953                 : static bool
    1954 GIC           4 : lazy_scan_noprune(LVRelState *vacrel,
    1955                 :                   Buffer buf,
    1956                 :                   BlockNumber blkno,
    1957                 :                   Page page,
    1958                 :                   bool *hastup,
    1959                 :                   bool *recordfreespace)
    1960                 : {
    1961                 :     OffsetNumber offnum,
    1962                 :                 maxoff;
    1963                 :     int         lpdead_items,
    1964                 :                 live_tuples,
    1965                 :                 recently_dead_tuples,
    1966 ECB             :                 missed_dead_tuples;
    1967                 :     HeapTupleHeader tupleheader;
    1968 GNC           4 :     TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
    1969               4 :     MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
    1970                 :     OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
    1971                 : 
    1972 GIC           4 :     Assert(BufferGetBlockNumber(buf) == blkno);
    1973                 : 
    1974               4 :     *hastup = false;            /* for now */
    1975               4 :     *recordfreespace = false;   /* for now */
    1976                 : 
    1977               4 :     lpdead_items = 0;
    1978               4 :     live_tuples = 0;
    1979               4 :     recently_dead_tuples = 0;
    1980 CBC           4 :     missed_dead_tuples = 0;
    1981 ECB             : 
    1982 GIC           4 :     maxoff = PageGetMaxOffsetNumber(page);
    1983               4 :     for (offnum = FirstOffsetNumber;
    1984 CBC          90 :          offnum <= maxoff;
    1985 GIC          86 :          offnum = OffsetNumberNext(offnum))
    1986 ECB             :     {
    1987                 :         ItemId      itemid;
    1988                 :         HeapTupleData tuple;
    1989                 : 
    1990 CBC          86 :         vacrel->offnum = offnum;
    1991              86 :         itemid = PageGetItemId(page, offnum);
    1992 ECB             : 
    1993 GIC          86 :         if (!ItemIdIsUsed(itemid))
    1994 LBC           0 :             continue;
    1995 ECB             : 
    1996 CBC          86 :         if (ItemIdIsRedirected(itemid))
    1997 ECB             :         {
    1998 UIC           0 :             *hastup = true;
    1999               0 :             continue;
    2000                 :         }
    2001                 : 
    2002 CBC          86 :         if (ItemIdIsDead(itemid))
    2003 ECB             :         {
    2004                 :             /*
    2005                 :              * Deliberately don't set hastup=true here.  See same point in
    2006 EUB             :              * lazy_scan_prune for an explanation.
    2007                 :              */
    2008 LBC           0 :             deadoffsets[lpdead_items++] = offnum;
    2009 UIC           0 :             continue;
    2010 EUB             :         }
    2011                 : 
    2012 GIC          86 :         *hastup = true;         /* page prevents rel truncation */
    2013              86 :         tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
    2014 GNC          86 :         if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
    2015                 :                                      &NoFreezePageRelfrozenXid,
    2016                 :                                      &NoFreezePageRelminMxid))
    2017                 :         {
    2018                 :             /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
    2019 GBC          64 :             if (vacrel->aggressive)
    2020 EUB             :             {
    2021                 :                 /*
    2022                 :                  * Aggressive VACUUMs must always be able to advance rel's
    2023 ECB             :                  * relfrozenxid to a value >= FreezeLimit (and be able to
    2024                 :                  * advance rel's relminmxid to a value >= MultiXactCutoff).
    2025                 :                  * The ongoing aggressive VACUUM won't be able to do that
    2026                 :                  * unless it can freeze an XID (or MXID) from this tuple now.
    2027                 :                  *
    2028                 :                  * The only safe option is to have caller perform processing
    2029                 :                  * of this page using lazy_scan_prune.  Caller might have to
    2030                 :                  * wait a while for a cleanup lock, but it can't be helped.
    2031                 :                  */
    2032 UIC           0 :                 vacrel->offnum = InvalidOffsetNumber;
    2033               0 :                 return false;
    2034                 :             }
    2035                 : 
    2036                 :             /*
    2037                 :              * Non-aggressive VACUUMs are under no obligation to advance
    2038                 :              * relfrozenxid (even by one XID).  We can be much laxer here.
    2039                 :              *
    2040                 :              * Currently we always just accept an older final relfrozenxid
    2041                 :              * and/or relminmxid value.  We never make caller wait or work a
    2042                 :              * little harder, even when it likely makes sense to do so.
    2043 EUB             :              */
    2044                 :         }
    2045                 : 
    2046 GIC          86 :         ItemPointerSet(&(tuple.t_self), blkno, offnum);
    2047              86 :         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
    2048              86 :         tuple.t_len = ItemIdGetLength(itemid);
    2049              86 :         tuple.t_tableOid = RelationGetRelid(vacrel->rel);
    2050                 : 
    2051 GNC          86 :         switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
    2052                 :                                          buf))
    2053                 :         {
    2054 GIC          83 :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    2055                 :             case HEAPTUPLE_LIVE:
    2056                 : 
    2057                 :                 /*
    2058 ECB             :                  * Count both cases as live, just like lazy_scan_prune
    2059                 :                  */
    2060 CBC          83 :                 live_tuples++;
    2061 ECB             : 
    2062 GIC          83 :                 break;
    2063 CBC           1 :             case HEAPTUPLE_DEAD:
    2064                 : 
    2065                 :                 /*
    2066 ECB             :                  * There is some useful work for pruning to do, that won't be
    2067                 :                  * done due to failure to get a cleanup lock.
    2068                 :                  */
    2069 GIC           1 :                 missed_dead_tuples++;
    2070               1 :                 break;
    2071               2 :             case HEAPTUPLE_RECENTLY_DEAD:
    2072 ECB             : 
    2073                 :                 /*
    2074                 :                  * Count in recently_dead_tuples, just like lazy_scan_prune
    2075                 :                  */
    2076 GIC           2 :                 recently_dead_tuples++;
    2077               2 :                 break;
    2078 UIC           0 :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    2079                 : 
    2080                 :                 /*
    2081 ECB             :                  * Do not count these rows as live, just like lazy_scan_prune
    2082                 :                  */
    2083 LBC           0 :                 break;
    2084 UIC           0 :             default:
    2085               0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    2086                 :                 break;
    2087                 :         }
    2088 ECB             :     }
    2089                 : 
    2090 GBC           4 :     vacrel->offnum = InvalidOffsetNumber;
    2091                 : 
    2092                 :     /*
    2093                 :      * By here we know for sure that caller can put off freezing and pruning
    2094                 :      * this particular page until the next VACUUM.  Remember its details now.
    2095 EUB             :      * (lazy_scan_prune expects a clean slate, so we have to do this last.)
    2096                 :      */
    2097 GNC           4 :     vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
    2098               4 :     vacrel->NewRelminMxid = NoFreezePageRelminMxid;
    2099                 : 
    2100                 :     /* Save any LP_DEAD items found on the page in dead_items array */
    2101 GIC           4 :     if (vacrel->nindexes == 0)
    2102 ECB             :     {
    2103                 :         /* Using one-pass strategy (since table has no indexes) */
    2104 UIC           0 :         if (lpdead_items > 0)
    2105                 :         {
    2106                 :             /*
    2107                 :              * Perfunctory handling for the corner case where a single pass
    2108                 :              * strategy VACUUM cannot get a cleanup lock, and it turns out
     2109 ECB             :              * that there are one or more LP_DEAD items: just count the LP_DEAD
    2110                 :              * items as missed_dead_tuples instead. (This is a bit dishonest,
    2111                 :              * but it beats having to maintain specialized heap vacuuming code
    2112                 :              * forever, for vanishingly little benefit.)
    2113                 :              */
    2114 UIC           0 :             *hastup = true;
    2115               0 :             missed_dead_tuples += lpdead_items;
    2116 EUB             :         }
    2117                 : 
    2118 UIC           0 :         *recordfreespace = true;
    2119                 :     }
    2120 GIC           4 :     else if (lpdead_items == 0)
    2121                 :     {
    2122                 :         /*
    2123                 :          * Won't be vacuuming this page later, so record page's freespace in
    2124                 :          * the FSM now
    2125                 :          */
    2126 GBC           4 :         *recordfreespace = true;
    2127 EUB             :     }
    2128                 :     else
    2129                 :     {
    2130 UBC           0 :         VacDeadItems *dead_items = vacrel->dead_items;
    2131                 :         ItemPointerData tmp;
    2132 ECB             : 
    2133                 :         /*
    2134                 :          * Page has LP_DEAD items, and so any references/TIDs that remain in
    2135                 :          * indexes will be deleted during index vacuuming (and then marked
    2136                 :          * LP_UNUSED in the heap)
    2137                 :          */
    2138 LBC           0 :         vacrel->lpdead_item_pages++;
    2139                 : 
    2140 UIC           0 :         ItemPointerSetBlockNumber(&tmp, blkno);
    2141                 : 
    2142 UBC           0 :         for (int i = 0; i < lpdead_items; i++)
    2143                 :         {
    2144 UIC           0 :             ItemPointerSetOffsetNumber(&tmp, deadoffsets[i]);
    2145               0 :             dead_items->items[dead_items->num_items++] = tmp;
    2146                 :         }
    2147                 : 
    2148               0 :         Assert(dead_items->num_items <= dead_items->max_items);
    2149               0 :         pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
    2150 UBC           0 :                                      dead_items->num_items);
    2151                 : 
    2152               0 :         vacrel->lpdead_items += lpdead_items;
    2153                 : 
    2154 EUB             :         /*
    2155                 :          * Assume that we'll go on to vacuum this heap page during final pass
    2156                 :          * over the heap.  Don't record free space until then.
    2157                 :          */
    2158 UIC           0 :         *recordfreespace = false;
    2159                 :     }
    2160 EUB             : 
    2161                 :     /*
    2162                 :      * Finally, add relevant page-local counts to whole-VACUUM counts
    2163                 :      */
    2164 GBC           4 :     vacrel->live_tuples += live_tuples;
    2165 GIC           4 :     vacrel->recently_dead_tuples += recently_dead_tuples;
    2166               4 :     vacrel->missed_dead_tuples += missed_dead_tuples;
    2167               4 :     if (missed_dead_tuples > 0)
    2168               1 :         vacrel->missed_dead_pages++;
    2169                 : 
    2170 EUB             :     /* Caller won't need to call lazy_scan_prune with same page */
    2171 GIC           4 :     return true;
    2172                 : }
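                 : 
                 : /*
                 :  * Editor's note -- a hedged sketch of the expected call pattern in
                 :  * lazy_scan_heap (hypothetical shape; see the real call site there for
                 :  * details).  A false return forces the caller to wait for a cleanup
                 :  * lock and process the page with lazy_scan_prune instead:
                 :  *
                 :  *     if (!ConditionalLockBufferForCleanup(buf))
                 :  *     {
                 :  *         LockBuffer(buf, BUFFER_LOCK_SHARE);
                 :  *         if (lazy_scan_noprune(vacrel, buf, blkno, page,
                 :  *                               &hastup, &recordfreespace))
                 :  *         {
                 :  *             -- processed without pruning; record free space if asked
                 :  *         }
                 :  *         else
                 :  *         {
                 :  *             -- aggressive VACUUM must freeze this page: get a full
                 :  *             -- cleanup lock, then fall through to lazy_scan_prune
                 :  *         }
                 :  *     }
                 :  */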
    2173                 : 
    2174                 : /*
    2175                 :  * Main entry point for index vacuuming and heap vacuuming.
    2176 ECB             :  *
     2177                 :  * Removes items collected in dead_items from the table's indexes, then marks the
    2178                 :  * same items LP_UNUSED in the heap.  See the comments above lazy_scan_heap
    2179                 :  * for full details.
    2180                 :  *
    2181                 :  * Also empties dead_items, freeing up space for later TIDs.
    2182                 :  *
    2183                 :  * We may choose to bypass index vacuuming at this point, though only when the
    2184                 :  * ongoing VACUUM operation will definitely only have one index scan/round of
    2185                 :  * index vacuuming.
    2186                 :  */
    2187                 : static void
    2188 GIC        1765 : lazy_vacuum(LVRelState *vacrel)
    2189                 : {
    2190                 :     bool        bypass;
    2191                 : 
    2192                 :     /* Should not end up here with no indexes */
    2193            1765 :     Assert(vacrel->nindexes > 0);
    2194            1765 :     Assert(vacrel->lpdead_item_pages > 0);
    2195                 : 
    2196            1765 :     if (!vacrel->do_index_vacuuming)
    2197                 :     {
    2198               9 :         Assert(!vacrel->do_index_cleanup);
    2199               9 :         vacrel->dead_items->num_items = 0;
    2200 CBC           9 :         return;
    2201                 :     }
    2202                 : 
    2203                 :     /*
    2204                 :      * Consider bypassing index vacuuming (and heap vacuuming) entirely.
    2205 ECB             :      *
    2206                 :      * We currently only do this in cases where the number of LP_DEAD items
    2207                 :      * for the entire VACUUM operation is close to zero.  This avoids sharp
    2208                 :      * discontinuities in the duration and overhead of successive VACUUM
    2209                 :      * operations that run against the same table with a fixed workload.
    2210                 :      * Ideally, successive VACUUM operations will behave as if there are
    2211                 :      * exactly zero LP_DEAD items in cases where there are close to zero.
    2212                 :      *
    2213                 :      * This is likely to be helpful with a table that is continually affected
    2214                 :      * by UPDATEs that can mostly apply the HOT optimization, but occasionally
    2215                 :      * have small aberrations that lead to just a few heap pages retaining
    2216                 :      * only one or two LP_DEAD items.  This is pretty common; even when the
    2217                 :      * DBA goes out of their way to make UPDATEs use HOT, it is practically
    2218                 :      * impossible to predict whether HOT will be applied in 100% of cases.
    2219                 :      * It's far easier to ensure that 99%+ of all UPDATEs against a table use
    2220                 :      * HOT through careful tuning.
    2221                 :      */
    2222 GIC        1756 :     bypass = false;
    2223            1756 :     if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
    2224                 :     {
    2225                 :         BlockNumber threshold;
    2226                 : 
    2227            1745 :         Assert(vacrel->num_index_scans == 0);
    2228            1745 :         Assert(vacrel->lpdead_items == vacrel->dead_items->num_items);
    2229            1745 :         Assert(vacrel->do_index_vacuuming);
    2230            1745 :         Assert(vacrel->do_index_cleanup);
    2231                 : 
    2232                 :         /*
    2233                 :          * This crossover point at which we'll start to do index vacuuming is
    2234 ECB             :          * expressed as a percentage of the total number of heap pages in the
    2235                 :          * table that are known to have at least one LP_DEAD item.  This is
    2236                 :          * much more important than the total number of LP_DEAD items, since
    2237                 :          * it's a proxy for the number of heap pages whose visibility map bits
    2238                 :          * cannot be set on account of bypassing index and heap vacuuming.
    2239                 :          *
    2240                 :          * We apply one further precautionary test: the space currently used
    2241                 :          * to store the TIDs (TIDs that now all point to LP_DEAD items) must
    2242                 :          * not exceed 32MB.  This limits the risk that we will bypass index
    2243                 :          * vacuuming again and again until eventually there is a VACUUM whose
    2244                 :          * dead_items space is not CPU cache resident.
    2245                 :          *
    2246                 :          * We don't take any special steps to remember the LP_DEAD items (such
    2247                 :          * as counting them in our final update to the stats system) when the
    2248                 :          * optimization is applied.  Though the accounting used in analyze.c's
    2249                 :          * acquire_sample_rows() will recognize the same LP_DEAD items as dead
    2250                 :          * rows in its own stats report, that's okay. The discrepancy should
    2251                 :          * be negligible.  If this optimization is ever expanded to cover more
    2252                 :          * cases then this may need to be reconsidered.
    2253                 :          */
    2254 GIC        1745 :         threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
    2255            1747 :         bypass = (vacrel->lpdead_item_pages < threshold &&
    2256               2 :                   vacrel->lpdead_items < MAXDEADITEMS(32L * 1024L * 1024L));
    2257                 :     }
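                 : 
                 :     /*
                 :      * Editor's note -- a worked example of the bypass test above,
                 :      * assuming BYPASS_THRESHOLD_PAGES is 0.02 (2% of rel_pages, per its
                 :      * definition earlier in this file): a 100,000-page table crosses
                 :      * over once 2,000 or more pages have LP_DEAD items.  And since each
                 :      * ItemPointerData entry is 6 bytes, MAXDEADITEMS(32MB) comes to
                 :      * roughly 5.5 million TIDs; both conditions must hold for index
                 :      * vacuuming to be bypassed.
                 :      */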
    2258                 : 
    2259            1756 :     if (bypass)
    2260                 :     {
    2261                 :         /*
    2262                 :          * There are almost zero TIDs.  Behave as if there were precisely
    2263                 :          * zero: bypass index vacuuming, but do index cleanup.
    2264                 :          *
    2265                 :          * We expect that the ongoing VACUUM operation will finish very
    2266 ECB             :          * quickly, so there is no point in considering speeding up as a
    2267                 :          * failsafe against wraparound failure. (Index cleanup is expected to
    2268                 :          * finish very quickly in cases where there were no ambulkdelete()
    2269                 :          * calls.)
    2270                 :          */
    2271 CBC           2 :         vacrel->do_index_vacuuming = false;
    2272                 :     }
    2273 GIC        1754 :     else if (lazy_vacuum_all_indexes(vacrel))
    2274                 :     {
    2275                 :         /*
    2276                 :          * We successfully completed a round of index vacuuming.  Do related
    2277                 :          * heap vacuuming now.
    2278                 :          */
    2279            1754 :         lazy_vacuum_heap_rel(vacrel);
    2280                 :     }
    2281                 :     else
    2282                 :     {
    2283 ECB             :         /*
    2284                 :          * Failsafe case.
    2285                 :          *
    2286                 :          * We attempted index vacuuming, but didn't finish a full round/full
    2287                 :          * index scan.  This happens when relfrozenxid or relminmxid is too
    2288                 :          * far in the past.
    2289                 :          *
    2290                 :          * From this point on the VACUUM operation will do no further index
    2291                 :          * vacuuming or heap vacuuming.  This VACUUM operation won't end up
    2292                 :          * back here again.
    2293                 :          */
    2294 UNC           0 :         Assert(VacuumFailsafeActive);
    2295                 :     }
    2296                 : 
    2297                 :     /*
    2298                 :      * Forget the LP_DEAD items that we just vacuumed (or just decided to not
    2299                 :      * vacuum)
    2300                 :      */
    2301 GIC        1756 :     vacrel->dead_items->num_items = 0;
    2302                 : }
    2303                 : 
    2304                 : /*
    2305                 :  *  lazy_vacuum_all_indexes() -- Main entry for index vacuuming
    2306 EUB             :  *
    2307                 :  * Returns true in the common case when all indexes were successfully
    2308                 :  * vacuumed.  Returns false in rare cases where we determined that the ongoing
    2309                 :  * VACUUM operation is at risk of taking too long to finish, leading to
    2310                 :  * wraparound failure.
    2311                 :  */
    2312                 : static bool
    2313 CBC        1754 : lazy_vacuum_all_indexes(LVRelState *vacrel)
    2314                 : {
    2315 GIC        1754 :     bool        allindexes = true;
    2316 GNC        1754 :     double      old_live_tuples = vacrel->rel->rd_rel->reltuples;
    2317                 : 
    2318 GIC        1754 :     Assert(vacrel->nindexes > 0);
    2319            1754 :     Assert(vacrel->do_index_vacuuming);
    2320            1754 :     Assert(vacrel->do_index_cleanup);
    2321                 : 
    2322                 :     /* Precheck for XID wraparound emergencies */
    2323            1754 :     if (lazy_check_wraparound_failsafe(vacrel))
    2324                 :     {
    2325                 :         /* Wraparound emergency -- don't even start an index scan */
    2326 LBC           0 :         return false;
    2327                 :     }
    2328 ECB             : 
    2329                 :     /* Report that we are now vacuuming indexes */
    2330 GIC        1754 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    2331 ECB             :                                  PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
    2332                 : 
    2333 CBC        1754 :     if (!ParallelVacuumIsActive(vacrel))
    2334                 :     {
    2335 GIC        5531 :         for (int idx = 0; idx < vacrel->nindexes; idx++)
    2336 ECB             :         {
    2337 GIC        3782 :             Relation    indrel = vacrel->indrels[idx];
    2338            3782 :             IndexBulkDeleteResult *istat = vacrel->indstats[idx];
    2339 EUB             : 
    2340 GNC        3782 :             vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
    2341                 :                                                           old_live_tuples,
    2342                 :                                                           vacrel);
    2343 ECB             : 
    2344 GIC        3782 :             if (lazy_check_wraparound_failsafe(vacrel))
    2345                 :             {
    2346 ECB             :                 /* Wraparound emergency -- end current index scan */
    2347 UIC           0 :                 allindexes = false;
    2348 LBC           0 :                 break;
    2349                 :             }
    2350 ECB             :         }
    2351                 :     }
    2352                 :     else
    2353                 :     {
    2354                 :         /* Outsource everything to parallel variant */
    2355 GNC           5 :         parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
    2356                 :                                             vacrel->num_index_scans);
    2357 ECB             : 
    2358                 :         /*
    2359                 :          * Do a postcheck to consider applying wraparound failsafe now.  Note
    2360 EUB             :          * that parallel VACUUM only gets the precheck and this postcheck.
    2361                 :          */
    2362 GIC           5 :         if (lazy_check_wraparound_failsafe(vacrel))
    2363 UIC           0 :             allindexes = false;
    2364                 :     }
    2365                 : 
    2366                 :     /*
    2367                 :      * We delete all LP_DEAD items from the first heap pass in all indexes on
    2368 ECB             :      * each call here (except calls where we choose to do the failsafe). This
    2369                 :      * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
    2370                 :      * of the failsafe triggering, which prevents the next call from taking
    2371                 :      * place).
    2372                 :      */
    2373 GIC        1754 :     Assert(vacrel->num_index_scans > 0 ||
    2374                 :            vacrel->dead_items->num_items == vacrel->lpdead_items);
    2375 GNC        1754 :     Assert(allindexes || VacuumFailsafeActive);
    2376 EUB             : 
    2377                 :     /*
    2378                 :      * Increase and report the number of index scans.
    2379                 :      *
    2380                 :      * We deliberately include the case where we started a round of bulk
    2381                 :      * deletes that we weren't able to finish due to the failsafe triggering.
    2382                 :      */
    2383 GIC        1754 :     vacrel->num_index_scans++;
    2384            1754 :     pgstat_progress_update_param(PROGRESS_VACUUM_NUM_INDEX_VACUUMS,
    2385            1754 :                                  vacrel->num_index_scans);
    2386 ECB             : 
    2387 GIC        1754 :     return allindexes;
    2388 ECB             : }
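                 : 
                 : /*
                 :  * Editor's note -- the failsafe cadence above, summarized: a serial
                 :  * VACUUM rechecks lazy_check_wraparound_failsafe() after each
                 :  * ambulkdelete() call (ending the index scan early on failure), while a
                 :  * parallel VACUUM only gets the precheck plus one postcheck after
                 :  * parallel_vacuum_bulkdel_all_indexes() returns.
                 :  */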
    2389                 : 
    2390                 : /*
    2391                 :  *  lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
    2392                 :  *
     2393                 :  * This routine marks LP_DEAD items in the vacrel->dead_items array as LP_UNUSED.
    2394                 :  * Pages that never had lazy_scan_prune record LP_DEAD items are not visited
    2395                 :  * at all.
    2396                 :  *
    2397                 :  * We may also be able to truncate the line pointer array of the heap pages we
    2398                 :  * visit.  If there is a contiguous group of LP_UNUSED items at the end of the
    2399                 :  * array, it can be reclaimed as free space.  These LP_UNUSED items usually
    2400                 :  * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
    2401                 :  * each page to LP_UNUSED, and then consider if it's possible to truncate the
    2402                 :  * page's line pointer array).
    2403                 :  *
     2404                 :  * Note: the reason for doing this as a second pass is that we cannot remove the
    2405                 :  * tuples until we've removed their index entries, and we want to process
    2406                 :  * index entry removal in batches as large as possible.
    2407                 :  */
    2408                 : static void
    2409 GIC        1754 : lazy_vacuum_heap_rel(LVRelState *vacrel)
    2410                 : {
    2411 GNC        1754 :     int         index = 0;
    2412            1754 :     BlockNumber vacuumed_pages = 0;
    2413 GIC        1754 :     Buffer      vmbuffer = InvalidBuffer;
    2414                 :     LVSavedErrInfo saved_err_info;
    2415                 : 
    2416            1754 :     Assert(vacrel->do_index_vacuuming);
    2417            1754 :     Assert(vacrel->do_index_cleanup);
    2418            1754 :     Assert(vacrel->num_index_scans > 0);
    2419                 : 
    2420                 :     /* Report that we are now vacuuming the heap */
    2421            1754 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    2422 ECB             :                                  PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
    2423                 : 
    2424                 :     /* Update error traceback information */
    2425 CBC        1754 :     update_vacuum_error_info(vacrel, &saved_err_info,
    2426 ECB             :                              VACUUM_ERRCB_PHASE_VACUUM_HEAP,
    2427                 :                              InvalidBlockNumber, InvalidOffsetNumber);
    2428                 : 
    2429 GIC       26951 :     while (index < vacrel->dead_items->num_items)
    2430                 :     {
    2431                 :         BlockNumber blkno;
    2432                 :         Buffer      buf;
    2433                 :         Page        page;
    2434                 :         Size        freespace;
    2435 ECB             : 
    2436 GIC       25197 :         vacuum_delay_point();
    2437                 : 
    2438 GNC       25197 :         blkno = ItemPointerGetBlockNumber(&vacrel->dead_items->items[index]);
    2439           25197 :         vacrel->blkno = blkno;
    2440                 : 
    2441                 :         /*
    2442                 :          * Pin the visibility map page in case we need to mark the page
    2443                 :          * all-visible.  In most cases this will be very cheap, because we'll
    2444                 :          * already have the correct page pinned anyway.
    2445                 :          */
    2446           25197 :         visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
    2447                 : 
    2448                 :         /* We need a non-cleanup exclusive lock to mark dead_items unused */
    2449           25197 :         buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
    2450                 :                                  vacrel->bstrategy);
    2451 GIC       25197 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    2452 GNC       25197 :         index = lazy_vacuum_heap_page(vacrel, blkno, buf, index, vmbuffer);
    2453                 : 
    2454                 :         /* Now that we've vacuumed the page, record its available space */
    2455 CBC       25197 :         page = BufferGetPage(buf);
    2456 GIC       25197 :         freespace = PageGetHeapFreeSpace(page);
    2457 ECB             : 
    2458 CBC       25197 :         UnlockReleaseBuffer(buf);
    2459 GNC       25197 :         RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
    2460 GIC       25197 :         vacuumed_pages++;
    2461                 :     }
    2462                 : 
    2463            1754 :     vacrel->blkno = InvalidBlockNumber;
    2464            1754 :     if (BufferIsValid(vmbuffer))
    2465 CBC        1754 :         ReleaseBuffer(vmbuffer);
    2466 ECB             : 
    2467                 :     /*
    2468                 :      * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
    2469                 :      * the second heap pass.  No more, no less.
    2470                 :      */
    2471 GIC        1754 :     Assert(index > 0);
    2472 CBC        1754 :     Assert(vacrel->num_index_scans > 1 ||
    2473 ECB             :            (index == vacrel->lpdead_items &&
    2474                 :             vacuumed_pages == vacrel->lpdead_item_pages));
    2475                 : 
    2476 GIC        1754 :     ereport(DEBUG2,
    2477 ECB             :             (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
    2478                 :                     vacrel->relname, (long long) index, vacuumed_pages)));
    2479                 : 
    2480                 :     /* Revert to the previous phase information for error traceback */
    2481 GIC        1754 :     restore_vacuum_error_info(vacrel, &saved_err_info);
    2482            1754 : }
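                 : 
                 : /*
                 :  * Editor's note -- the shape of the second heap pass above, sketched
                 :  * with the surrounding details elided: dead_items is sorted in heap
                 :  * order, so each iteration consumes one page's slice of TIDs, and
                 :  * lazy_vacuum_heap_page returns the index of the first TID belonging
                 :  * to the next page:
                 :  *
                 :  *     int index = 0;
                 :  *     while (index < dead_items->num_items)
                 :  *     {
                 :  *         blkno = ItemPointerGetBlockNumber(&dead_items->items[index]);
                 :  *         -- pin VM page; read and exclusively lock the heap buffer
                 :  *         index = lazy_vacuum_heap_page(vacrel, blkno, buf, index,
                 :  *                                       vmbuffer);
                 :  *         -- record the page's free space in the FSM
                 :  *     }
                 :  */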
    2483                 : 
    2484                 : /*
    2485 ECB             :  *  lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
    2486                 :  *                        vacrel->dead_items array.
    2487                 :  *
    2488                 :  * Caller must have an exclusive buffer lock on the buffer (though a full
    2489                 :  * cleanup lock is also acceptable).  vmbuffer must be valid and already have
    2490                 :  * a pin on blkno's visibility map page.
    2491                 :  *
    2492                 :  * index is an offset into the vacrel->dead_items array for the first listed
    2493                 :  * LP_DEAD item on the page.  The return value is the first index immediately
    2494                 :  * after all LP_DEAD items for the same page in the array.
    2495                 :  */
    2496                 : static int
    2497 CBC       25554 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
    2498                 :                       int index, Buffer vmbuffer)
    2499                 : {
    2500 GIC       25554 :     VacDeadItems *dead_items = vacrel->dead_items;
    2501           25554 :     Page        page = BufferGetPage(buffer);
    2502                 :     OffsetNumber unused[MaxHeapTuplesPerPage];
    2503 GNC       25554 :     int         nunused = 0;
    2504                 :     TransactionId visibility_cutoff_xid;
    2505                 :     bool        all_frozen;
    2506                 :     LVSavedErrInfo saved_err_info;
    2507                 : 
    2508 GIC       25554 :     Assert(vacrel->nindexes == 0 || vacrel->do_index_vacuuming);
    2509                 : 
    2510           25554 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
    2511                 : 
    2512 ECB             :     /* Update error traceback information */
    2513 GIC       25554 :     update_vacuum_error_info(vacrel, &saved_err_info,
    2514                 :                              VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
    2515 ECB             :                              InvalidOffsetNumber);
    2516                 : 
    2517 GIC       25554 :     START_CRIT_SECTION();
    2518 ECB             : 
    2519 GIC      845958 :     for (; index < dead_items->num_items; index++)
    2520                 :     {
    2521                 :         BlockNumber tblk;
    2522                 :         OffsetNumber toff;
    2523 ECB             :         ItemId      itemid;
    2524                 : 
    2525 CBC      843847 :         tblk = ItemPointerGetBlockNumber(&dead_items->items[index]);
    2526 GIC      843847 :         if (tblk != blkno)
    2527           23443 :             break;              /* past end of tuples for this block */
    2528 CBC      820404 :         toff = ItemPointerGetOffsetNumber(&dead_items->items[index]);
    2529 GIC      820404 :         itemid = PageGetItemId(page, toff);
    2530                 : 
    2531          820404 :         Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
    2532 CBC      820404 :         ItemIdSetUnused(itemid);
    2533 GNC      820404 :         unused[nunused++] = toff;
    2534 ECB             :     }
    2535                 : 
    2536 GNC       25554 :     Assert(nunused > 0);
    2537                 : 
    2538                 :     /* Attempt to truncate line pointer array now */
    2539 GIC       25554 :     PageTruncateLinePointerArray(page);
    2540 ECB             : 
    2541                 :     /*
    2542                 :      * Mark buffer dirty before we write WAL.
    2543                 :      */
    2544 CBC       25554 :     MarkBufferDirty(buffer);
    2545                 : 
    2546 ECB             :     /* XLOG stuff */
    2547 CBC       25554 :     if (RelationNeedsWAL(vacrel->rel))
    2548 ECB             :     {
    2549                 :         xl_heap_vacuum xlrec;
    2550                 :         XLogRecPtr  recptr;
    2551                 : 
    2552 GNC       24803 :         xlrec.nunused = nunused;
    2553                 : 
    2554 CBC       24803 :         XLogBeginInsert();
    2555 GIC       24803 :         XLogRegisterData((char *) &xlrec, SizeOfHeapVacuum);
    2556                 : 
    2557           24803 :         XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
    2558 GNC       24803 :         XLogRegisterBufData(0, (char *) unused, nunused * sizeof(OffsetNumber));
    2559 ECB             : 
    2560 GIC       24803 :         recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_VACUUM);
    2561                 : 
    2562 CBC       24803 :         PageSetLSN(page, recptr);
    2563                 :     }
    2564                 : 
    2565                 :     /*
    2566                 :      * End critical section, so we safely can do visibility tests (which
    2567 ECB             :      * possibly need to perform IO and allocate memory!). If we crash now the
    2568                 :      * page (including the corresponding vm bit) might not be marked all
    2569                 :      * visible, but that's fine. A later vacuum will fix that.
    2570                 :      */
    2571 GIC       25554 :     END_CRIT_SECTION();
    2572 ECB             : 
    2573                 :     /*
     2574                 :      * Now that we have removed the LP_DEAD items from the page, once again
    2575                 :      * check if the page has become all-visible.  The page is already marked
    2576                 :      * dirty, exclusively locked, and, if needed, a full page image has been
    2577                 :      * emitted.
    2578                 :      */
    2579 GNC       25554 :     Assert(!PageIsAllVisible(page));
    2580 GIC       25554 :     if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
    2581                 :                                  &all_frozen))
    2582                 :     {
    2583 GNC       25360 :         uint8       flags = VISIBILITYMAP_ALL_VISIBLE;
    2584                 : 
    2585           25360 :         if (all_frozen)
    2586                 :         {
    2587           20658 :             Assert(!TransactionIdIsValid(visibility_cutoff_xid));
    2588 CBC       20658 :             flags |= VISIBILITYMAP_ALL_FROZEN;
    2589                 :         }
    2590                 : 
    2591 GNC       25360 :         PageSetAllVisible(page);
    2592           25360 :         visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
    2593                 :                           vmbuffer, visibility_cutoff_xid, flags);
    2594                 :     }
    2595                 : 
    2596 ECB             :     /* Revert to the previous phase information for error traceback */
    2597 CBC       25554 :     restore_vacuum_error_info(vacrel, &saved_err_info);
    2598 GIC       25554 :     return index;
    2599                 : }
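
The index contract above relies on vacrel->dead_items being sorted in
block-then-offset order, so that each heap page's TIDs form one contiguous
run of array entries.  A minimal standalone sketch of that contract (plain C;
the Tid struct and function names are simplified stand-ins for
ItemPointerData and the real routines, not PostgreSQL APIs):

    #include <stdio.h>

    typedef struct { unsigned blkno; unsigned offnum; } Tid;

    /* Consume the contiguous run of TIDs belonging to tids[index].blkno and
     * return the first index past that run -- the same return contract as
     * lazy_vacuum_heap_page(). */
    static int
    vacuum_one_page(const Tid *tids, int ntids, int index)
    {
        unsigned    blkno = tids[index].blkno;

        while (index < ntids && tids[index].blkno == blkno)
            index++;            /* the real code marks the item LP_UNUSED */
        return index;
    }

    int
    main(void)
    {
        Tid         tids[] = {{1, 2}, {1, 5}, {4, 1}, {4, 3}, {9, 7}};
        int         ntids = 5, index = 0;

        while (index < ntids)
        {
            int         next = vacuum_one_page(tids, ntids, index);

            printf("page %u: %d dead items\n", tids[index].blkno, next - index);
            index = next;
        }
        return 0;               /* prints runs of 2, 2, and 1 items */
    }
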
    2600                 : 
    2601                 : /*
     2602 ECB             :  * Trigger the failsafe to avoid wraparound failure when the target table
     2603                 :  * has a relfrozenxid and/or relminmxid that is dangerously far in the past.
    2604                 :  * Triggering the failsafe makes the ongoing VACUUM bypass any further index
    2605                 :  * vacuuming and heap vacuuming.  Truncating the heap is also bypassed.
    2606                 :  *
     2607                 :  * Any remaining work (work that VACUUM cannot simply bypass) typically
     2608                 :  * speeds up once the failsafe triggers, because VACUUM stops applying any
     2609                 :  * cost-based delay that it started out with.
    2610                 :  *
     2611                 :  * Returns true when the failsafe has been triggered.
    2612                 :  */
    2613                 : static bool
    2614 GIC       42280 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
    2615                 : {
    2616 ECB             :     /* Don't warn more than once per VACUUM */
    2617 GNC       42280 :     if (VacuumFailsafeActive)
    2618 UIC           0 :         return true;
    2619 ECB             : 
    2620 GNC       42280 :     if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
    2621 ECB             :     {
    2622 UNC           0 :         VacuumFailsafeActive = true;
    2623                 : 
    2624                 :         /*
    2625                 :          * Abandon use of a buffer access strategy to allow use of all of
    2626                 :          * shared buffers.  We assume the caller who allocated the memory for
    2627                 :          * the BufferAccessStrategy will free it.
    2628                 :          */
    2629               0 :         vacrel->bstrategy = NULL;
    2630 EUB             : 
    2631                 :         /* Disable index vacuuming, index cleanup, and heap rel truncation */
    2632 UIC           0 :         vacrel->do_index_vacuuming = false;
    2633               0 :         vacrel->do_index_cleanup = false;
    2634               0 :         vacrel->do_rel_truncate = false;
    2635                 : 
    2636               0 :         ereport(WARNING,
    2637 EUB             :                 (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
    2638                 :                         vacrel->dbname, vacrel->relnamespace, vacrel->relname,
    2639                 :                         vacrel->num_index_scans),
    2640                 :                  errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
    2641                 :                  errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
    2642                 :                          "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
    2643                 : 
    2644                 :         /* Stop applying cost limits from this point on */
    2645 UIC           0 :         VacuumCostActive = false;
    2646               0 :         VacuumCostBalance = 0;
    2647                 : 
    2648               0 :         return true;
    2649                 :     }
    2650                 : 
    2651 GBC       42280 :     return false;
    2652 EUB             : }
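
The hard part above happens inside vacuum_xid_failsafe_check(), which lives in
vacuum.c.  At its core is a wraparound-aware age comparison against the
vacuum_failsafe_age GUC (1.6 billion by default); the sketch below is a
simplified stand-in that ignores MultiXactIds and permanent XIDs:

    #include <stdbool.h>
    #include <stdint.h>

    typedef uint32_t TransactionId;

    /* Hypothetical stand-in for the XID half of vacuum_xid_failsafe_check():
     * true when relfrozenxid has fallen dangerously far behind next_xid. */
    static bool
    xid_failsafe_check(TransactionId relfrozenxid, TransactionId next_xid,
                       uint32_t failsafe_age)
    {
        /* modular subtraction yields the age even across XID wraparound */
        uint32_t    age = next_xid - relfrozenxid;

        return age > failsafe_age;
    }

    int
    main(void)
    {
        /* an XID age of 2 billion exceeds the 1.6 billion default */
        return xid_failsafe_check(100, 2000000100u, 1600000000u) ? 0 : 1;
    }
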
    2653                 : 
    2654                 : /*
    2655                 :  *  lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
    2656                 :  */
    2657 ECB             : static void
    2658 GIC       34805 : lazy_cleanup_all_indexes(LVRelState *vacrel)
    2659                 : {
    2660           34805 :     double      reltuples = vacrel->new_rel_tuples;
    2661           34805 :     bool        estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
    2662                 : 
    2663           34805 :     Assert(vacrel->do_index_cleanup);
    2664 CBC       34805 :     Assert(vacrel->nindexes > 0);
    2665                 : 
    2666 ECB             :     /* Report that we are now cleaning up indexes */
    2667 CBC       34805 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    2668                 :                                  PROGRESS_VACUUM_PHASE_INDEX_CLEANUP);
    2669 ECB             : 
    2670 CBC       34805 :     if (!ParallelVacuumIsActive(vacrel))
    2671                 :     {
    2672 GIC       89389 :         for (int idx = 0; idx < vacrel->nindexes; idx++)
    2673 ECB             :         {
    2674 GIC       54593 :             Relation    indrel = vacrel->indrels[idx];
    2675           54593 :             IndexBulkDeleteResult *istat = vacrel->indstats[idx];
    2676 ECB             : 
    2677 GIC       54593 :             vacrel->indstats[idx] =
    2678 CBC       54593 :                 lazy_cleanup_one_index(indrel, istat, reltuples,
    2679                 :                                        estimated_count, vacrel);
    2680 ECB             :         }
    2681                 :     }
    2682                 :     else
    2683                 :     {
    2684                 :         /* Outsource everything to parallel variant */
    2685 GIC           9 :         parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
    2686                 :                                             vacrel->num_index_scans,
    2687                 :                                             estimated_count);
    2688                 :     }
    2689           34805 : }
    2690                 : 
    2691 ECB             : /*
    2692                 :  *  lazy_vacuum_one_index() -- vacuum index relation.
    2693                 :  *
     2694                 :  *      Delete all the index tuples containing a TID collected in the
     2695                 :  *      vacrel->dead_items array.  Also update running statistics.
    2696                 :  *      Exact details depend on index AM's ambulkdelete routine.
    2697                 :  *
    2698                 :  *      reltuples is the number of heap tuples to be passed to the
    2699                 :  *      bulkdelete callback.  It's always assumed to be estimated.
    2700                 :  *      See indexam.sgml for more info.
    2701                 :  *
    2702                 :  * Returns bulk delete stats derived from input stats
    2703                 :  */
    2704                 : static IndexBulkDeleteResult *
    2705 GIC        3782 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
    2706                 :                       double reltuples, LVRelState *vacrel)
    2707                 : {
    2708                 :     IndexVacuumInfo ivinfo;
    2709                 :     LVSavedErrInfo saved_err_info;
    2710                 : 
    2711 CBC        3782 :     ivinfo.index = indrel;
    2712 GNC        3782 :     ivinfo.heaprel = vacrel->rel;
    2713 GIC        3782 :     ivinfo.analyze_only = false;
    2714            3782 :     ivinfo.report_progress = false;
    2715            3782 :     ivinfo.estimated_count = true;
    2716            3782 :     ivinfo.message_level = DEBUG2;
    2717            3782 :     ivinfo.num_heap_tuples = reltuples;
    2718 CBC        3782 :     ivinfo.strategy = vacrel->bstrategy;
    2719 ECB             : 
    2720                 :     /*
    2721                 :      * Update error traceback information.
    2722                 :      *
    2723                 :      * The index name is saved during this phase and restored immediately
    2724                 :      * after this phase.  See vacuum_error_callback.
    2725                 :      */
    2726 GIC        3782 :     Assert(vacrel->indname == NULL);
    2727            3782 :     vacrel->indname = pstrdup(RelationGetRelationName(indrel));
    2728            3782 :     update_vacuum_error_info(vacrel, &saved_err_info,
    2729                 :                              VACUUM_ERRCB_PHASE_VACUUM_INDEX,
    2730                 :                              InvalidBlockNumber, InvalidOffsetNumber);
    2731                 : 
    2732                 :     /* Do bulk deletion */
    2733 CBC        3782 :     istat = vac_bulkdel_one_index(&ivinfo, istat, (void *) vacrel->dead_items);
    2734 ECB             : 
    2735                 :     /* Revert to the previous phase information for error traceback */
    2736 GIC        3782 :     restore_vacuum_error_info(vacrel, &saved_err_info);
    2737            3782 :     pfree(vacrel->indname);
    2738            3782 :     vacrel->indname = NULL;
    2739                 : 
    2740 CBC        3782 :     return istat;
    2741                 : }
    2742                 : 
    2743 ECB             : /*
    2744                 :  *  lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
    2745                 :  *
    2746                 :  *      Calls index AM's amvacuumcleanup routine.  reltuples is the number
    2747                 :  *      of heap tuples and estimated_count is true if reltuples is an
    2748                 :  *      estimated value.  See indexam.sgml for more info.
    2749                 :  *
    2750                 :  * Returns bulk delete stats derived from input stats
    2751                 :  */
    2752                 : static IndexBulkDeleteResult *
    2753 GIC       54593 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
    2754                 :                        double reltuples, bool estimated_count,
    2755                 :                        LVRelState *vacrel)
    2756                 : {
    2757                 :     IndexVacuumInfo ivinfo;
    2758                 :     LVSavedErrInfo saved_err_info;
    2759                 : 
    2760 CBC       54593 :     ivinfo.index = indrel;
    2761 GNC       54593 :     ivinfo.heaprel = vacrel->rel;
    2762 GIC       54593 :     ivinfo.analyze_only = false;
    2763           54593 :     ivinfo.report_progress = false;
    2764           54593 :     ivinfo.estimated_count = estimated_count;
    2765           54593 :     ivinfo.message_level = DEBUG2;
    2766                 : 
    2767           54593 :     ivinfo.num_heap_tuples = reltuples;
    2768 CBC       54593 :     ivinfo.strategy = vacrel->bstrategy;
    2769 ECB             : 
    2770                 :     /*
    2771                 :      * Update error traceback information.
    2772                 :      *
    2773                 :      * The index name is saved during this phase and restored immediately
    2774                 :      * after this phase.  See vacuum_error_callback.
    2775                 :      */
    2776 CBC       54593 :     Assert(vacrel->indname == NULL);
    2777 GIC       54593 :     vacrel->indname = pstrdup(RelationGetRelationName(indrel));
    2778           54593 :     update_vacuum_error_info(vacrel, &saved_err_info,
    2779                 :                              VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
    2780                 :                              InvalidBlockNumber, InvalidOffsetNumber);
    2781                 : 
    2782           54593 :     istat = vac_cleanup_one_index(&ivinfo, istat);
    2783                 : 
    2784 ECB             :     /* Revert to the previous phase information for error traceback */
    2785 CBC       54593 :     restore_vacuum_error_info(vacrel, &saved_err_info);
    2786           54593 :     pfree(vacrel->indname);
    2787 GIC       54593 :     vacrel->indname = NULL;
    2788                 : 
    2789           54593 :     return istat;
    2790 ECB             : }
    2791                 : 
    2792                 : /*
    2793                 :  * should_attempt_truncation - should we attempt to truncate the heap?
    2794                 :  *
    2795                 :  * Don't even think about it unless we have a shot at releasing a goodly
    2796                 :  * number of pages.  Otherwise, the time taken isn't worth it, mainly because
    2797                 :  * an AccessExclusive lock must be replayed on any hot standby, where it can
    2798                 :  * be particularly disruptive.
    2799                 :  *
     2800                 :  * Also don't attempt it if the wraparound failsafe is in effect.  The entire
     2801                 :  * system might be refusing to allocate new XIDs at this point.  The system
     2802                 :  * definitely won't return to normal unless and until VACUUM actually advances
     2803                 :  * the oldest relfrozenxid -- which hasn't happened for the target rel just yet.
    2804                 :  * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
    2805                 :  * truncate the table under these circumstances, an XID exhaustion error might
    2806                 :  * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
    2807                 :  * There is very little chance of truncation working out when the failsafe is
    2808                 :  * in effect in any case.  lazy_scan_prune makes the optimistic assumption
    2809                 :  * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
    2810                 :  * we're called.
    2811                 :  *
    2812                 :  * Also don't attempt it if we are doing early pruning/vacuuming, because a
    2813                 :  * scan which cannot find a truncated heap page cannot determine that the
    2814                 :  * snapshot is too old to read that page.
    2815                 :  */
    2816                 : static bool
    2817 GIC       36739 : should_attempt_truncation(LVRelState *vacrel)
    2818                 : {
    2819                 :     BlockNumber possibly_freeable;
    2820                 : 
    2821 GNC       36739 :     if (!vacrel->do_rel_truncate || VacuumFailsafeActive ||
    2822 GIC       36619 :         old_snapshot_threshold >= 0)
    2823             123 :         return false;
    2824                 : 
    2825 CBC       36616 :     possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
    2826 GIC       36616 :     if (possibly_freeable > 0 &&
    2827             107 :         (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
    2828             107 :          possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
    2829 CBC          95 :         return true;
    2830 ECB             : 
    2831 CBC       36521 :     return false;
    2832                 : }
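
REL_TRUNCATE_MINIMUM and REL_TRUNCATE_FRACTION are defined earlier in this
file (1000 and 16 at the time of writing).  The standalone sketch below
replays the arithmetic, which is handy for predicting whether a given table
will even be considered for truncation:

    #include <stdbool.h>
    #include <stdio.h>

    #define REL_TRUNCATE_MINIMUM 1000   /* assumed values, matching the */
    #define REL_TRUNCATE_FRACTION 16    /* defines earlier in this file */

    /* Same test as should_attempt_truncation(), minus the failsafe,
     * TRUNCATE-option, and old-snapshot checks. */
    static bool
    worth_truncating(unsigned rel_pages, unsigned nonempty_pages)
    {
        unsigned    possibly_freeable = rel_pages - nonempty_pages;

        return possibly_freeable > 0 &&
            (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
             possibly_freeable >= rel_pages / REL_TRUNCATE_FRACTION);
    }

    int
    main(void)
    {
        /* 400 empty tail pages in a 10000-page table: below both the 1000
         * page floor and the 10000/16 = 625 page fraction, so skipped */
        printf("%d\n", worth_truncating(10000, 9600));  /* prints 0 */
        /* 800 empty tail pages clear the 625 page fraction */
        printf("%d\n", worth_truncating(10000, 9200));  /* prints 1 */
        return 0;
    }
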
    2833 ECB             : 
    2834                 : /*
    2835                 :  * lazy_truncate_heap - try to truncate off any empty pages at the end
    2836                 :  */
    2837                 : static void
    2838 GIC          95 : lazy_truncate_heap(LVRelState *vacrel)
    2839 ECB             : {
    2840 GIC          95 :     BlockNumber orig_rel_pages = vacrel->rel_pages;
    2841                 :     BlockNumber new_rel_pages;
    2842                 :     bool        lock_waiter_detected;
    2843                 :     int         lock_retry;
    2844                 : 
    2845                 :     /* Report that we are now truncating */
    2846 CBC          95 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    2847                 :                                  PROGRESS_VACUUM_PHASE_TRUNCATE);
    2848 ECB             : 
    2849                 :     /* Update error traceback information one last time */
    2850 GIC          95 :     update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
    2851                 :                              vacrel->nonempty_pages, InvalidOffsetNumber);
    2852                 : 
    2853                 :     /*
    2854 ECB             :      * Loop until no more truncating can be done.
    2855                 :      */
    2856                 :     do
    2857                 :     {
    2858                 :         /*
    2859                 :          * We need full exclusive lock on the relation in order to do
    2860                 :          * truncation. If we can't get it, give up rather than waiting --- we
    2861                 :          * don't want to block other backends, and we don't want to deadlock
    2862                 :          * (which is quite possible considering we already hold a lower-grade
    2863                 :          * lock).
    2864                 :          */
    2865 GIC          95 :         lock_waiter_detected = false;
    2866              95 :         lock_retry = 0;
    2867                 :         while (true)
    2868                 :         {
    2869             295 :             if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
    2870              93 :                 break;
    2871                 : 
    2872                 :             /*
    2873 ECB             :              * Check for interrupts while trying to (re-)acquire the exclusive
    2874                 :              * lock.
    2875                 :              */
    2876 GIC         202 :             CHECK_FOR_INTERRUPTS();
    2877 ECB             : 
    2878 CBC         202 :             if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
    2879                 :                                 VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
    2880                 :             {
    2881                 :                 /*
     2882                 :                  * We failed to establish the lock in the specified number of
     2883                 :                  * retries, so we give up on truncating.
    2884 ECB             :                  */
    2885 GIC           2 :                 ereport(vacrel->verbose ? INFO : DEBUG2,
    2886 ECB             :                         (errmsg("\"%s\": stopping truncate due to conflicting lock request",
    2887                 :                                 vacrel->relname)));
    2888 GIC           3 :                 return;
    2889                 :             }
    2890                 : 
    2891             200 :             (void) WaitLatch(MyLatch,
    2892                 :                              WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
    2893 ECB             :                              VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
    2894                 :                              WAIT_EVENT_VACUUM_TRUNCATE);
    2895 GIC         200 :             ResetLatch(MyLatch);
    2896 ECB             :         }
    2897                 : 
    2898                 :         /*
    2899                 :          * Now that we have exclusive lock, look to see if the rel has grown
    2900                 :          * whilst we were vacuuming with non-exclusive lock.  If so, give up;
    2901                 :          * the newly added pages presumably contain non-deletable tuples.
    2902                 :          */
    2903 CBC          93 :         new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
    2904 GIC          93 :         if (new_rel_pages != orig_rel_pages)
    2905                 :         {
    2906                 :             /*
    2907                 :              * Note: we intentionally don't update vacrel->rel_pages with the
    2908                 :              * new rel size here.  If we did, it would amount to assuming that
    2909                 :              * the new pages are empty, which is unlikely. Leaving the numbers
    2910                 :              * alone amounts to assuming that the new pages have the same
    2911 ECB             :              * tuple density as existing ones, which is less unlikely.
    2912                 :              */
    2913 UIC           0 :             UnlockRelation(vacrel->rel, AccessExclusiveLock);
    2914               0 :             return;
    2915                 :         }
    2916                 : 
    2917                 :         /*
    2918                 :          * Scan backwards from the end to verify that the end pages actually
    2919                 :          * contain no tuples.  This is *necessary*, not optional, because
    2920                 :          * other backends could have added tuples to these pages whilst we
    2921 EUB             :          * were vacuuming.
    2922                 :          */
    2923 GIC          93 :         new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
    2924              93 :         vacrel->blkno = new_rel_pages;
    2925                 : 
    2926              93 :         if (new_rel_pages >= orig_rel_pages)
    2927                 :         {
    2928                 :             /* can't do anything after all */
    2929               1 :             UnlockRelation(vacrel->rel, AccessExclusiveLock);
    2930               1 :             return;
    2931 ECB             :         }
    2932                 : 
    2933                 :         /*
    2934                 :          * Okay to truncate.
    2935                 :          */
    2936 GIC          92 :         RelationTruncate(vacrel->rel, new_rel_pages);
    2937 ECB             : 
    2938                 :         /*
    2939                 :          * We can release the exclusive lock as soon as we have truncated.
    2940                 :          * Other backends can't safely access the relation until they have
    2941                 :          * processed the smgr invalidation that smgrtruncate sent out ... but
    2942                 :          * that should happen as part of standard invalidation processing once
    2943                 :          * they acquire lock on the relation.
    2944                 :          */
    2945 GIC          92 :         UnlockRelation(vacrel->rel, AccessExclusiveLock);
    2946                 : 
    2947                 :         /*
    2948                 :          * Update statistics.  Here, it *is* correct to adjust rel_pages
    2949                 :          * without also touching reltuples, since the tuple count wasn't
    2950                 :          * changed by the truncation.
    2951                 :          */
    2952              92 :         vacrel->removed_pages += orig_rel_pages - new_rel_pages;
    2953 CBC          92 :         vacrel->rel_pages = new_rel_pages;
    2954                 : 
    2955 GIC          92 :         ereport(vacrel->verbose ? INFO : DEBUG2,
    2956                 :                 (errmsg("table \"%s\": truncated %u to %u pages",
    2957                 :                         vacrel->relname,
    2958                 :                         orig_rel_pages, new_rel_pages)));
    2959              92 :         orig_rel_pages = new_rel_pages;
    2960 CBC          92 :     } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
    2961 ECB             : }
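
The retry budget in the loop above follows directly from two defines near the
top of this file.  Assuming their usual values of 5000 ms and 50 ms, VACUUM
gives up after 100 failed ConditionalLockRelation() attempts, i.e. roughly
five seconds of 50 ms latch sleeps:

    #include <stdio.h>

    #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50   /* ms; assumed value */
    #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000       /* ms; assumed value */

    int
    main(void)
    {
        /* upper bound on lock_retry in lazy_truncate_heap() */
        printf("max retries = %d\n",
               VACUUM_TRUNCATE_LOCK_TIMEOUT / VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL);
        return 0;               /* prints "max retries = 100" */
    }
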
    2962                 : 
    2963                 : /*
    2964                 :  * Rescan end pages to verify that they are (still) empty of tuples.
    2965                 :  *
    2966                 :  * Returns number of nondeletable pages (last nonempty page + 1).
    2967                 :  */
    2968                 : static BlockNumber
    2969 GIC          93 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
    2970                 : {
    2971                 :     BlockNumber blkno;
    2972                 :     BlockNumber prefetchedUntil;
    2973                 :     instr_time  starttime;
    2974                 : 
     2975                 :     /* Initialize starttime for the conflicting-lock-request checks below */
    2976              93 :     INSTR_TIME_SET_CURRENT(starttime);
    2977 ECB             : 
    2978                 :     /*
    2979                 :      * Start checking blocks at what we believe relation end to be and move
    2980                 :      * backwards.  (Strange coding of loop control is needed because blkno is
    2981                 :      * unsigned.)  To make the scan faster, we prefetch a few blocks at a time
    2982                 :      * in forward direction, so that OS-level readahead can kick in.
    2983                 :      */
    2984 CBC          93 :     blkno = vacrel->rel_pages;
    2985                 :     StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
    2986                 :                      "prefetch size must be power of 2");
    2987 GIC          93 :     prefetchedUntil = InvalidBlockNumber;
    2988            1409 :     while (blkno > vacrel->nonempty_pages)
    2989                 :     {
    2990                 :         Buffer      buf;
    2991                 :         Page        page;
    2992 ECB             :         OffsetNumber offnum,
    2993                 :                     maxoff;
    2994                 :         bool        hastup;
    2995                 : 
    2996                 :         /*
    2997                 :          * Check if another process requests a lock on our relation. We are
    2998                 :          * holding an AccessExclusiveLock here, so they will be waiting. We
    2999                 :          * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
    3000                 :          * only check if that interval has elapsed once every 32 blocks to
    3001                 :          * keep the number of system calls and actual shared lock table
    3002                 :          * lookups to a minimum.
    3003                 :          */
    3004 GIC        1320 :         if ((blkno % 32) == 0)
    3005                 :         {
    3006                 :             instr_time  currenttime;
    3007                 :             instr_time  elapsed;
    3008                 : 
    3009              32 :             INSTR_TIME_SET_CURRENT(currenttime);
    3010              32 :             elapsed = currenttime;
    3011              32 :             INSTR_TIME_SUBTRACT(elapsed, starttime);
    3012 CBC          32 :             if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
    3013                 :                 >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
    3014                 :             {
    3015 UIC           0 :                 if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
    3016                 :                 {
    3017 LBC           0 :                     ereport(vacrel->verbose ? INFO : DEBUG2,
    3018 ECB             :                             (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
    3019                 :                                     vacrel->relname)));
    3020                 : 
    3021 UIC           0 :                     *lock_waiter_detected = true;
    3022               0 :                     return blkno;
    3023 EUB             :                 }
    3024 UIC           0 :                 starttime = currenttime;
    3025 EUB             :             }
    3026                 :         }
    3027                 : 
    3028                 :         /*
    3029                 :          * We don't insert a vacuum delay point here, because we have an
     3030                 :          * exclusive lock on the table, which we want to hold for as short a
     3031                 :          * time as possible.  We still need to check for interrupts, however.
    3032                 :          */
    3033 GIC        1320 :         CHECK_FOR_INTERRUPTS();
    3034                 : 
    3035            1320 :         blkno--;
    3036                 : 
    3037                 :         /* If we haven't prefetched this lot yet, do so now. */
    3038            1320 :         if (prefetchedUntil > blkno)
    3039                 :         {
    3040                 :             BlockNumber prefetchStart;
    3041 ECB             :             BlockNumber pblkno;
    3042                 : 
    3043 CBC         125 :             prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
    3044 GIC        1610 :             for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
    3045                 :             {
    3046 CBC        1485 :                 PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
    3047 GIC        1485 :                 CHECK_FOR_INTERRUPTS();
    3048                 :             }
    3049             125 :             prefetchedUntil = prefetchStart;
    3050                 :         }
    3051 ECB             : 
    3052 CBC        1320 :         buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
    3053                 :                                  vacrel->bstrategy);
    3054 ECB             : 
    3055                 :         /* In this phase we only need shared access to the buffer */
    3056 GIC        1320 :         LockBuffer(buf, BUFFER_LOCK_SHARE);
    3057 ECB             : 
    3058 GIC        1320 :         page = BufferGetPage(buf);
    3059                 : 
    3060 CBC        1320 :         if (PageIsNew(page) || PageIsEmpty(page))
    3061                 :         {
    3062 UIC           0 :             UnlockReleaseBuffer(buf);
    3063               0 :             continue;
    3064 ECB             :         }
    3065                 : 
    3066 CBC        1320 :         hastup = false;
    3067 GIC        1320 :         maxoff = PageGetMaxOffsetNumber(page);
    3068 CBC        1320 :         for (offnum = FirstOffsetNumber;
    3069 GIC        2636 :              offnum <= maxoff;
    3070 GBC        1316 :              offnum = OffsetNumberNext(offnum))
    3071 EUB             :         {
    3072                 :             ItemId      itemid;
    3073                 : 
    3074 CBC        1320 :             itemid = PageGetItemId(page, offnum);
    3075 ECB             : 
    3076                 :             /*
    3077                 :              * Note: any non-unused item should be taken as a reason to keep
    3078                 :              * this page.  Even an LP_DEAD item makes truncation unsafe, since
    3079                 :              * we must not have cleaned out its index entries.
    3080                 :              */
    3081 GIC        1320 :             if (ItemIdIsUsed(itemid))
    3082 ECB             :             {
    3083 GIC           4 :                 hastup = true;
    3084               4 :                 break;          /* can stop scanning */
    3085                 :             }
    3086                 :         }                       /* scan along page */
    3087                 : 
    3088            1320 :         UnlockReleaseBuffer(buf);
    3089 ECB             : 
    3090                 :         /* Done scanning if we found a tuple here */
    3091 CBC        1320 :         if (hastup)
    3092               4 :             return blkno + 1;
    3093                 :     }
    3094                 : 
    3095                 :     /*
    3096 ECB             :      * If we fall out of the loop, all the previously-thought-to-be-empty
    3097                 :      * pages still are; we need not bother to look at the last known-nonempty
    3098                 :      * page.
    3099                 :      */
    3100 CBC          89 :     return vacrel->nonempty_pages;
    3101                 : }
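
The prefetch arithmetic above depends on PREFETCH_SIZE being a power of 2,
which is exactly what the StaticAssertStmt enforces: masking off the low bits
rounds blkno down to the previous window boundary.  A standalone illustration,
assuming the usual PREFETCH_SIZE of 32:

    #include <stdio.h>

    #define PREFETCH_SIZE ((unsigned) 32)   /* assumed; must be a power of 2 */

    int
    main(void)
    {
        unsigned    blkno = 1234;
        /* round down to the nearest PREFETCH_SIZE boundary, as in
         * count_nondeletable_pages(); only valid for power-of-2 sizes */
        unsigned    prefetchStart = blkno & ~(PREFETCH_SIZE - 1);

        printf("%u\n", prefetchStart);      /* prints 1216 = 38 * 32 */
        return 0;
    }
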
    3102                 : 
    3103                 : /*
    3104                 :  * Returns the number of dead TIDs that VACUUM should allocate space to
     3105                 :  * store, given a heap rel of size vacrel->rel_pages, and given the current
     3106                 :  * maintenance_work_mem setting (or the current autovacuum_work_mem setting,
     3107                 :  * when applicable).
    3108 ECB             :  *
    3109                 :  * See the comments at the head of this file for rationale.
    3110                 :  */
    3111                 : static int
    3112 GIC       36739 : dead_items_max_items(LVRelState *vacrel)
    3113                 : {
    3114                 :     int64       max_items;
    3115           36739 :     int         vac_work_mem = IsAutoVacuumWorkerProcess() &&
    3116              75 :     autovacuum_work_mem != -1 ?
    3117           36814 :     autovacuum_work_mem : maintenance_work_mem;
    3118                 : 
    3119           36739 :     if (vacrel->nindexes > 0)
    3120 ECB             :     {
    3121 GIC       34933 :         BlockNumber rel_pages = vacrel->rel_pages;
    3122                 : 
    3123 CBC       34933 :         max_items = MAXDEADITEMS(vac_work_mem * 1024L);
    3124           34933 :         max_items = Min(max_items, INT_MAX);
    3125           34933 :         max_items = Min(max_items, MAXDEADITEMS(MaxAllocSize));
    3126                 : 
    3127 ECB             :         /* curious coding here to ensure the multiplication can't overflow */
    3128 GIC       34933 :         if ((BlockNumber) (max_items / MaxHeapTuplesPerPage) > rel_pages)
    3129 CBC       34933 :             max_items = rel_pages * MaxHeapTuplesPerPage;
    3130                 : 
    3131 ECB             :         /* stay sane if small maintenance_work_mem */
    3132 CBC       34933 :         max_items = Max(max_items, MaxHeapTuplesPerPage);
    3133 ECB             :     }
    3134                 :     else
    3135                 :     {
    3136                 :         /* One-pass case only stores a single heap page's TIDs at a time */
    3137 CBC        1806 :         max_items = MaxHeapTuplesPerPage;
    3138                 :     }
    3139                 : 
    3140           36739 :     return (int) max_items;
    3141                 : }
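
To put rough numbers on the multi-index branch above: each dead TID is a
6-byte ItemPointerData, and MAXDEADITEMS (from vacuum.h) essentially divides
the byte budget by that size after subtracting the small VacDeadItems header.
A back-of-the-envelope sketch (header size approximated; the exact layout and
the rel_pages/MaxAllocSize clamps are in the real code above):

    #include <stdio.h>

    #define ITEM_POINTER_SIZE 6     /* sizeof(ItemPointerData) */
    #define HEADER_SIZE 16          /* rough allowance for the two counters */

    int
    main(void)
    {
        long        vac_work_mem = 64 * 1024;   /* 64MB, expressed in KB */
        long        max_items = (vac_work_mem * 1024L - HEADER_SIZE) /
                                ITEM_POINTER_SIZE;

        printf("max_items = %ld\n", max_items); /* ~11.2 million dead TIDs */
        return 0;
    }
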
    3142                 : 
    3143                 : /*
    3144                 :  * Allocate dead_items (either using palloc, or in dynamic shared memory).
    3145 ECB             :  * Sets dead_items in vacrel for caller.
    3146                 :  *
    3147                 :  * Also handles parallel initialization as part of allocating dead_items in
    3148                 :  * DSM when required.
    3149                 :  */
    3150                 : static void
    3151 GIC       36739 : dead_items_alloc(LVRelState *vacrel, int nworkers)
    3152                 : {
    3153                 :     VacDeadItems *dead_items;
    3154                 :     int         max_items;
    3155                 : 
    3156           36739 :     max_items = dead_items_max_items(vacrel);
    3157           36739 :     Assert(max_items >= MaxHeapTuplesPerPage);
    3158                 : 
    3159 ECB             :     /*
    3160                 :      * Initialize state for a parallel vacuum.  As of now, only one worker can
    3161                 :      * be used for an index, so we invoke parallelism only if there are at
    3162                 :      * least two indexes on a table.
    3163                 :      */
    3164 CBC       36739 :     if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
    3165 ECB             :     {
    3166                 :         /*
    3167                 :          * Since parallel workers cannot access data in temporary tables, we
    3168                 :          * can't perform parallel vacuum on them.
    3169                 :          */
    3170 GIC       15787 :         if (RelationUsesLocalBuffers(vacrel->rel))
    3171                 :         {
    3172 ECB             :             /*
    3173                 :              * Give warning only if the user explicitly tries to perform a
     3174                 :              * parallel vacuum on a temporary table.
    3175                 :              */
    3176 GIC           3 :             if (nworkers > 0)
    3177               3 :                 ereport(WARNING,
    3178 ECB             :                         (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
    3179                 :                                 vacrel->relname)));
    3180                 :         }
    3181                 :         else
    3182 GIC       15784 :             vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
    3183                 :                                                vacrel->nindexes, nworkers,
    3184 ECB             :                                                max_items,
    3185 CBC       15784 :                                                vacrel->verbose ? INFO : DEBUG2,
    3186                 :                                                vacrel->bstrategy);
    3187                 : 
    3188                 :         /* If parallel mode started, dead_items space is allocated in DSM */
    3189 GIC       15787 :         if (ParallelVacuumIsActive(vacrel))
    3190 ECB             :         {
    3191 GIC           9 :             vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs);
    3192               9 :             return;
    3193 ECB             :         }
    3194                 :     }
    3195                 : 
    3196                 :     /* Serial VACUUM case */
    3197 CBC       36730 :     dead_items = (VacDeadItems *) palloc(vac_max_items_to_alloc_size(max_items));
    3198 GIC       36730 :     dead_items->max_items = max_items;
    3199 CBC       36730 :     dead_items->num_items = 0;
    3200 ECB             : 
    3201 GIC       36730 :     vacrel->dead_items = dead_items;
    3202                 : }
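
Condensing the gating conditions above into one predicate (a sketch only; the
real code additionally lets parallel_vacuum_init() decide how many workers, if
any, actually launch):

    #include <stdbool.h>

    /* Parallel vacuum is only attempted when the caller permits it
     * (nworkers >= 0), there are at least two indexes (at most one worker
     * per index), index vacuuming is enabled, and the relation is not
     * temporary (workers cannot read another backend's local buffers). */
    static bool
    can_try_parallel_vacuum(int nworkers, int nindexes,
                            bool do_index_vacuuming, bool is_temp)
    {
        return nworkers >= 0 && nindexes > 1 && do_index_vacuuming && !is_temp;
    }

    int
    main(void)
    {
        /* e.g. a temp table with 3 indexes is never eligible */
        return can_try_parallel_vacuum(2, 3, true, true) ? 1 : 0;
    }
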
    3203                 : 
    3204                 : /*
    3205 ECB             :  * Perform cleanup for resources allocated in dead_items_alloc
    3206                 :  */
    3207                 : static void
    3208 GIC       36739 : dead_items_cleanup(LVRelState *vacrel)
    3209 ECB             : {
    3210 GIC       36739 :     if (!ParallelVacuumIsActive(vacrel))
    3211                 :     {
    3212                 :         /* Don't bother with pfree here */
    3213           36730 :         return;
    3214                 :     }
    3215                 : 
    3216 ECB             :     /* End parallel mode */
    3217 GIC           9 :     parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
    3218 CBC           9 :     vacrel->pvs = NULL;
    3219                 : }
    3220                 : 
    3221 ECB             : /*
    3222                 :  * Check if every tuple in the given page is visible to all current and future
     3223                 :  * transactions.  Also return the visibility_cutoff_xid, which is the highest
     3224                 :  * xmin amongst the visible tuples.  Set *all_frozen to true if every tuple
     3225                 :  * on this page is frozen.
     3226                 :  *
     3227                 :  * This is a stripped down version of lazy_scan_prune().  If you change
     3228                 :  * anything here, make sure that everything stays in sync.  Note that an
     3229                 :  * assertion in lazy_scan_prune() calls us to verify that the two functions
     3230                 :  * still agree.  Be sure to avoid introducing new side-effects here.
    3231                 :  */
    3232                 : static bool
    3233 GIC      159384 : heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
    3234                 :                          TransactionId *visibility_cutoff_xid,
    3235                 :                          bool *all_frozen)
    3236                 : {
    3237          159384 :     Page        page = BufferGetPage(buf);
    3238          159384 :     BlockNumber blockno = BufferGetBlockNumber(buf);
    3239                 :     OffsetNumber offnum,
    3240                 :                 maxoff;
    3241 CBC      159384 :     bool        all_visible = true;
    3242                 : 
    3243 GIC      159384 :     *visibility_cutoff_xid = InvalidTransactionId;
    3244          159384 :     *all_frozen = true;
    3245 ECB             : 
    3246 CBC      159384 :     maxoff = PageGetMaxOffsetNumber(page);
    3247 GIC      159384 :     for (offnum = FirstOffsetNumber;
    3248         9021375 :          offnum <= maxoff && all_visible;
    3249 CBC     8861991 :          offnum = OffsetNumberNext(offnum))
    3250                 :     {
    3251 ECB             :         ItemId      itemid;
    3252                 :         HeapTupleData tuple;
    3253                 : 
    3254                 :         /*
    3255                 :          * Set the offset number so that we can display it along with any
    3256                 :          * error that occurred while processing this tuple.
    3257                 :          */
    3258 GIC     8861992 :         vacrel->offnum = offnum;
    3259         8861992 :         itemid = PageGetItemId(page, offnum);
    3260                 : 
    3261                 :         /* Unused or redirect line pointers are of no interest */
    3262         8861992 :         if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
    3263          431347 :             continue;
    3264                 : 
    3265         8430645 :         ItemPointerSet(&(tuple.t_self), blockno, offnum);
    3266 ECB             : 
    3267                 :         /*
    3268                 :          * Dead line pointers can have index pointers pointing to them. So
    3269                 :          * they can't be treated as visible
    3270                 :          */
    3271 CBC     8430645 :         if (ItemIdIsDead(itemid))
    3272                 :         {
    3273               1 :             all_visible = false;
    3274 GIC           1 :             *all_frozen = false;
    3275               1 :             break;
    3276                 :         }
    3277                 : 
    3278         8430644 :         Assert(ItemIdIsNormal(itemid));
    3279 ECB             : 
    3280 GIC     8430644 :         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
    3281 CBC     8430644 :         tuple.t_len = ItemIdGetLength(itemid);
    3282         8430644 :         tuple.t_tableOid = RelationGetRelid(vacrel->rel);
    3283 ECB             : 
    3284 GNC     8430644 :         switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
    3285                 :                                          buf))
    3286                 :         {
    3287 CBC     8430508 :             case HEAPTUPLE_LIVE:
    3288                 :                 {
    3289 ECB             :                     TransactionId xmin;
    3290                 : 
    3291                 :                     /* Check comments in lazy_scan_prune. */
    3292 GIC     8430508 :                     if (!HeapTupleHeaderXminCommitted(tuple.t_data))
    3293 ECB             :                     {
    3294 UIC           0 :                         all_visible = false;
    3295               0 :                         *all_frozen = false;
    3296 LBC           0 :                         break;
    3297                 :                     }
    3298                 : 
    3299                 :                     /*
    3300                 :                      * The inserter definitely committed. But is it old enough
    3301 ECB             :                      * that everyone sees it as committed?
    3302                 :                      */
    3303 GBC     8430508 :                     xmin = HeapTupleHeaderGetXmin(tuple.t_data);
    3304 GNC     8430508 :                     if (!TransactionIdPrecedes(xmin,
    3305                 :                                                vacrel->cutoffs.OldestXmin))
    3306 EUB             :                     {
    3307 GIC          57 :                         all_visible = false;
    3308              57 :                         *all_frozen = false;
    3309              57 :                         break;
    3310                 :                     }
    3311                 : 
    3312                 :                     /* Track newest xmin on page. */
    3313 GNC     8430451 :                     if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
    3314                 :                         TransactionIdIsNormal(xmin))
    3315 CBC       24367 :                         *visibility_cutoff_xid = xmin;
    3316                 : 
    3317                 :                     /* Check whether this tuple is already frozen or not */
    3318        15458194 :                     if (all_visible && *all_frozen &&
    3319         7027743 :                         heap_tuple_needs_eventual_freeze(tuple.t_data))
    3320           13382 :                         *all_frozen = false;
    3321                 :                 }
    3322 GIC     8430451 :                 break;
    3323                 : 
    3324 CBC         136 :             case HEAPTUPLE_DEAD:
    3325                 :             case HEAPTUPLE_RECENTLY_DEAD:
    3326 ECB             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    3327                 :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    3328                 :                 {
    3329 CBC         136 :                     all_visible = false;
    3330             136 :                     *all_frozen = false;
    3331             136 :                     break;
    3332                 :                 }
    3333 LBC           0 :             default:
    3334 UIC           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    3335 ECB             :                 break;
    3336                 :         }
    3337                 :     }                           /* scan along page */
    3338                 : 
    3339                 :     /* Clear the offset information once we have processed the given page. */
    3340 CBC      159384 :     vacrel->offnum = InvalidOffsetNumber;
    3341 ECB             : 
    3342 CBC      159384 :     return all_visible;
    3343                 : }
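
The visibility_cutoff_xid bookkeeping above is a running maximum under
PostgreSQL's circular XID comparison.  A standalone sketch of that
wraparound-aware test, mirroring the signed-difference trick used by
TransactionIdFollows() (the real function also special-cases permanent XIDs):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t TransactionId;

    /* true when id1 is logically later than id2, even across wraparound */
    static bool
    xid_follows(TransactionId id1, TransactionId id2)
    {
        int32_t     diff = (int32_t) (id1 - id2);

        return diff > 0;
    }

    int
    main(void)
    {
        /* 3 follows 4294967290 once the XID counter has wrapped around */
        printf("%d\n", xid_follows(3, 4294967290u));    /* prints 1 */
        return 0;
    }
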
    3344 EUB             : 
    3345                 : /*
    3346                 :  * Update index statistics in pg_class if the statistics are accurate.
    3347                 :  */
    3348                 : static void
    3349 GIC       36607 : update_relstats_all_indexes(LVRelState *vacrel)
    3350                 : {
    3351 CBC       36607 :     Relation   *indrels = vacrel->indrels;
    3352 GIC       36607 :     int         nindexes = vacrel->nindexes;
    3353 CBC       36607 :     IndexBulkDeleteResult **indstats = vacrel->indstats;
    3354                 : 
    3355 GIC       36607 :     Assert(vacrel->do_index_cleanup);
    3356                 : 
    3357           91245 :     for (int idx = 0; idx < nindexes; idx++)
    3358                 :     {
    3359           54638 :         Relation    indrel = indrels[idx];
    3360 CBC       54638 :         IndexBulkDeleteResult *istat = indstats[idx];
    3361                 : 
    3362           54638 :         if (istat == NULL || istat->estimated_count)
    3363           50724 :             continue;
    3364 ECB             : 
    3365                 :         /* Update index statistics */
    3366 CBC        3914 :         vac_update_relstats(indrel,
    3367                 :                             istat->num_pages,
    3368 ECB             :                             istat->num_index_tuples,
    3369                 :                             0,
    3370                 :                             false,
    3371                 :                             InvalidTransactionId,
    3372                 :                             InvalidMultiXactId,
    3373                 :                             NULL, NULL, false);
    3374                 :     }
    3375 GIC       36607 : }
    3376                 : 
    3377 ECB             : /*
    3378                 :  * Error context callback for errors occurring during vacuum.  The error
    3379                 :  * context messages for index phases should match the messages set in parallel
    3380                 :  * vacuum.  If you change this function for those phases, change
    3381                 :  * parallel_vacuum_error_callback() as well.
    3382                 :  */
    3383                 : static void
    3384 GIC          22 : vacuum_error_callback(void *arg)
    3385                 : {
    3386 CBC          22 :     LVRelState *errinfo = arg;
    3387                 : 
    3388 GIC          22 :     switch (errinfo->phase)
    3389                 :     {
    3390 UIC           0 :         case VACUUM_ERRCB_PHASE_SCAN_HEAP:
    3391               0 :             if (BlockNumberIsValid(errinfo->blkno))
    3392                 :             {
    3393               0 :                 if (OffsetNumberIsValid(errinfo->offnum))
    3394               0 :                     errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
    3395 LBC           0 :                                errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
    3396                 :                 else
    3397               0 :                     errcontext("while scanning block %u of relation \"%s.%s\"",
    3398                 :                                errinfo->blkno, errinfo->relnamespace, errinfo->relname);
    3399 ECB             :             }
    3400                 :             else
    3401 UBC           0 :                 errcontext("while scanning relation \"%s.%s\"",
    3402 EUB             :                            errinfo->relnamespace, errinfo->relname);
    3403 UIC           0 :             break;
    3404 EUB             : 
    3405 GBC           2 :         case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
    3406               2 :             if (BlockNumberIsValid(errinfo->blkno))
    3407                 :             {
    3408 UBC           0 :                 if (OffsetNumberIsValid(errinfo->offnum))
    3409 UIC           0 :                     errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
    3410               0 :                                errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
    3411                 :                 else
    3412 UBC           0 :                     errcontext("while vacuuming block %u of relation \"%s.%s\"",
    3413                 :                                errinfo->blkno, errinfo->relnamespace, errinfo->relname);
    3414 EUB             :             }
    3415                 :             else
    3416 CBC           2 :                 errcontext("while vacuuming relation \"%s.%s\"",
    3417 ECB             :                            errinfo->relnamespace, errinfo->relname);
    3418 GIC           2 :             break;
    3419 EUB             : 
    3420 GBC           5 :         case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
    3421               5 :             errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
    3422                 :                        errinfo->indname, errinfo->relnamespace, errinfo->relname);
    3423               5 :             break;
    3424                 : 
    3425 GIC           5 :         case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
    3426               5 :             errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
    3427 ECB             :                        errinfo->indname, errinfo->relnamespace, errinfo->relname);
    3428 GIC           5 :             break;
    3429 ECB             : 
    3430 GIC           3 :         case VACUUM_ERRCB_PHASE_TRUNCATE:
    3431 CBC           3 :             if (BlockNumberIsValid(errinfo->blkno))
    3432               3 :                 errcontext("while truncating relation \"%s.%s\" to %u blocks",
    3433                 :                            errinfo->relnamespace, errinfo->relname, errinfo->blkno);
    3434               3 :             break;
    3435                 : 
    3436               7 :         case VACUUM_ERRCB_PHASE_UNKNOWN:
    3437 ECB             :         default:
    3438 GIC           7 :             return;             /* do nothing; the errinfo may not be
    3439 ECB             :                                  * initialized */
    3440                 :     }
    3441                 : }
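
/*
 * Sketch (editor's addition): how a callback like vacuum_error_callback is
 * typically installed.  PostgreSQL keeps a stack of ErrorContextCallback
 * frames in error_context_stack (see utils/elog.h); while the frame below
 * is on the stack, any error report invokes the callback with vacrel as
 * its argument to append the phase-specific context line.  The vacrel
 * variable is assumed from the surrounding file.
 */
{
    ErrorContextCallback errcallback;

    errcallback.callback = vacuum_error_callback;
    errcallback.arg = vacrel;
    errcallback.previous = error_context_stack;
    error_context_stack = &errcallback;

    /* ... vacuum work; errors raised here get the phase-specific context ... */

    /* Pop the error context stack when done */
    error_context_stack = errcallback.previous;
}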
    3442                 : 
    3443                 : /*
    3444                 :  * Updates the information required for the vacuum error callback.  This also
    3445                 :  * saves the current information, which can later be restored via restore_vacuum_error_info.
    3446                 :  */
    3447                 : static void
    3448 GIC      250871 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
    3449 ECB             :                          int phase, BlockNumber blkno, OffsetNumber offnum)
    3450                 : {
    3451 GIC      250871 :     if (saved_vacrel)
    3452                 :     {
    3453           85683 :         saved_vacrel->offnum = vacrel->offnum;
    3454           85683 :         saved_vacrel->blkno = vacrel->blkno;
    3455           85683 :         saved_vacrel->phase = vacrel->phase;
    3456                 :     }
    3457                 : 
    3458          250871 :     vacrel->blkno = blkno;
    3459 CBC      250871 :     vacrel->offnum = offnum;
    3460 GIC      250871 :     vacrel->phase = phase;
    3461          250871 : }
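
/*
 * Sketch (editor's addition): callers that only need to advance the
 * reported position, with no nested phase to return to, pass NULL for
 * saved_vacrel.  blkno is assumed to be the block currently being
 * processed.
 */
update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
                         blkno, InvalidOffsetNumber);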
    3462 ECB             : 
    3463                 : /*
    3464                 :  * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
    3465                 :  */
    3466                 : static void
    3467 GIC       85683 : restore_vacuum_error_info(LVRelState *vacrel,
    3468                 :                           const LVSavedErrInfo *saved_vacrel)
    3469 ECB             : {
    3470 CBC       85683 :     vacrel->blkno = saved_vacrel->blkno;
    3471           85683 :     vacrel->offnum = saved_vacrel->offnum;
    3472           85683 :     vacrel->phase = saved_vacrel->phase;
    3473 GIC       85683 : }
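
/*
 * Sketch (editor's addition): the save/restore pair brackets a nested
 * phase so the outer phase's error position survives it.  The hit counts
 * above (85683 saves matching 85683 restores, out of 250871 updates) show
 * the calls pairing up in practice.  blkno is assumed from context.
 */
{
    LVSavedErrInfo saved_err_info;

    update_vacuum_error_info(vacrel, &saved_err_info,
                             VACUUM_ERRCB_PHASE_VACUUM_HEAP,
                             blkno, InvalidOffsetNumber);

    /* ... vacuum the page; errors here report the VACUUM_HEAP phase ... */

    /* Revert to the caller's phase and position */
    restore_vacuum_error_info(vacrel, &saved_err_info);
}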
        

Generated by: LCOV version v1.16-55-g56c0a2a