LCOV - differential code coverage report
Current view: top level - src/backend/access/heap - pruneheap.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 92.4 % 331 306 10 9 6 1 130 5 170 18 120 6
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 11 11 9 1 1 9
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 [..60] days: 100.0 % 1 1 1
Legend: Lines: hit not hit (120,180] days: 100.0 % 4 4 4
(240..) days: 92.3 % 326 301 10 9 6 1 130 170 18 119
Function coverage date bins:
(240..) days: 55.0 % 20 11 9 1 1 9

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * pruneheap.c
                                  4                 :  *    heap page pruning and HOT-chain management code
                                  5                 :  *
                                  6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                  7                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                  8                 :  *
                                  9                 :  *
                                 10                 :  * IDENTIFICATION
                                 11                 :  *    src/backend/access/heap/pruneheap.c
                                 12                 :  *
                                 13                 :  *-------------------------------------------------------------------------
                                 14                 :  */
                                 15                 : #include "postgres.h"
                                 16                 : 
                                 17                 : #include "access/heapam.h"
                                 18                 : #include "access/heapam_xlog.h"
                                 19                 : #include "access/htup_details.h"
                                 20                 : #include "access/transam.h"
                                 21                 : #include "access/xlog.h"
                                 22                 : #include "access/xloginsert.h"
                                 23                 : #include "catalog/catalog.h"
                                 24                 : #include "miscadmin.h"
                                 25                 : #include "pgstat.h"
                                 26                 : #include "storage/bufmgr.h"
                                 27                 : #include "utils/snapmgr.h"
                                 28                 : #include "utils/rel.h"
                                 29                 : #include "utils/snapmgr.h"
                                 30                 : 
                                 31                 : /* Working data for heap_page_prune and subroutines */
                                 32                 : typedef struct
                                 33                 : {
                                 34                 :     Relation    rel;
                                 35                 : 
                                 36                 :     /* tuple visibility test, initialized for the relation */
                                 37                 :     GlobalVisState *vistest;
                                 38                 : 
                                 39                 :     /*
                                 40                 :      * Thresholds set by TransactionIdLimitedForOldSnapshots() if they have
                                 41                 :      * been computed (done on demand, and only if
                                 42                 :      * OldSnapshotThresholdActive()). The first time a tuple is about to be
                                 43                 :      * removed based on the limited horizon, old_snap_used is set to true, and
                                 44                 :      * SetOldSnapshotThresholdTimestamp() is called. See
                                 45                 :      * heap_prune_satisfies_vacuum().
                                 46                 :      */
                                 47                 :     TimestampTz old_snap_ts;
                                 48                 :     TransactionId old_snap_xmin;
                                 49                 :     bool        old_snap_used;
                                 50                 : 
                                 51                 :     TransactionId new_prune_xid;    /* new prune hint value for page */
                                 52                 :     TransactionId snapshotConflictHorizon;  /* latest xid removed */
                                 53                 :     int         nredirected;    /* numbers of entries in arrays below */
                                 54                 :     int         ndead;
                                 55                 :     int         nunused;
                                 56                 :     /* arrays that accumulate indexes of items to be changed */
                                 57                 :     OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
                                 58                 :     OffsetNumber nowdead[MaxHeapTuplesPerPage];
                                 59                 :     OffsetNumber nowunused[MaxHeapTuplesPerPage];
                                 60                 : 
                                 61                 :     /*
                                 62                 :      * marked[i] is true if item i is entered in one of the above arrays.
                                 63                 :      *
                                 64                 :      * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
                                 65                 :      * 1. Otherwise every access would need to subtract 1.
                                 66                 :      */
                                 67                 :     bool        marked[MaxHeapTuplesPerPage + 1];
                                 68                 : 
                                 69                 :     /*
                                 70                 :      * Tuple visibility is only computed once for each tuple, for correctness
                                 71                 :      * and efficiency reasons; see comment in heap_page_prune() for details.
                                 72                 :      * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
                                 73                 :      * indicate no visibility has been computed, e.g. for LP_DEAD items.
                                 74                 :      *
                                 75                 :      * Same indexing as ->marked.
                                 76                 :      */
                                 77                 :     int8        htsv[MaxHeapTuplesPerPage + 1];
                                 78                 : } PruneState;
                                 79                 : 
                                 80                 : /* Local functions */
                                 81                 : static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
                                 82                 :                                                HeapTuple tup,
                                 83                 :                                                Buffer buffer);
                                 84                 : static int  heap_prune_chain(Buffer buffer,
                                 85                 :                              OffsetNumber rootoffnum,
                                 86                 :                              PruneState *prstate);
                                 87                 : static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
                                 88                 : static void heap_prune_record_redirect(PruneState *prstate,
                                 89                 :                                        OffsetNumber offnum, OffsetNumber rdoffnum);
                                 90                 : static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
                                 91                 : static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
                                 92                 : static void page_verify_redirects(Page page);
                                 93                 : 
                                 94                 : 
                                 95                 : /*
                                 96                 :  * Optionally prune and repair fragmentation in the specified page.
                                 97                 :  *
                                 98                 :  * This is an opportunistic function.  It will perform housekeeping
                                 99                 :  * only if the page heuristically looks like a candidate for pruning and we
                                100                 :  * can acquire buffer cleanup lock without blocking.
                                101                 :  *
                                102                 :  * Note: this is called quite often.  It's important that it fall out quickly
                                103                 :  * if there's not any use in pruning.
                                104                 :  *
                                105                 :  * Caller must have pin on the buffer, and must *not* have a lock on it.
                                106                 :  */
                                107                 : void
 3324 rhaas                     108 CBC    17752776 : heap_page_prune_opt(Relation relation, Buffer buffer)
                                109                 : {
 2545 kgrittn                   110        17752776 :     Page        page = BufferGetPage(buffer);
                                111                 :     TransactionId prune_xid;
                                112                 :     GlobalVisState *vistest;
  970 andres                    113        17752776 :     TransactionId limited_xmin = InvalidTransactionId;
                                114        17752776 :     TimestampTz limited_ts = 0;
                                115                 :     Size        minfree;
                                116                 : 
                                117                 :     /*
                                118                 :      * We can't write WAL in recovery mode, so there's no point trying to
                                119                 :      * clean the page. The primary will likely issue a cleaning WAL record
                                120                 :      * soon anyway, so this is no particular loss.
                                121                 :      */
 3324 rhaas                     122        17752776 :     if (RecoveryInProgress())
                                123        16621937 :         return;
                                124                 : 
                                125                 :     /*
                                126                 :      * XXX: Magic to keep old_snapshot_threshold tests appear "working". They
                                127                 :      * currently are broken, and discussion of what to do about them is
                                128                 :      * ongoing. See
                                129                 :      * https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
                                130                 :      */
  970 andres                    131        17575599 :     if (old_snapshot_threshold == 0)
                                132            2627 :         SnapshotTooOldMagicForTest();
                                133                 : 
                                134                 :     /*
                                135                 :      * First check whether there's any chance there's something to prune,
                                136                 :      * determining the appropriate horizon is a waste if there's no prune_xid
                                137                 :      * (i.e. no updates/deletes left potentially dead tuples around).
                                138                 :      */
                                139        17575599 :     prune_xid = ((PageHeader) page)->pd_prune_xid;
                                140        17575599 :     if (!TransactionIdIsValid(prune_xid))
                                141        10033748 :         return;
                                142                 : 
                                143                 :     /*
                                144                 :      * Check whether prune_xid indicates that there may be dead rows that can
                                145                 :      * be cleaned up.
                                146                 :      *
                                147                 :      * It is OK to check the old snapshot limit before acquiring the cleanup
                                148                 :      * lock because the worst that can happen is that we are not quite as
                                149                 :      * aggressive about the cleanup (by however many transaction IDs are
                                150                 :      * consumed between this point and acquiring the lock).  This allows us to
                                151                 :      * save significant overhead in the case where the page is found not to be
                                152                 :      * prunable.
                                153                 :      *
                                154                 :      * Even if old_snapshot_threshold is set, we first check whether the page
                                155                 :      * can be pruned without. Both because
                                156                 :      * TransactionIdLimitedForOldSnapshots() is not cheap, and because not
                                157                 :      * unnecessarily relying on old_snapshot_threshold avoids causing
                                158                 :      * conflicts.
                                159                 :      */
                                160         7541851 :     vistest = GlobalVisTestFor(relation);
                                161                 : 
                                162         7541851 :     if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
                                163                 :     {
                                164         6410719 :         if (!OldSnapshotThresholdActive())
                                165         6410698 :             return;
                                166                 : 
                                167              21 :         if (!TransactionIdLimitedForOldSnapshots(GlobalVisTestNonRemovableHorizon(vistest),
                                168                 :                                                  relation,
                                169                 :                                                  &limited_xmin, &limited_ts))
                                170              19 :             return;
                                171                 : 
                                172               2 :         if (!TransactionIdPrecedes(prune_xid, limited_xmin))
                                173               2 :             return;
                                174                 :     }
                                175                 : 
                                176                 :     /*
                                177                 :      * We prune when a previous UPDATE failed to find enough space on the page
                                178                 :      * for a new tuple version, or when free space falls below the relation's
                                179                 :      * fill-factor target (but not less than 10%).
                                180                 :      *
                                181                 :      * Checking free space here is questionable since we aren't holding any
                                182                 :      * lock on the buffer; in the worst case we could get a bogus answer. It's
                                183                 :      * unlikely to be *seriously* wrong, though, since reading either pd_lower
                                184                 :      * or pd_upper is probably atomic.  Avoiding taking a lock seems more
                                185                 :      * important than sometimes getting a wrong answer in what is after all
                                186                 :      * just a heuristic estimate.
                                187                 :      */
 5680 tgl                       188         1131132 :     minfree = RelationGetTargetPageFreeSpace(relation,
                                189                 :                                              HEAP_DEFAULT_FILLFACTOR);
                                190         1131132 :     minfree = Max(minfree, BLCKSZ / 10);
                                191                 : 
 5383                           192         1131132 :     if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
                                193                 :     {
                                194                 :         /* OK, try to get exclusive buffer lock */
 5680                           195          104122 :         if (!ConditionalLockBufferForCleanup(buffer))
                                196             293 :             return;
                                197                 : 
                                198                 :         /*
                                199                 :          * Now that we have buffer lock, get accurate information about the
                                200                 :          * page's free space, and recheck the heuristic about whether to
                                201                 :          * prune. (We needn't recheck PageIsPrunable, since no one else could
                                202                 :          * have pruned while we hold pin.)
                                203                 :          */
 5383                           204          103829 :         if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
                                205                 :         {
                                206                 :             int         ndeleted,
                                207                 :                         nnewlpdead;
                                208                 : 
  513 pg                        209          103829 :             ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin,
                                210                 :                                        limited_ts, &nnewlpdead, NULL);
                                211                 : 
                                212                 :             /*
                                213                 :              * Report the number of tuples reclaimed to pgstats.  This is
                                214                 :              * ndeleted minus the number of newly-LP_DEAD-set items.
                                215                 :              *
                                216                 :              * We derive the number of dead tuples like this to avoid totally
                                217                 :              * forgetting about items that were set to LP_DEAD, since they
                                218                 :              * still need to be cleaned up by VACUUM.  We only want to count
                                219                 :              * heap-only tuples that just became LP_UNUSED in our report,
                                220                 :              * which don't.
                                221                 :              *
                                222                 :              * VACUUM doesn't have to compensate in the same way when it
                                223                 :              * tracks ndeleted, since it will set the same LP_DEAD items to
                                224                 :              * LP_UNUSED separately.
                                225                 :              */
                                226          103829 :             if (ndeleted > nnewlpdead)
                                227           54279 :                 pgstat_update_heap_dead_tuples(relation,
                                228                 :                                                ndeleted - nnewlpdead);
                                229                 :         }
                                230                 : 
                                231                 :         /* And release buffer lock */
 5680 tgl                       232          103829 :         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
                                233                 : 
                                234                 :         /*
                                235                 :          * We avoid reuse of any free space created on the page by unrelated
                                236                 :          * UPDATEs/INSERTs by opting to not update the FSM at this point.  The
                                237                 :          * free space should be reused by UPDATEs to *this* page.
                                238                 :          */
                                239                 :     }
                                240                 : }
                                241                 : 
                                242                 : 
                                243                 : /*
                                244                 :  * Prune and repair fragmentation in the specified page.
                                245                 :  *
                                246                 :  * Caller must have pin and buffer cleanup lock on the page.  Note that we
                                247                 :  * don't update the FSM information for page on caller's behalf.  Caller might
                                248                 :  * also need to account for a reduction in the length of the line pointer
                                249                 :  * array following array truncation by us.
                                250                 :  *
                                251                 :  * vistest is used to distinguish whether tuples are DEAD or RECENTLY_DEAD
                                252                 :  * (see heap_prune_satisfies_vacuum and
                                253                 :  * HeapTupleSatisfiesVacuum). old_snap_xmin / old_snap_ts need to
                                254                 :  * either have been set by TransactionIdLimitedForOldSnapshots, or
                                255                 :  * InvalidTransactionId/0 respectively.
                                256                 :  *
                                257                 :  * Sets *nnewlpdead for caller, indicating the number of items that were
                                258                 :  * newly set LP_DEAD during prune operation.
                                259                 :  *
                                260                 :  * off_loc is the offset location required by the caller to use in error
                                261                 :  * callback.
                                262                 :  *
                                263                 :  * Returns the number of tuples deleted from the page during this call.
                                264                 :  */
                                265                 : int
  970 andres                    266          268918 : heap_page_prune(Relation relation, Buffer buffer,
                                267                 :                 GlobalVisState *vistest,
                                268                 :                 TransactionId old_snap_xmin,
                                269                 :                 TimestampTz old_snap_ts,
                                270                 :                 int *nnewlpdead,
                                271                 :                 OffsetNumber *off_loc)
                                272                 : {
 5624 bruce                     273          268918 :     int         ndeleted = 0;
 2545 kgrittn                   274          268918 :     Page        page = BufferGetPage(buffer);
  447 andres                    275          268918 :     BlockNumber blockno = BufferGetBlockNumber(buffer);
                                276                 :     OffsetNumber offnum,
                                277                 :                 maxoff;
                                278                 :     PruneState  prstate;
                                279                 :     HeapTupleData tup;
                                280                 : 
                                281                 :     /*
                                282                 :      * Our strategy is to scan the page and make lists of items to change,
                                283                 :      * then apply the changes within a critical section.  This keeps as much
                                284                 :      * logic as possible out of the critical section, and also ensures that
                                285                 :      * WAL replay will work the same as the normal case.
                                286                 :      *
                                287                 :      * First, initialize the new pd_prune_xid value to zero (indicating no
                                288                 :      * prunable tuples).  If we find any tuples which may soon become
                                289                 :      * prunable, we will save the lowest relevant XID in new_prune_xid. Also
                                290                 :      * initialize the rest of our working state.
                                291                 :      */
 5510 tgl                       292          268918 :     prstate.new_prune_xid = InvalidTransactionId;
  970 andres                    293          268918 :     prstate.rel = relation;
                                294          268918 :     prstate.vistest = vistest;
                                295          268918 :     prstate.old_snap_xmin = old_snap_xmin;
                                296          268918 :     prstate.old_snap_ts = old_snap_ts;
                                297          268918 :     prstate.old_snap_used = false;
  143 pg                        298 GNC      268918 :     prstate.snapshotConflictHorizon = InvalidTransactionId;
 5510 tgl                       299 CBC      268918 :     prstate.nredirected = prstate.ndead = prstate.nunused = 0;
                                300          268918 :     memset(prstate.marked, 0, sizeof(prstate.marked));
                                301                 : 
 5680                           302          268918 :     maxoff = PageGetMaxOffsetNumber(page);
  485 andres                    303          268918 :     tup.t_tableOid = RelationGetRelid(prstate.rel);
                                304                 : 
                                305                 :     /*
                                306                 :      * Determine HTSV for all tuples.
                                307                 :      *
                                308                 :      * This is required for correctness to deal with cases where running HTSV
                                309                 :      * twice could result in different results (e.g. RECENTLY_DEAD can turn to
                                310                 :      * DEAD if another checked item causes GlobalVisTestIsRemovableFullXid()
                                311                 :      * to update the horizon, INSERT_IN_PROGRESS can change to DEAD if the
                                312                 :      * inserting transaction aborts, ...). That in turn could cause
                                313                 :      * heap_prune_chain() to behave incorrectly if a tuple is reached twice,
                                314                 :      * once directly via a heap_prune_chain() and once following a HOT chain.
                                315                 :      *
                                316                 :      * It's also good for performance. Most commonly tuples within a page are
                                317                 :      * stored at decreasing offsets (while the items are stored at increasing
                                318                 :      * offsets). When processing all tuples on a page this leads to reading
                                319                 :      * memory at decreasing offsets within a page, with a variable stride.
                                320                 :      * That's hard for CPU prefetchers to deal with. Processing the items in
                                321                 :      * reverse order (and thus the tuples in increasing order) increases
                                322                 :      * prefetching efficiency significantly / decreases the number of cache
                                323                 :      * misses.
                                324                 :      */
                                325          268918 :     for (offnum = maxoff;
                                326        16690039 :          offnum >= FirstOffsetNumber;
                                327        16421121 :          offnum = OffsetNumberPrev(offnum))
                                328                 :     {
                                329        16421121 :         ItemId      itemid = PageGetItemId(page, offnum);
                                330                 :         HeapTupleHeader htup;
                                331                 : 
                                332                 :         /* Nothing to do if slot doesn't contain a tuple */
                                333        16421121 :         if (!ItemIdIsNormal(itemid))
                                334                 :         {
                                335         1683379 :             prstate.htsv[offnum] = -1;
                                336         1683379 :             continue;
                                337                 :         }
                                338                 : 
                                339        14737742 :         htup = (HeapTupleHeader) PageGetItem(page, itemid);
                                340        14737742 :         tup.t_data = htup;
                                341        14737742 :         tup.t_len = ItemIdGetLength(itemid);
  447                           342        14737742 :         ItemPointerSet(&(tup.t_self), blockno, offnum);
                                343                 : 
                                344                 :         /*
                                345                 :          * Set the offset number so that we can display it along with any
                                346                 :          * error that occurred while processing this tuple.
                                347                 :          */
  485                           348        14737742 :         if (off_loc)
                                349         9682613 :             *off_loc = offnum;
                                350                 : 
                                351        14737742 :         prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
                                352                 :                                                            buffer);
                                353                 :     }
                                354                 : 
                                355                 :     /* Scan the page */
 5680 tgl                       356          268918 :     for (offnum = FirstOffsetNumber;
                                357        16690039 :          offnum <= maxoff;
                                358        16421121 :          offnum = OffsetNumberNext(offnum))
                                359                 :     {
                                360                 :         ItemId      itemid;
                                361                 : 
                                362                 :         /* Ignore items already processed as part of an earlier chain */
 5510                           363        16421121 :         if (prstate.marked[offnum])
                                364          193811 :             continue;
                                365                 : 
                                366                 :         /* see preceding loop */
  956 akapila                   367        16227310 :         if (off_loc)
                                368        10145699 :             *off_loc = offnum;
                                369                 : 
                                370                 :         /* Nothing to do if slot is empty or already dead */
 5510 tgl                       371        16227310 :         itemid = PageGetItemId(page, offnum);
 5680                           372        16227310 :         if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
                                373         1098407 :             continue;
                                374                 : 
                                375                 :         /* Process this item or chain of items */
  970 andres                    376        15128903 :         ndeleted += heap_prune_chain(buffer, offnum, &prstate);
                                377                 :     }
                                378                 : 
                                379                 :     /* Clear the offset information once we have processed the given page. */
  956 akapila                   380          268918 :     if (off_loc)
                                381          165089 :         *off_loc = InvalidOffsetNumber;
                                382                 : 
                                383                 :     /* Any error while applying the changes is critical */
 5510 tgl                       384          268918 :     START_CRIT_SECTION();
                                385                 : 
                                386                 :     /* Have we found any prunable items? */
                                387          268918 :     if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
                                388                 :     {
                                389                 :         /*
                                390                 :          * Apply the planned item changes, then repair page fragmentation, and
                                391                 :          * update the page's hint bit about whether it has free line pointers.
                                392                 :          */
 5414 heikki.linnakangas        393          112841 :         heap_page_prune_execute(buffer,
                                394                 :                                 prstate.redirected, prstate.nredirected,
                                395                 :                                 prstate.nowdead, prstate.ndead,
                                396                 :                                 prstate.nowunused, prstate.nunused);
                                397                 : 
                                398                 :         /*
                                399                 :          * Update the page's pd_prune_xid field to either zero, or the lowest
                                400                 :          * XID of any soon-prunable tuple.
                                401                 :          */
 5510 tgl                       402          112841 :         ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
                                403                 : 
                                404                 :         /*
                                405                 :          * Also clear the "page is full" flag, since there's no point in
                                406                 :          * repeating the prune/defrag process until something else happens to
                                407                 :          * the page.
                                408                 :          */
                                409          112841 :         PageClearFull(page);
                                410                 : 
 5680                           411          112841 :         MarkBufferDirty(buffer);
                                412                 : 
                                413                 :         /*
                                414                 :          * Emit a WAL XLOG_HEAP2_PRUNE record showing what we did
                                415                 :          */
 4500 rhaas                     416          112841 :         if (RelationNeedsWAL(relation))
                                417                 :         {
                                418                 :             xl_heap_prune xlrec;
                                419                 :             XLogRecPtr  recptr;
                                420                 : 
    7 andres                    421 GNC      112027 :             xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(relation);
  143 pg                        422          112027 :             xlrec.snapshotConflictHorizon = prstate.snapshotConflictHorizon;
  733 pg                        423 CBC      112027 :             xlrec.nredirected = prstate.nredirected;
                                424          112027 :             xlrec.ndead = prstate.ndead;
  733 pg                        425 ECB             : 
  733 pg                        426 GIC      112027 :             XLogBeginInsert();
  733 pg                        427 CBC      112027 :             XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
  733 pg                        428 ECB             : 
  733 pg                        429 GIC      112027 :             XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
  733 pg                        430 ECB             : 
                                431                 :             /*
                                432                 :              * The OffsetNumber arrays are not actually in the buffer, but we
                                433                 :              * pretend that they are.  When XLogInsert stores the whole
                                434                 :              * buffer, the offset arrays need not be stored too.
                                435                 :              */
  733 pg                        436 GIC      112027 :             if (prstate.nredirected > 0)
  733 pg                        437 CBC       53622 :                 XLogRegisterBufData(0, (char *) prstate.redirected,
                                438           53622 :                                     prstate.nredirected *
  733 pg                        439 ECB             :                                     sizeof(OffsetNumber) * 2);
                                440                 : 
  733 pg                        441 GIC      112027 :             if (prstate.ndead > 0)
  733 pg                        442 CBC       62485 :                 XLogRegisterBufData(0, (char *) prstate.nowdead,
                                443           62485 :                                     prstate.ndead * sizeof(OffsetNumber));
  733 pg                        444 ECB             : 
  733 pg                        445 GIC      112027 :             if (prstate.nunused > 0)
  733 pg                        446 CBC       17957 :                 XLogRegisterBufData(0, (char *) prstate.nowunused,
                                447           17957 :                                     prstate.nunused * sizeof(OffsetNumber));
  733 pg                        448 ECB             : 
  733 pg                        449 GIC      112027 :             recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_PRUNE);
 5510 tgl                       450 ECB             : 
 2545 kgrittn                   451 GIC      112027 :             PageSetLSN(BufferGetPage(buffer), recptr);
 5680 tgl                       452 ECB             :         }
                                453                 :     }
                                454                 :     else
                                455                 :     {
                                456                 :         /*
                                457                 :          * If we didn't prune anything, but have found a new value for the
                                458                 :          * pd_prune_xid field, update it and mark the buffer dirty. This is
                                459                 :          * treated as a non-WAL-logged hint.
                                460                 :          *
                                461                 :          * Also clear the "page is full" flag if it is set, since there's no
                                462                 :          * point in repeating the prune/defrag process until something else
                                463                 :          * happens to the page.
                                464                 :          */
 5510 tgl                       465 GIC      312060 :         if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
 5510 tgl                       466 CBC      155983 :             PageIsFull(page))
 5510 tgl                       467 ECB             :         {
 5510 tgl                       468 GIC         192 :             ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
 5510 tgl                       469 CBC         192 :             PageClearFull(page);
 3583 jdavis                    470             192 :             MarkBufferDirtyHint(buffer, true);
 5510 tgl                       471 ECB             :         }
                                472                 :     }
                                473                 : 
 5680 tgl                       474 GIC      268918 :     END_CRIT_SECTION();
 5680 tgl                       475 ECB             : 
                                476                 :     /* Record number of newly-set-LP_DEAD items for caller */
  513 pg                        477 GIC      268918 :     *nnewlpdead = prstate.ndead;
 5680 tgl                       478 ECB             : 
 5680 tgl                       479 GIC      268918 :     return ndeleted;
 5680 tgl                       480 ECB             : }
                                481                 : 
                                482                 : 
                                483                 : /*
                                484                 :  * Perform visibility checks for heap pruning.
                                485                 :  *
                                486                 :  * This is more complicated than just using GlobalVisTestIsRemovableXid()
                                487                 :  * because of old_snapshot_threshold. We only want to increase the threshold
                                488                 :  * that triggers errors for old snapshots when we actually decide to remove a
                                489                 :  * row based on the limited horizon.
                                490                 :  *
                                491                 :  * Due to its cost we also only want to call
                                492                 :  * TransactionIdLimitedForOldSnapshots() if necessary, i.e. we might not have
                                493                 :  * done so in heap_hot_prune_opt() if pd_prune_xid was old enough. But we
                                494                 :  * still want to be able to remove rows that are too new to be removed
                                495                 :  * according to prstate->vistest, but that can be removed based on
                                496                 :  * old_snapshot_threshold. So we call TransactionIdLimitedForOldSnapshots() on
                                497                 :  * demand in here, if appropriate.
                                498                 :  */
                                499                 : static HTSV_Result
  970 andres                    500 GIC    14737742 : heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
  970 andres                    501 ECB             : {
                                502                 :     HTSV_Result res;
                                503                 :     TransactionId dead_after;
                                504                 : 
  970 andres                    505 GIC    14737742 :     res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after);
  970 andres                    506 ECB             : 
  970 andres                    507 GIC    14737742 :     if (res != HEAPTUPLE_RECENTLY_DEAD)
  970 andres                    508 CBC    12938086 :         return res;
  970 andres                    509 ECB             : 
                                510                 :     /*
                                511                 :      * If we are already relying on the limited xmin, there is no need to
                                512                 :      * delay doing so anymore.
                                513                 :      */
  970 andres                    514 GIC     1799656 :     if (prstate->old_snap_used)
  970 andres                    515 ECB             :     {
  970 andres                    516 UIC           0 :         Assert(TransactionIdIsValid(prstate->old_snap_xmin));
  970 andres                    517 EUB             : 
  970 andres                    518 UIC           0 :         if (TransactionIdPrecedes(dead_after, prstate->old_snap_xmin))
  970 andres                    519 UBC           0 :             res = HEAPTUPLE_DEAD;
                                520               0 :         return res;
  970 andres                    521 EUB             :     }
                                522                 : 
                                523                 :     /*
                                524                 :      * First check if GlobalVisTestIsRemovableXid() is sufficient to find the
                                525                 :      * row dead. If not, and old_snapshot_threshold is enabled, try to use the
                                526                 :      * lowered horizon.
                                527                 :      */
  970 andres                    528 GIC     1799656 :     if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
  970 andres                    529 CBC     1459724 :         res = HEAPTUPLE_DEAD;
                                530          339932 :     else if (OldSnapshotThresholdActive())
  970 andres                    531 ECB             :     {
                                532                 :         /* haven't determined limited horizon yet, requests */
  970 andres                    533 UIC           0 :         if (!TransactionIdIsValid(prstate->old_snap_xmin))
  970 andres                    534 EUB             :         {
                                535                 :             TransactionId horizon =
  970 andres                    536 UIC           0 :             GlobalVisTestNonRemovableHorizon(prstate->vistest);
  970 andres                    537 EUB             : 
  970 andres                    538 UIC           0 :             TransactionIdLimitedForOldSnapshots(horizon, prstate->rel,
  970 andres                    539 EUB             :                                                 &prstate->old_snap_xmin,
                                540                 :                                                 &prstate->old_snap_ts);
                                541                 :         }
                                542                 : 
  970 andres                    543 UIC           0 :         if (TransactionIdIsValid(prstate->old_snap_xmin) &&
  970 andres                    544 UBC           0 :             TransactionIdPrecedes(dead_after, prstate->old_snap_xmin))
  970 andres                    545 EUB             :         {
                                546                 :             /*
                                547                 :              * About to remove row based on snapshot_too_old. Need to raise
                                548                 :              * the threshold so problematic accesses would error.
                                549                 :              */
  970 andres                    550 UIC           0 :             Assert(!prstate->old_snap_used);
  970 andres                    551 UBC           0 :             SetOldSnapshotThresholdTimestamp(prstate->old_snap_ts,
  970 andres                    552 EUB             :                                              prstate->old_snap_xmin);
  970 andres                    553 UIC           0 :             prstate->old_snap_used = true;
  970 andres                    554 UBC           0 :             res = HEAPTUPLE_DEAD;
  970 andres                    555 EUB             :         }
                                556                 :     }
                                557                 : 
  970 andres                    558 GIC     1799656 :     return res;
  970 andres                    559 ECB             : }
                                560                 : 
                                561                 : 
                                562                 : /*
                                563                 :  * Prune specified line pointer or a HOT chain originating at line pointer.
                                564                 :  *
                                565                 :  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
                                566                 :  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
                                567                 :  * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
                                568                 :  * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
                                569                 :  * DEAD, our visibility test is just too coarse to detect it.
                                570                 :  *
                                571                 :  * In general, pruning must never leave behind a DEAD tuple that still has
                                572                 :  * tuple storage.  VACUUM isn't prepared to deal with that case.  That's why
                                573                 :  * VACUUM prunes the same heap page a second time (without dropping its lock
                                574                 :  * in the interim) when it sees a newly DEAD tuple that we initially saw as
                                575                 :  * in-progress.  Retrying pruning like this can only happen when an inserting
                                576                 :  * transaction concurrently aborts.
                                577                 :  *
                                578                 :  * The root line pointer is redirected to the tuple immediately after the
                                579                 :  * latest DEAD tuple.  If all tuples in the chain are DEAD, the root line
                                580                 :  * pointer is marked LP_DEAD.  (This includes the case of a DEAD simple
                                581                 :  * tuple, which we treat as a chain of length 1.)
                                582                 :  *
                                583                 :  * We don't actually change the page here. We just add entries to the arrays in
                                584                 :  * prstate showing the changes to be made.  Items to be redirected are added
                                585                 :  * to the redirected[] array (two entries per redirection); items to be set to
                                586                 :  * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
                                587                 :  * state are added to nowunused[].
                                588                 :  *
                                589                 :  * Returns the number of tuples (to be) deleted from the page.
                                590                 :  */
                                591                 : static int
  970 andres                    592 GIC    15128903 : heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
 5680 tgl                       593 ECB             : {
 5624 bruce                     594 GIC    15128903 :     int         ndeleted = 0;
 2545 kgrittn                   595 CBC    15128903 :     Page        dp = (Page) BufferGetPage(buffer);
 5624 bruce                     596        15128903 :     TransactionId priorXmax = InvalidTransactionId;
 5624 bruce                     597 ECB             :     ItemId      rootlp;
                                598                 :     HeapTupleHeader htup;
 5624 bruce                     599 GIC    15128903 :     OffsetNumber latestdead = InvalidOffsetNumber,
 5624 bruce                     600 CBC    15128903 :                 maxoff = PageGetMaxOffsetNumber(dp),
 5624 bruce                     601 ECB             :                 offnum;
                                602                 :     OffsetNumber chainitems[MaxHeapTuplesPerPage];
 5624 bruce                     603 GIC    15128903 :     int         nchain = 0,
 5624 bruce                     604 ECB             :                 i;
                                605                 : 
 5680 tgl                       606 GIC    15128903 :     rootlp = PageGetItemId(dp, rootoffnum);
 5680 tgl                       607 ECB             : 
                                608                 :     /*
                                609                 :      * If it's a heap-only tuple, then it is not the start of a HOT chain.
                                610                 :      */
 5680 tgl                       611 GIC    15128903 :     if (ItemIdIsNormal(rootlp))
 5680 tgl                       612 ECB             :     {
  485 andres                    613 GIC    14543931 :         Assert(prstate->htsv[rootoffnum] != -1);
 5680 tgl                       614 CBC    14543931 :         htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
 3548 rhaas                     615 ECB             : 
 5680 tgl                       616 GIC    14543931 :         if (HeapTupleHeaderIsHeapOnly(htup))
 5680 tgl                       617 ECB             :         {
                                618                 :             /*
                                619                 :              * If the tuple is DEAD and doesn't chain to anything else, mark
                                620                 :              * it unused immediately.  (If it does chain, we can only remove
                                621                 :              * it as part of pruning its chain.)
                                622                 :              *
                                623                 :              * We need this primarily to handle aborted HOT updates, that is,
                                624                 :              * XMIN_INVALID heap-only tuples.  Those might not be linked to by
                                625                 :              * any chain, since the parent tuple might be re-updated before
                                626                 :              * any pruning occurs.  So we have to be able to reap them
                                627                 :              * separately from chain-pruning.  (Note that
                                628                 :              * HeapTupleHeaderIsHotUpdated will never return true for an
                                629                 :              * XMIN_INVALID tuple, so this code will work even when there were
                                630                 :              * sequential updates within the aborted transaction.)
                                631                 :              *
                                632                 :              * Note that we might first arrive at a dead heap-only tuple
                                633                 :              * either here or while following a chain below.  Whichever path
                                634                 :              * gets there first will mark the tuple unused.
                                635                 :              */
  485 andres                    636 GIC      579696 :             if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
  485 andres                    637 CBC        7238 :                 !HeapTupleHeaderIsHotUpdated(htup))
 5680 tgl                       638 ECB             :             {
 5510 tgl                       639 GIC        3132 :                 heap_prune_record_unused(prstate, rootoffnum);
  143 pg                        640 GNC        3132 :                 HeapTupleHeaderAdvanceConflictHorizon(htup,
                                641                 :                                                       &prstate->snapshotConflictHorizon);
 5680 tgl                       642 GIC        3132 :                 ndeleted++;
 5680 tgl                       643 ECB             :             }
                                644                 : 
                                645                 :             /* Nothing more to do */
 5680 tgl                       646 GIC      579696 :             return ndeleted;
 5680 tgl                       647 ECB             :         }
                                648                 :     }
                                649                 : 
                                650                 :     /* Start from the root tuple */
 5680 tgl                       651 GIC    14549207 :     offnum = rootoffnum;
 5680 tgl                       652 ECB             : 
                                653                 :     /* while not end of the chain */
                                654                 :     for (;;)
 5680 tgl                       655 GIC      761000 :     {
 5624 bruce                     656 ECB             :         ItemId      lp;
                                657                 :         bool        tupdead,
                                658                 :                     recent_dead;
                                659                 : 
                                660                 :         /* Sanity check (pure paranoia) */
  564 pg                        661 GIC    15310207 :         if (offnum < FirstOffsetNumber)
  564 pg                        662 LBC           0 :             break;
  564 pg                        663 EUB             : 
                                664                 :         /*
                                665                 :          * An offset past the end of page's line pointer array is possible
                                666                 :          * when the array was truncated (original item must have been unused)
                                667                 :          */
  564 pg                        668 GIC    15310207 :         if (offnum > maxoff)
 5680 tgl                       669 LBC           0 :             break;
 5680 tgl                       670 EUB             : 
                                671                 :         /* If item is already processed, stop --- it must not be same chain */
 5510 tgl                       672 GIC    15310207 :         if (prstate->marked[offnum])
 5510 tgl                       673 CBC        1907 :             break;
 5510 tgl                       674 ECB             : 
 5680 tgl                       675 GIC    15308300 :         lp = PageGetItemId(dp, offnum);
 5680 tgl                       676 ECB             : 
                                677                 :         /* Unused item obviously isn't part of the chain */
 5680 tgl                       678 GIC    15308300 :         if (!ItemIdIsUsed(lp))
 5680 tgl                       679 LBC           0 :             break;
 5680 tgl                       680 EUB             : 
                                681                 :         /*
                                682                 :          * If we are looking at the redirected root line pointer, jump to the
                                683                 :          * first normal tuple in the chain.  If we find a redirect somewhere
                                684                 :          * else, stop --- it must not be same chain.
                                685                 :          */
 5680 tgl                       686 GIC    15308300 :         if (ItemIdIsRedirected(lp))
 5680 tgl                       687 ECB             :         {
 5680 tgl                       688 GIC      584972 :             if (nchain > 0)
 5680 tgl                       689 LBC           0 :                 break;          /* not at start of chain */
 5680 tgl                       690 GBC      584972 :             chainitems[nchain++] = offnum;
 5680 tgl                       691 CBC      584972 :             offnum = ItemIdGetRedirect(rootlp);
                                692          584972 :             continue;
 5680 tgl                       693 ECB             :         }
                                694                 : 
                                695                 :         /*
                                696                 :          * Likewise, a dead line pointer can't be part of the chain. (We
                                697                 :          * already eliminated the case of dead root tuple outside this
                                698                 :          * function.)
                                699                 :          */
 5680 tgl                       700 GIC    14723328 :         if (ItemIdIsDead(lp))
 5680 tgl                       701 LBC           0 :             break;
 5680 tgl                       702 EUB             : 
 5680 tgl                       703 GIC    14723328 :         Assert(ItemIdIsNormal(lp));
  485 andres                    704 CBC    14723328 :         Assert(prstate->htsv[offnum] != -1);
 5680 tgl                       705        14723328 :         htup = (HeapTupleHeader) PageGetItem(dp, lp);
 5680 tgl                       706 ECB             : 
                                707                 :         /*
                                708                 :          * Check the tuple XMIN against prior XMAX, if any
                                709                 :          */
 5680 tgl                       710 GIC    14898367 :         if (TransactionIdIsValid(priorXmax) &&
 1984 alvherre                  711 CBC      175039 :             !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
 5680 tgl                       712 LBC           0 :             break;
 5680 tgl                       713 EUB             : 
                                714                 :         /*
                                715                 :          * OK, this tuple is indeed a member of the chain.
                                716                 :          */
 5680 tgl                       717 GIC    14723328 :         chainitems[nchain++] = offnum;
 5680 tgl                       718 ECB             : 
                                719                 :         /*
                                720                 :          * Check tuple's visibility status.
                                721                 :          */
 5680 tgl                       722 GIC    14723328 :         tupdead = recent_dead = false;
 5680 tgl                       723 ECB             : 
  485 andres                    724 GIC    14723328 :         switch ((HTSV_Result) prstate->htsv[offnum])
 5680 tgl                       725 ECB             :         {
 5680 tgl                       726 GIC     1476657 :             case HEAPTUPLE_DEAD:
 5680 tgl                       727 CBC     1476657 :                 tupdead = true;
                                728         1476657 :                 break;
 5680 tgl                       729 ECB             : 
 5680 tgl                       730 GIC      339932 :             case HEAPTUPLE_RECENTLY_DEAD:
 5680 tgl                       731 CBC      339932 :                 recent_dead = true;
 5624 bruce                     732 ECB             : 
                                733                 :                 /*
                                734                 :                  * This tuple may soon become DEAD.  Update the hint field so
                                735                 :                  * that the page is reconsidered for pruning in future.
                                736                 :                  */
 5510 tgl                       737 GIC      339932 :                 heap_prune_record_prunable(prstate,
 3728 alvherre                  738 CBC      339932 :                                            HeapTupleHeaderGetUpdateXid(htup));
 5680 tgl                       739          339932 :                 break;
 5680 tgl                       740 ECB             : 
 5680 tgl                       741 GIC       14271 :             case HEAPTUPLE_DELETE_IN_PROGRESS:
 3260 bruce                     742 ECB             : 
                                743                 :                 /*
                                744                 :                  * This tuple may soon become DEAD.  Update the hint field so
                                745                 :                  * that the page is reconsidered for pruning in future.
                                746                 :                  */
 3418 alvherre                  747 GIC       14271 :                 heap_prune_record_prunable(prstate,
 3418 alvherre                  748 CBC       14271 :                                            HeapTupleHeaderGetUpdateXid(htup));
 5680 tgl                       749           14271 :                 break;
 5680 tgl                       750 ECB             : 
 5680 tgl                       751 GIC    12892468 :             case HEAPTUPLE_LIVE:
 5680 tgl                       752 ECB             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
                                753                 : 
                                754                 :                 /*
                                755                 :                  * If we wanted to optimize for aborts, we might consider
                                756                 :                  * marking the page prunable when we see INSERT_IN_PROGRESS.
                                757                 :                  * But we don't.  See related decisions about when to mark the
                                758                 :                  * page prunable in heapam.c.
                                759                 :                  */
 5680 tgl                       760 GIC    12892468 :                 break;
 5680 tgl                       761 ECB             : 
 5680 tgl                       762 UIC           0 :             default:
 5680 tgl                       763 UBC           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
 5680 tgl                       764 EUB             :                 break;
                                765                 :         }
                                766                 : 
                                767                 :         /*
                                768                 :          * Remember the last DEAD tuple seen.  We will advance past
                                769                 :          * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
                                770                 :          * but we can't advance past anything else.  We have to make sure that
                                771                 :          * we don't miss any DEAD tuples, since DEAD tuples that still have
                                772                 :          * tuple storage after pruning will confuse VACUUM.
                                773                 :          */
 5680 tgl                       774 GIC    14723328 :         if (tupdead)
 4859 simon                     775 ECB             :         {
 5680 tgl                       776 GIC     1476657 :             latestdead = offnum;
  143 pg                        777 GNC     1476657 :             HeapTupleHeaderAdvanceConflictHorizon(htup,
                                778                 :                                                   &prstate->snapshotConflictHorizon);
                                779                 :         }
 5680 tgl                       780 GIC    13246671 :         else if (!recent_dead)
 5680 tgl                       781 CBC    12906739 :             break;
 5680 tgl                       782 ECB             : 
                                783                 :         /*
                                784                 :          * If the tuple is not HOT-updated, then we are at the end of this
                                785                 :          * HOT-update chain.
                                786                 :          */
 5680 tgl                       787 GIC     1816589 :         if (!HeapTupleHeaderIsHotUpdated(htup))
 5680 tgl                       788 ECB             :             break;
                                789                 : 
                                790                 :         /* HOT implies it can't have moved to different partition */
 1828 andres                    791 GIC      176028 :         Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
 1828 andres                    792 ECB             : 
                                793                 :         /*
                                794                 :          * Advance to next chain member.
                                795                 :          */
 5680 tgl                       796 GIC      176028 :         Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
 5680 tgl                       797 ECB             :                BufferGetBlockNumber(buffer));
 5680 tgl                       798 GIC      176028 :         offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
 3728 alvherre                  799 CBC      176028 :         priorXmax = HeapTupleHeaderGetUpdateXid(htup);
 5680 tgl                       800 ECB             :     }
                                801                 : 
                                802                 :     /*
                                803                 :      * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
                                804                 :      * the DEAD tuples at the start of the chain are removed and the root line
                                805                 :      * pointer is appropriately redirected.
                                806                 :      */
 5680 tgl                       807 GIC    14549207 :     if (OffsetNumberIsValid(latestdead))
 5680 tgl                       808 ECB             :     {
                                809                 :         /*
                                810                 :          * Mark as unused each intermediate item that we are able to remove
                                811                 :          * from the chain.
                                812                 :          *
                                813                 :          * When the previous item is the last dead tuple seen, we are at the
                                814                 :          * right candidate for redirection.
                                815                 :          */
 5680 tgl                       816 GIC     1509961 :         for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
 5680 tgl                       817 ECB             :         {
 5510 tgl                       818 GIC       74717 :             heap_prune_record_unused(prstate, chainitems[i]);
 5680 tgl                       819 CBC       74717 :             ndeleted++;
 5680 tgl                       820 ECB             :         }
                                821                 : 
                                822                 :         /*
                                823                 :          * If the root entry had been a normal tuple, we are deleting it, so
                                824                 :          * count it in the result.  But changing a redirect (even to DEAD
                                825                 :          * state) doesn't count.
                                826                 :          */
 5680 tgl                       827 GIC     1435244 :         if (ItemIdIsNormal(rootlp))
 5680 tgl                       828 CBC     1401940 :             ndeleted++;
 5680 tgl                       829 ECB             : 
                                830                 :         /*
                                831                 :          * If the DEAD tuple is at the end of the chain, the entire chain is
                                832                 :          * dead and the root line pointer can be marked dead.  Otherwise just
                                833                 :          * redirect the root to the correct chain member.
                                834                 :          */
 5680 tgl                       835 GIC     1435244 :         if (i >= nchain)
 5510 tgl                       836 CBC     1305612 :             heap_prune_record_dead(prstate, rootoffnum);
 5680 tgl                       837 ECB             :         else
 5510 tgl                       838 GIC      129632 :             heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
 5680 tgl                       839 ECB             :     }
 5680 tgl                       840 GIC    13113963 :     else if (nchain < 2 && ItemIdIsRedirected(rootlp))
 5680 tgl                       841 ECB             :     {
                                842                 :         /*
                                843                 :          * We found a redirect item that doesn't point to a valid follow-on
                                844                 :          * item.  This can happen if the loop in heap_page_prune caused us to
                                845                 :          * visit the dead successor of a redirect item before visiting the
                                846                 :          * redirect item.  We can clean up by setting the redirect item to
                                847                 :          * DEAD state.
                                848                 :          */
 5510 tgl                       849 GIC         918 :         heap_prune_record_dead(prstate, rootoffnum);
 5510 tgl                       850 ECB             :     }
                                851                 : 
 5680 tgl                       852 GIC    14549207 :     return ndeleted;
 5680 tgl                       853 ECB             : }
                                854                 : 
                                855                 : /* Record lowest soon-prunable XID */
                                856                 : static void
 5510 tgl                       857 GIC      354203 : heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
 5510 tgl                       858 ECB             : {
                                859                 :     /*
                                860                 :      * This should exactly match the PageSetPrunable macro.  We can't store
                                861                 :      * directly into the page header yet, so we update working state.
                                862                 :      */
 5510 tgl                       863 GIC      354203 :     Assert(TransactionIdIsNormal(xid));
 5510 tgl                       864 CBC      698363 :     if (!TransactionIdIsValid(prstate->new_prune_xid) ||
                                865          344160 :         TransactionIdPrecedes(xid, prstate->new_prune_xid))
                                866           11185 :         prstate->new_prune_xid = xid;
                                867          354203 : }
 5680 tgl                       868 ECB             : 
                                869                 : /* Record line pointer to be redirected */
                                870                 : static void
 5510 tgl                       871 GIC      129632 : heap_prune_record_redirect(PruneState *prstate,
 5624 bruce                     872 ECB             :                            OffsetNumber offnum, OffsetNumber rdoffnum)
                                873                 : {
 5510 tgl                       874 GIC      129632 :     Assert(prstate->nredirected < MaxHeapTuplesPerPage);
 5510 tgl                       875 CBC      129632 :     prstate->redirected[prstate->nredirected * 2] = offnum;
                                876          129632 :     prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
                                877          129632 :     prstate->nredirected++;
                                878          129632 :     Assert(!prstate->marked[offnum]);
                                879          129632 :     prstate->marked[offnum] = true;
                                880          129632 :     Assert(!prstate->marked[rdoffnum]);
                                881          129632 :     prstate->marked[rdoffnum] = true;
 5680                           882          129632 : }
 5680 tgl                       883 ECB             : 
                                884                 : /* Record line pointer to be marked dead */
                                885                 : static void
 5510 tgl                       886 GIC     1306530 : heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
 5680 tgl                       887 ECB             : {
 5510 tgl                       888 GIC     1306530 :     Assert(prstate->ndead < MaxHeapTuplesPerPage);
 5510 tgl                       889 CBC     1306530 :     prstate->nowdead[prstate->ndead] = offnum;
                                890         1306530 :     prstate->ndead++;
                                891         1306530 :     Assert(!prstate->marked[offnum]);
                                892         1306530 :     prstate->marked[offnum] = true;
 5680                           893         1306530 : }
 5680 tgl                       894 ECB             : 
                                895                 : /* Record line pointer to be marked unused */
                                896                 : static void
 5510 tgl                       897 GIC       77849 : heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
 5680 tgl                       898 ECB             : {
 5510 tgl                       899 GIC       77849 :     Assert(prstate->nunused < MaxHeapTuplesPerPage);
 5510 tgl                       900 CBC       77849 :     prstate->nowunused[prstate->nunused] = offnum;
                                901           77849 :     prstate->nunused++;
                                902           77849 :     Assert(!prstate->marked[offnum]);
                                903           77849 :     prstate->marked[offnum] = true;
                                904           77849 : }
 5510 tgl                       905 ECB             : 
                                906                 : 
                                907                 : /*
                                908                 :  * Perform the actual page changes needed by heap_page_prune.
                                909                 :  * It is expected that the caller has a full cleanup lock on the
                                910                 :  * buffer.
                                911                 :  */
                                912                 : void
 5414 heikki.linnakangas        913 GIC      118566 : heap_page_prune_execute(Buffer buffer,
 5510 tgl                       914 ECB             :                         OffsetNumber *redirected, int nredirected,
                                915                 :                         OffsetNumber *nowdead, int ndead,
                                916                 :                         OffsetNumber *nowunused, int nunused)
                                917                 : {
 2545 kgrittn                   918 GIC      118566 :     Page        page = (Page) BufferGetPage(buffer);
 5510 tgl                       919 ECB             :     OffsetNumber *offnum;
                                920                 :     HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
                                921                 : 
                                922                 :     /* Shouldn't be called unless there's something to do */
  733 pg                        923 GIC      118566 :     Assert(nredirected > 0 || ndead > 0 || nunused > 0);
  733 pg                        924 ECB             : 
                                925                 :     /* Update all redirected line pointers */
 5510 tgl                       926 GIC      118566 :     offnum = redirected;
  521 pg                        927 CBC      261341 :     for (int i = 0; i < nredirected; i++)
 5510 tgl                       928 ECB             :     {
 5510 tgl                       929 GIC      142775 :         OffsetNumber fromoff = *offnum++;
 5510 tgl                       930 CBC      142775 :         OffsetNumber tooff = *offnum++;
                                931          142775 :         ItemId      fromlp = PageGetItemId(page, fromoff);
  521 pg                        932 ECB             :         ItemId      tolp PG_USED_FOR_ASSERTS_ONLY;
                                933                 : 
                                934                 : #ifdef USE_ASSERT_CHECKING
                                935                 : 
                                936                 :         /*
                                937                 :          * Any existing item that we set as an LP_REDIRECT (any 'from' item)
                                938                 :          * must be the first item from a HOT chain.  If the item has tuple
                                939                 :          * storage then it can't be a heap-only tuple.  Otherwise we are just
                                940                 :          * maintaining an existing LP_REDIRECT from an existing HOT chain that
                                941                 :          * has been pruned at least once before now.
                                942                 :          */
  521 pg                        943 GIC      142775 :         if (!ItemIdIsRedirected(fromlp))
  521 pg                        944 ECB             :         {
  521 pg                        945 GIC      132479 :             Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
  521 pg                        946 ECB             : 
  521 pg                        947 GIC      132479 :             htup = (HeapTupleHeader) PageGetItem(page, fromlp);
  521 pg                        948 CBC      132479 :             Assert(!HeapTupleHeaderIsHeapOnly(htup));
  521 pg                        949 ECB             :         }
                                950                 :         else
                                951                 :         {
                                952                 :             /* We shouldn't need to redundantly set the redirect */
  521 pg                        953 GIC       10296 :             Assert(ItemIdGetRedirect(fromlp) != tooff);
  521 pg                        954 ECB             :         }
                                955                 : 
                                956                 :         /*
                                957                 :          * The item that we're about to set as an LP_REDIRECT (the 'from'
                                958                 :          * item) will point to an existing item (the 'to' item) that is
                                959                 :          * already a heap-only tuple.  There can be at most one LP_REDIRECT
                                960                 :          * item per HOT chain.
                                961                 :          *
                                962                 :          * We need to keep around an LP_REDIRECT item (after original
                                963                 :          * non-heap-only root tuple gets pruned away) so that it's always
                                964                 :          * possible for VACUUM to easily figure out what TID to delete from
                                965                 :          * indexes when an entire HOT chain becomes dead.  A heap-only tuple
                                966                 :          * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
                                967                 :          * tuple can.
                                968                 :          *
                                969                 :          * This check may miss problems, e.g. the target of a redirect could
                                970                 :          * be marked as unused subsequently. The page_verify_redirects() check
                                971                 :          * below will catch such problems.
                                972                 :          */
  521 pg                        973 GIC      142775 :         tolp = PageGetItemId(page, tooff);
  521 pg                        974 CBC      142775 :         Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
                                975          142775 :         htup = (HeapTupleHeader) PageGetItem(page, tolp);
                                976          142775 :         Assert(HeapTupleHeaderIsHeapOnly(htup));
  521 pg                        977 ECB             : #endif
                                978                 : 
 4808 tgl                       979 GIC      142775 :         ItemIdSetRedirect(fromlp, tooff);
 5510 tgl                       980 ECB             :     }
                                981                 : 
                                982                 :     /* Update all now-dead line pointers */
 5510 tgl                       983 GIC      118566 :     offnum = nowdead;
  521 pg                        984 CBC     1685949 :     for (int i = 0; i < ndead; i++)
 5510 tgl                       985 ECB             :     {
 5510 tgl                       986 GIC     1567383 :         OffsetNumber off = *offnum++;
 5510 tgl                       987 CBC     1567383 :         ItemId      lp = PageGetItemId(page, off);
 5510 tgl                       988 ECB             : 
                                989                 : #ifdef USE_ASSERT_CHECKING
                                990                 : 
                                991                 :         /*
                                992                 :          * An LP_DEAD line pointer must be left behind when the original item
                                993                 :          * (which is dead to everybody) could still be referenced by a TID in
                                994                 :          * an index.  This should never be necessary with any individual
                                995                 :          * heap-only tuple item, though. (It's not clear how much of a problem
                                996                 :          * that would be, but there is no reason to allow it.)
                                997                 :          */
  521 pg                        998 GIC     1567383 :         if (ItemIdHasStorage(lp))
  521 pg                        999 ECB             :         {
  521 pg                       1000 GIC     1542159 :             Assert(ItemIdIsNormal(lp));
  521 pg                       1001 CBC     1542159 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
                               1002         1542159 :             Assert(!HeapTupleHeaderIsHeapOnly(htup));
  521 pg                       1003 ECB             :         }
                               1004                 :         else
                               1005                 :         {
                               1006                 :             /* Whole HOT chain becomes dead */
  521 pg                       1007 GIC       25224 :             Assert(ItemIdIsRedirected(lp));
  521 pg                       1008 ECB             :         }
                               1009                 : #endif
                               1010                 : 
 5510 tgl                      1011 GIC     1567383 :         ItemIdSetDead(lp);
 5510 tgl                      1012 ECB             :     }
                               1013                 : 
                               1014                 :     /* Update all now-unused line pointers */
 5510 tgl                      1015 GIC      118566 :     offnum = nowunused;
  521 pg                       1016 CBC      203209 :     for (int i = 0; i < nunused; i++)
 5510 tgl                      1017 ECB             :     {
 5510 tgl                      1018 GIC       84643 :         OffsetNumber off = *offnum++;
 5510 tgl                      1019 CBC       84643 :         ItemId      lp = PageGetItemId(page, off);
 5510 tgl                      1020 ECB             : 
                               1021                 : #ifdef USE_ASSERT_CHECKING
                               1022                 : 
                               1023                 :         /*
                               1024                 :          * Only heap-only tuples can become LP_UNUSED during pruning.  They
                               1025                 :          * don't need to be left in place as LP_DEAD items until VACUUM gets
                               1026                 :          * around to doing index vacuuming.
                               1027                 :          */
  521 pg                       1028 GIC       84643 :         Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
  521 pg                       1029 CBC       84643 :         htup = (HeapTupleHeader) PageGetItem(page, lp);
                               1030           84643 :         Assert(HeapTupleHeaderIsHeapOnly(htup));
  521 pg                       1031 ECB             : #endif
                               1032                 : 
 5510 tgl                      1033 GIC       84643 :         ItemIdSetUnused(lp);
 5510 tgl                      1034 ECB             :     }
                               1035                 : 
                               1036                 :     /*
                               1037                 :      * Finally, repair any fragmentation, and update the page's hint bit about
                               1038                 :      * whether it has free pointers.
                               1039                 :      */
 5510 tgl                      1040 GIC      118566 :     PageRepairFragmentation(page);
  503 andres                   1041 ECB             : 
                               1042                 :     /*
                               1043                 :      * Now that the page has been modified, assert that redirect items still
                               1044                 :      * point to valid targets.
                               1045                 :      */
  503 andres                   1046 GIC      118566 :     page_verify_redirects(page);
  503 andres                   1047 CBC      118566 : }
  503 andres                   1048 ECB             : 
                               1049                 : 
                               1050                 : /*
                               1051                 :  * If built with assertions, verify that all LP_REDIRECT items point to a
                               1052                 :  * valid item.
                               1053                 :  *
                               1054                 :  * One way that bugs related to HOT pruning show is redirect items pointing to
                               1055                 :  * removed tuples. It's not trivial to reliably check that marking an item
                               1056                 :  * unused will not orphan a redirect item during heap_prune_chain() /
                               1057                 :  * heap_page_prune_execute(), so we additionally check the whole page after
                               1058                 :  * pruning. Without this check such bugs would typically only cause asserts
                               1059                 :  * later, potentially well after the corruption has been introduced.
                               1060                 :  *
                               1061                 :  * Also check comments in heap_page_prune_execute()'s redirection loop.
                               1062                 :  */
                               1063                 : static void
  503 andres                   1064 GIC      118566 : page_verify_redirects(Page page)
  503 andres                   1065 ECB             : {
                               1066                 : #ifdef USE_ASSERT_CHECKING
                               1067                 :     OffsetNumber offnum;
                               1068                 :     OffsetNumber maxoff;
                               1069                 : 
  503 andres                   1070 GIC      118566 :     maxoff = PageGetMaxOffsetNumber(page);
  503 andres                   1071 CBC      118566 :     for (offnum = FirstOffsetNumber;
                               1072         7809329 :          offnum <= maxoff;
                               1073         7690763 :          offnum = OffsetNumberNext(offnum))
  503 andres                   1074 ECB             :     {
  503 andres                   1075 GIC     7690763 :         ItemId      itemid = PageGetItemId(page, offnum);
  503 andres                   1076 ECB             :         OffsetNumber targoff;
                               1077                 :         ItemId      targitem;
                               1078                 :         HeapTupleHeader htup;
                               1079                 : 
  503 andres                   1080 GIC     7690763 :         if (!ItemIdIsRedirected(itemid))
  503 andres                   1081 CBC     7044702 :             continue;
  503 andres                   1082 ECB             : 
  503 andres                   1083 GIC      646061 :         targoff = ItemIdGetRedirect(itemid);
  503 andres                   1084 CBC      646061 :         targitem = PageGetItemId(page, targoff);
  503 andres                   1085 ECB             : 
  503 andres                   1086 GIC      646061 :         Assert(ItemIdIsUsed(targitem));
  503 andres                   1087 CBC      646061 :         Assert(ItemIdIsNormal(targitem));
                               1088          646061 :         Assert(ItemIdHasStorage(targitem));
                               1089          646061 :         htup = (HeapTupleHeader) PageGetItem(page, targitem);
                               1090          646061 :         Assert(HeapTupleHeaderIsHeapOnly(htup));
  503 andres                   1091 ECB             :     }
                               1092                 : #endif
 5680 tgl                      1093 GIC      118566 : }
 5680 tgl                      1094 ECB             : 
                               1095                 : 
                               1096                 : /*
                               1097                 :  * For all items in this page, find their respective root line pointers.
                               1098                 :  * If item k is part of a HOT-chain with root at item j, then we set
                               1099                 :  * root_offsets[k - 1] = j.
                               1100                 :  *
                               1101                 :  * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
                               1102                 :  * Unused entries are filled with InvalidOffsetNumber (zero).
                               1103                 :  *
                               1104                 :  * The function must be called with at least share lock on the buffer, to
                               1105                 :  * prevent concurrent prune operations.
                               1106                 :  *
                               1107                 :  * Note: The information collected here is valid only as long as the caller
                               1108                 :  * holds a pin on the buffer. Once pin is released, a tuple might be pruned
                               1109                 :  * and reused by a completely unrelated tuple.
                               1110                 :  */
                               1111                 : void
 5680 tgl                      1112 GIC      204041 : heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
 5680 tgl                      1113 ECB             : {
                               1114                 :     OffsetNumber offnum,
                               1115                 :                 maxoff;
                               1116                 : 
  969 alvherre                 1117 GIC      204041 :     MemSet(root_offsets, InvalidOffsetNumber,
  969 alvherre                 1118 ECB             :            MaxHeapTuplesPerPage * sizeof(OffsetNumber));
                               1119                 : 
 5680 tgl                      1120 GIC      204041 :     maxoff = PageGetMaxOffsetNumber(page);
 5444 bruce                    1121 CBC    14254285 :     for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
 5680 tgl                      1122 ECB             :     {
 5624 bruce                    1123 GIC    14050244 :         ItemId      lp = PageGetItemId(page, offnum);
 5624 bruce                    1124 ECB             :         HeapTupleHeader htup;
                               1125                 :         OffsetNumber nextoffnum;
                               1126                 :         TransactionId priorXmax;
                               1127                 : 
                               1128                 :         /* skip unused and dead items */
 5680 tgl                      1129 GIC    14050244 :         if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
 5680 tgl                      1130 CBC        2932 :             continue;
 5680 tgl                      1131 ECB             : 
 5680 tgl                      1132 GIC    14047312 :         if (ItemIdIsNormal(lp))
 5680 tgl                      1133 ECB             :         {
 5680 tgl                      1134 GIC    14045874 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
 5680 tgl                      1135 ECB             : 
                               1136                 :             /*
                               1137                 :              * Check if this tuple is part of a HOT-chain rooted at some other
                               1138                 :              * tuple. If so, skip it for now; we'll process it when we find
                               1139                 :              * its root.
                               1140                 :              */
 5680 tgl                      1141 GIC    14045874 :             if (HeapTupleHeaderIsHeapOnly(htup))
 5680 tgl                      1142 CBC        1664 :                 continue;
 5680 tgl                      1143 ECB             : 
                               1144                 :             /*
                               1145                 :              * This is either a plain tuple or the root of a HOT-chain.
                               1146                 :              * Remember it in the mapping.
                               1147                 :              */
 5680 tgl                      1148 GIC    14044210 :             root_offsets[offnum - 1] = offnum;
 5680 tgl                      1149 ECB             : 
                               1150                 :             /* If it's not the start of a HOT-chain, we're done with it */
 5680 tgl                      1151 GIC    14044210 :             if (!HeapTupleHeaderIsHotUpdated(htup))
 5680 tgl                      1152 CBC    14044030 :                 continue;
 5680 tgl                      1153 ECB             : 
                               1154                 :             /* Set up to scan the HOT-chain */
 5680 tgl                      1155 GIC         180 :             nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
 3728 alvherre                 1156 CBC         180 :             priorXmax = HeapTupleHeaderGetUpdateXid(htup);
 5680 tgl                      1157 ECB             :         }
                               1158                 :         else
                               1159                 :         {
                               1160                 :             /* Must be a redirect item. We do not set its root_offsets entry */
 5680 tgl                      1161 GIC        1438 :             Assert(ItemIdIsRedirected(lp));
 5680 tgl                      1162 ECB             :             /* Set up to scan the HOT-chain */
 5680 tgl                      1163 GIC        1438 :             nextoffnum = ItemIdGetRedirect(lp);
 5680 tgl                      1164 CBC        1438 :             priorXmax = InvalidTransactionId;
 5680 tgl                      1165 ECB             :         }
                               1166                 : 
                               1167                 :         /*
                               1168                 :          * Now follow the HOT-chain and collect other tuples in the chain.
                               1169                 :          *
                               1170                 :          * Note: Even though this is a nested loop, the complexity of the
                               1171                 :          * function is O(N) because a tuple in the page should be visited not
                               1172                 :          * more than twice, once in the outer loop and once in HOT-chain
                               1173                 :          * chases.
                               1174                 :          */
                               1175                 :         for (;;)
                               1176                 :         {
                               1177                 :             /* Sanity check (pure paranoia) */
  564 pg                       1178 GIC        1664 :             if (offnum < FirstOffsetNumber)
  564 pg                       1179 LBC           0 :                 break;
  564 pg                       1180 EUB             : 
                               1181                 :             /*
                               1182                 :              * An offset past the end of page's line pointer array is possible
                               1183                 :              * when the array was truncated
                               1184                 :              */
  564 pg                       1185 GIC        1664 :             if (offnum > maxoff)
  732 pg                       1186 LBC           0 :                 break;
  732 pg                       1187 EUB             : 
 5680 tgl                      1188 GIC        1664 :             lp = PageGetItemId(page, nextoffnum);
 5680 tgl                      1189 ECB             : 
                               1190                 :             /* Check for broken chains */
 5680 tgl                      1191 GIC        1664 :             if (!ItemIdIsNormal(lp))
 5680 tgl                      1192 LBC           0 :                 break;
 5680 tgl                      1193 EUB             : 
 5680 tgl                      1194 GIC        1664 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
 5680 tgl                      1195 ECB             : 
 5680 tgl                      1196 GIC        1890 :             if (TransactionIdIsValid(priorXmax) &&
 1984 alvherre                 1197 CBC         226 :                 !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
 5680 tgl                      1198 LBC           0 :                 break;
 5680 tgl                      1199 EUB             : 
                               1200                 :             /* Remember the root line pointer for this item */
 5680 tgl                      1201 GIC        1664 :             root_offsets[nextoffnum - 1] = offnum;
 5680 tgl                      1202 ECB             : 
                               1203                 :             /* Advance to next chain member, if any */
 5680 tgl                      1204 GIC        1664 :             if (!HeapTupleHeaderIsHotUpdated(htup))
 5680 tgl                      1205 ECB             :                 break;
                               1206                 : 
                               1207                 :             /* HOT implies it can't have moved to different partition */
 1828 andres                   1208 GIC          46 :             Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
 1828 andres                   1209 ECB             : 
 5680 tgl                      1210 GIC          46 :             nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
 3728 alvherre                 1211 CBC          46 :             priorXmax = HeapTupleHeaderGetUpdateXid(htup);
 5680 tgl                      1212 ECB             :         }
                               1213                 :     }
 5680 tgl                      1214 GIC      204041 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a