LCOV - differential code coverage report
Current view: top level - src/backend/access/heap - heapam_handler.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 93.0 % 800 744 13 40 3 15 414 27 288 38 432 11
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 34 34 19 3 12 21
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * heapam_handler.c
       4                 :  *    heap table access method code
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/access/heap/heapam_handler.c
      12                 :  *
      13                 :  *
      14                 :  * NOTES
      15                 :  *    This files wires up the lower level heapam.c et al routines with the
      16                 :  *    tableam abstraction.
      17                 :  *
      18                 :  *-------------------------------------------------------------------------
      19                 :  */
      20                 : #include "postgres.h"
      21                 : 
      22                 : #include "access/genam.h"
      23                 : #include "access/heapam.h"
      24                 : #include "access/heaptoast.h"
      25                 : #include "access/multixact.h"
      26                 : #include "access/rewriteheap.h"
      27                 : #include "access/syncscan.h"
      28                 : #include "access/tableam.h"
      29                 : #include "access/tsmapi.h"
      30                 : #include "access/xact.h"
      31                 : #include "catalog/catalog.h"
      32                 : #include "catalog/index.h"
      33                 : #include "catalog/storage.h"
      34                 : #include "catalog/storage_xlog.h"
      35                 : #include "commands/progress.h"
      36                 : #include "executor/executor.h"
      37                 : #include "miscadmin.h"
      38                 : #include "pgstat.h"
      39                 : #include "storage/bufmgr.h"
      40                 : #include "storage/bufpage.h"
      41                 : #include "storage/lmgr.h"
      42                 : #include "storage/predicate.h"
      43                 : #include "storage/procarray.h"
      44                 : #include "storage/smgr.h"
      45                 : #include "utils/builtins.h"
      46                 : #include "utils/rel.h"
      47                 : 
      48                 : static void reform_and_rewrite_tuple(HeapTuple tuple,
      49                 :                                      Relation OldHeap, Relation NewHeap,
      50                 :                                      Datum *values, bool *isnull, RewriteState rwstate);
      51                 : 
      52                 : static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
      53                 :                                    HeapTuple tuple,
      54                 :                                    OffsetNumber tupoffset);
      55                 : 
      56                 : static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
      57                 : 
      58                 : static const TableAmRoutine heapam_methods;
      59                 : 
      60                 : 
      61                 : /* ------------------------------------------------------------------------
      62                 :  * Slot related callbacks for heap AM
      63                 :  * ------------------------------------------------------------------------
      64                 :  */
      65                 : 
      66                 : static const TupleTableSlotOps *
      67 CBC    15838912 : heapam_slot_callbacks(Relation relation)
      68                 : {
      69        15838912 :     return &TTSOpsBufferHeapTuple;
      70                 : }
      71                 : 
      72                 : 
      73                 : /* ------------------------------------------------------------------------
      74                 :  * Index Scan Callbacks for heap AM
      75                 :  * ------------------------------------------------------------------------
      76                 :  */
      77                 : 
      78                 : static IndexFetchTableData *
      79        14712661 : heapam_index_fetch_begin(Relation rel)
      80                 : {
      81        14712661 :     IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));
      82                 : 
      83        14712661 :     hscan->xs_base.rel = rel;
      84        14712661 :     hscan->xs_cbuf = InvalidBuffer;
      85                 : 
      86        14712661 :     return &hscan->xs_base;
      87                 : }
      88                 : 
      89                 : static void
      90        29040280 : heapam_index_fetch_reset(IndexFetchTableData *scan)
      91                 : {
      92        29040280 :     IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
      93                 : 
      94        29040280 :     if (BufferIsValid(hscan->xs_cbuf))
      95                 :     {
      96        10792610 :         ReleaseBuffer(hscan->xs_cbuf);
      97        10792610 :         hscan->xs_cbuf = InvalidBuffer;
      98                 :     }
      99        29040280 : }
     100                 : 
     101                 : static void
     102        14711980 : heapam_index_fetch_end(IndexFetchTableData *scan)
     103                 : {
     104        14711980 :     IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
     105                 : 
     106        14711980 :     heapam_index_fetch_reset(scan);
     107                 : 
     108        14711980 :     pfree(hscan);
     109        14711980 : }
     110                 : 
     111                 : static bool
     112        21147270 : heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
     113                 :                          ItemPointer tid,
     114                 :                          Snapshot snapshot,
     115                 :                          TupleTableSlot *slot,
     116                 :                          bool *call_again, bool *all_dead)
     117                 : {
     118        21147270 :     IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
     119        21147270 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     120                 :     bool        got_heap_tuple;
     121                 : 
     122        21147270 :     Assert(TTS_IS_BUFFERTUPLE(slot));
     123                 : 
     124                 :     /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
     125        21147270 :     if (!*call_again)
     126                 :     {
     127                 :         /* Switch to correct buffer if we don't have it already */
     128        20973478 :         Buffer      prev_buf = hscan->xs_cbuf;
     129                 : 
     130        20973478 :         hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
     131                 :                                               hscan->xs_base.rel,
     132                 :                                               ItemPointerGetBlockNumber(tid));
     133                 : 
     134                 :         /*
     135                 :          * Prune page, but only if we weren't already on this page
     136                 :          */
     137        20973475 :         if (prev_buf != hscan->xs_cbuf)
     138        13036061 :             heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
     139                 :     }
     140                 : 
     141                 :     /* Obtain share-lock on the buffer so we can examine visibility */
     142        21147267 :     LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
     143        21147267 :     got_heap_tuple = heap_hot_search_buffer(tid,
     144                 :                                             hscan->xs_base.rel,
     145                 :                                             hscan->xs_cbuf,
     146                 :                                             snapshot,
     147                 :                                             &bslot->base.tupdata,
     148                 :                                             all_dead,
     149        21147267 :                                             !*call_again);
     150        21147265 :     bslot->base.tupdata.t_self = *tid;
     151        21147265 :     LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
     152                 : 
     153        21147265 :     if (got_heap_tuple)
     154                 :     {
     155                 :         /*
     156                 :          * Only in a non-MVCC snapshot can more than one member of the HOT
     157                 :          * chain be visible.
     158                 :          */
     159        14679054 :         *call_again = !IsMVCCSnapshot(snapshot);
     160                 : 
     161        14679054 :         slot->tts_tableOid = RelationGetRelid(scan->rel);
     162        14679054 :         ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
     163                 :     }
     164                 :     else
     165                 :     {
     166                 :         /* We've reached the end of the HOT chain. */
     167         6468211 :         *call_again = false;
     168                 :     }
     169                 : 
     170        21147265 :     return got_heap_tuple;
     171                 : }
     172                 : 
     173                 : 
     174                 : /* ------------------------------------------------------------------------
     175                 :  * Callbacks for non-modifying operations on individual tuples for heap AM
     176                 :  * ------------------------------------------------------------------------
     177                 :  */
     178                 : 
     179                 : static bool
     180          195976 : heapam_fetch_row_version(Relation relation,
     181                 :                          ItemPointer tid,
     182                 :                          Snapshot snapshot,
     183                 :                          TupleTableSlot *slot)
     184                 : {
     185          195976 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     186                 :     Buffer      buffer;
     187                 : 
     188          195976 :     Assert(TTS_IS_BUFFERTUPLE(slot));
     189                 : 
     190          195976 :     bslot->base.tupdata.t_self = *tid;
     191          195976 :     if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
     192                 :     {
     193                 :         /* store in slot, transferring existing pin */
     194          195961 :         ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
     195          195961 :         slot->tts_tableOid = RelationGetRelid(relation);
     196                 : 
     197          195961 :         return true;
     198                 :     }
     199                 : 
     200              15 :     return false;
     201                 : }
     202                 : 
     203                 : static bool
     204             292 : heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
     205                 : {
     206             292 :     HeapScanDesc hscan = (HeapScanDesc) scan;
     207                 : 
     208             584 :     return ItemPointerIsValid(tid) &&
     209             292 :         ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
     210                 : }
     211                 : 
     212                 : static bool
     213           91614 : heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
     214                 :                                 Snapshot snapshot)
     215                 : {
     216           91614 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     217                 :     bool        res;
     218                 : 
     219           91614 :     Assert(TTS_IS_BUFFERTUPLE(slot));
     220           91614 :     Assert(BufferIsValid(bslot->buffer));
     221                 : 
     222                 :     /*
     223                 :      * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
     224                 :      * Caller should be holding pin, but not lock.
     225                 :      */
     226           91614 :     LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
     227           91614 :     res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
     228                 :                                        bslot->buffer);
     229           91614 :     LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
     230                 : 
     231           91614 :     return res;
     232                 : }
     233                 : 
     234                 : 
     235                 : /* ----------------------------------------------------------------------------
     236                 :  *  Functions for manipulations of physical tuples for heap AM.
     237                 :  * ----------------------------------------------------------------------------
     238                 :  */
     239                 : 
     240                 : static void
     241         7135630 : heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
     242                 :                     int options, BulkInsertState bistate)
     243                 : {
     244         7135630 :     bool        shouldFree = true;
     245         7135630 :     HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
     246                 : 
     247                 :     /* Update the tuple with table oid */
     248         7135630 :     slot->tts_tableOid = RelationGetRelid(relation);
     249         7135630 :     tuple->t_tableOid = slot->tts_tableOid;
     250                 : 
     251                 :     /* Perform the insertion, and copy the resulting ItemPointer */
     252         7135630 :     heap_insert(relation, tuple, cid, options, bistate);
     253         7135618 :     ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
     254                 : 
     255         7135618 :     if (shouldFree)
     256         1357341 :         pfree(tuple);
     257         7135618 : }
     258                 : 
     259                 : static void
     260            2013 : heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
     261                 :                                 CommandId cid, int options,
     262                 :                                 BulkInsertState bistate, uint32 specToken)
     263                 : {
     264            2013 :     bool        shouldFree = true;
     265            2013 :     HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
     266                 : 
     267                 :     /* Update the tuple with table oid */
     268            2013 :     slot->tts_tableOid = RelationGetRelid(relation);
     269            2013 :     tuple->t_tableOid = slot->tts_tableOid;
     270                 : 
     271            2013 :     HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
     272            2013 :     options |= HEAP_INSERT_SPECULATIVE;
     273                 : 
     274                 :     /* Perform the insertion, and copy the resulting ItemPointer */
     275            2013 :     heap_insert(relation, tuple, cid, options, bistate);
     276            2013 :     ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
     277                 : 
     278            2013 :     if (shouldFree)
     279              30 :         pfree(tuple);
     280            2013 : }
     281                 : 
     282                 : static void
     283            2010 : heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
     284                 :                                   uint32 specToken, bool succeeded)
     285                 : {
     286            2010 :     bool        shouldFree = true;
     287            2010 :     HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
     288                 : 
     289                 :     /* adjust the tuple's state accordingly */
     290            2010 :     if (succeeded)
     291            2005 :         heap_finish_speculative(relation, &slot->tts_tid);
     292                 :     else
     293               5 :         heap_abort_speculative(relation, &slot->tts_tid);
     294                 : 
     295            2010 :     if (shouldFree)
     296              30 :         pfree(tuple);
     297            2010 : }
     298                 : 
     299                 : static TM_Result
     300          883906 : heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
     301                 :                     Snapshot snapshot, Snapshot crosscheck, bool wait,
     302                 :                     TM_FailureData *tmfd, bool changingPart)
     303                 : {
     304                 :     /*
     305                 :      * Currently Deleting of index tuples are handled at vacuum, in case if
     306                 :      * the storage itself is cleaning the dead tuples by itself, it is the
     307                 :      * time to call the index tuple deletion also.
     308                 :      */
     309          883906 :     return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
     310                 : }
     311                 : 
     312                 : 
     313                 : static TM_Result
     314          217778 : heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
     315                 :                     CommandId cid, Snapshot snapshot, Snapshot crosscheck,
     316                 :                     bool wait, TM_FailureData *tmfd,
     317                 :                     LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
     318                 : {
     319          217778 :     bool        shouldFree = true;
     320          217778 :     HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
     321                 :     TM_Result   result;
     322                 : 
     323                 :     /* Update the tuple with table oid */
     324          217778 :     slot->tts_tableOid = RelationGetRelid(relation);
     325          217778 :     tuple->t_tableOid = slot->tts_tableOid;
     326                 : 
     327          217778 :     result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
     328                 :                          tmfd, lockmode, update_indexes);
     329          217766 :     ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
     330                 : 
     331                 :     /*
     332                 :      * Decide whether new index entries are needed for the tuple
     333                 :      *
     334                 :      * Note: heap_update returns the tid (location) of the new tuple in the
     335                 :      * t_self field.
     336                 :      *
     337                 :      * If the update is not HOT, we must update all indexes. If the update
     338                 :      * is HOT, it could be that we updated summarized columns, so we either
     339                 :      * update only summarized indexes, or none at all.
     340                 :      */
     341 GNC      217766 :     if (result != TM_Ok)
     342                 :     {
     343             114 :         Assert(*update_indexes == TU_None);
     344             114 :         *update_indexes = TU_None;
     345                 :     }
     346          217652 :     else if (!HeapTupleIsHeapOnly(tuple))
     347          149288 :         Assert(*update_indexes == TU_All);
     348                 :     else
     349           68364 :         Assert((*update_indexes == TU_Summarizing) ||
     350                 :                (*update_indexes == TU_None));
     351                 : 
     352 CBC      217766 :     if (shouldFree)
     353 GIC       31926 :         pfree(tuple);
     354 ECB             : 
     355 CBC      217766 :     return result;
     356                 : }
     357 ECB             : 
     358                 : static TM_Result
     359 GIC       82371 : heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
     360 ECB             :                   TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
     361                 :                   LockWaitPolicy wait_policy, uint8 flags,
     362                 :                   TM_FailureData *tmfd)
     363                 : {
     364 CBC       82371 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     365                 :     TM_Result   result;
     366 ECB             :     Buffer      buffer;
     367 GIC       82371 :     HeapTuple   tuple = &bslot->base.tupdata;
     368                 :     bool        follow_updates;
     369                 : 
     370 CBC       82371 :     follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
     371 GIC       82371 :     tmfd->traversed = false;
     372                 : 
     373           82371 :     Assert(TTS_IS_BUFFERTUPLE(slot));
     374                 : 
     375 CBC       82371 : tuple_lock_retry:
     376 GIC       82482 :     tuple->t_self = *tid;
     377           82482 :     result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
     378 ECB             :                              follow_updates, &buffer, tmfd);
     379                 : 
     380 GIC       82473 :     if (result == TM_Updated &&
     381 CBC         143 :         (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
     382 ECB             :     {
     383                 :         /* Should not encounter speculative tuple on recheck */
     384 CBC         130 :         Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
     385                 : 
     386             130 :         ReleaseBuffer(buffer);
     387 ECB             : 
     388 CBC         130 :         if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
     389                 :         {
     390                 :             SnapshotData SnapshotDirty;
     391 ECB             :             TransactionId priorXmax;
     392                 : 
     393                 :             /* it was updated, so look at the updated version */
     394 GIC         130 :             *tid = tmfd->ctid;
     395 ECB             :             /* updated row should have xmin matching this xmax */
     396 GIC         130 :             priorXmax = tmfd->xmax;
     397 ECB             : 
     398                 :             /* signal that a tuple later in the chain is getting locked */
     399 CBC         130 :             tmfd->traversed = true;
     400                 : 
     401                 :             /*
     402                 :              * fetch target tuple
     403                 :              *
     404                 :              * Loop here to deal with updated or busy tuples
     405 ECB             :              */
     406 GIC         130 :             InitDirtySnapshot(SnapshotDirty);
     407 ECB             :             for (;;)
     408                 :             {
     409 GIC         154 :                 if (ItemPointerIndicatesMovedPartitions(tid))
     410 CBC           9 :                     ereport(ERROR,
     411                 :                             (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
     412                 :                              errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
     413                 : 
     414 GIC         145 :                 tuple->t_self = *tid;
     415             145 :                 if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer, true))
     416                 :                 {
     417 ECB             :                     /*
     418                 :                      * If xmin isn't what we're expecting, the slot must have
     419                 :                      * been recycled and reused for an unrelated tuple.  This
     420                 :                      * implies that the latest version of the row was deleted,
     421                 :                      * so we need do nothing.  (Should be safe to examine xmin
     422                 :                      * without getting buffer's content lock.  We assume
     423                 :                      * reading a TransactionId to be atomic, and Xmin never
     424                 :                      * changes in an existing tuple, except to invalid or
     425                 :                      * frozen, and neither of those can match priorXmax.)
     426                 :                      */
     427 GIC         118 :                     if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
     428                 :                                              priorXmax))
     429                 :                     {
     430 UIC           0 :                         ReleaseBuffer(buffer);
     431 GIC           9 :                         return TM_Deleted;
     432                 :                     }
     433                 : 
     434                 :                     /* otherwise xmin should not be dirty... */
     435             118 :                     if (TransactionIdIsValid(SnapshotDirty.xmin))
     436 UIC           0 :                         ereport(ERROR,
     437                 :                                 (errcode(ERRCODE_DATA_CORRUPTED),
     438 ECB             :                                  errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
     439                 :                                                  SnapshotDirty.xmin,
     440                 :                                                  ItemPointerGetBlockNumber(&tuple->t_self),
     441 EUB             :                                                  ItemPointerGetOffsetNumber(&tuple->t_self),
     442 ECB             :                                                  RelationGetRelationName(relation))));
     443                 : 
     444                 :                     /*
     445                 :                      * If tuple is being updated by other transaction then we
     446                 :                      * have to wait for its commit/abort, or die trying.
     447 EUB             :                      */
     448 GIC         118 :                     if (TransactionIdIsValid(SnapshotDirty.xmax))
     449                 :                     {
     450               2 :                         ReleaseBuffer(buffer);
     451               2 :                         switch (wait_policy)
     452                 :                         {
     453 UIC           0 :                             case LockWaitBlock:
     454               0 :                                 XactLockTableWait(SnapshotDirty.xmax,
     455                 :                                                   relation, &tuple->t_self,
     456                 :                                                   XLTW_FetchUpdated);
     457               0 :                                 break;
     458 GIC           1 :                             case LockWaitSkip:
     459 CBC           1 :                                 if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
     460                 :                                     /* skip instead of waiting */
     461               1 :                                     return TM_WouldBlock;
     462 LBC           0 :                                 break;
     463 GIC           1 :                             case LockWaitError:
     464 GBC           1 :                                 if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
     465               1 :                                     ereport(ERROR,
     466                 :                                             (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     467                 :                                              errmsg("could not obtain lock on row in relation \"%s\"",
     468 EUB             :                                                     RelationGetRelationName(relation))));
     469 LBC           0 :                                 break;
     470 ECB             :                         }
     471 UIC           0 :                         continue;   /* loop back to repeat heap_fetch */
     472 ECB             :                     }
     473 EUB             : 
     474 ECB             :                     /*
     475                 :                      * If tuple was inserted by our own transaction, we have
     476                 :                      * to check cmin against cid: cmin >= current CID means
     477                 :                      * our command cannot see the tuple, so we should ignore
     478                 :                      * it. Otherwise heap_lock_tuple() will throw an error,
     479                 :                      * and so would any later attempt to update or delete the
     480 EUB             :                      * tuple.  (We need not check cmax because
     481                 :                      * HeapTupleSatisfiesDirty will consider a tuple deleted
     482                 :                      * by our transaction dead, regardless of cmax.)  We just
     483                 :                      * checked that priorXmax == xmin, so we can test that
     484                 :                      * variable instead of doing HeapTupleHeaderGetXmin again.
     485                 :                      */
     486 GIC         121 :                     if (TransactionIdIsCurrentTransactionId(priorXmax) &&
     487               5 :                         HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
     488                 :                     {
     489               5 :                         tmfd->xmax = priorXmax;
     490                 : 
     491                 :                         /*
     492                 :                          * Cmin is the problematic value, so store that. See
     493                 :                          * above.
     494                 :                          */
     495               5 :                         tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
     496               5 :                         ReleaseBuffer(buffer);
     497 CBC           5 :                         return TM_SelfModified;
     498 ECB             :                     }
     499                 : 
     500                 :                     /*
     501                 :                      * This is a live tuple, so try to lock it again.
     502                 :                      */
     503 GIC         111 :                     ReleaseBuffer(buffer);
     504             111 :                     goto tuple_lock_retry;
     505                 :                 }
     506 ECB             : 
     507                 :                 /*
     508                 :                  * If the referenced slot was actually empty, the latest
     509                 :                  * version of the row must have been deleted, so we need do
     510                 :                  * nothing.
     511                 :                  */
     512 GIC          27 :                 if (tuple->t_data == NULL)
     513                 :                 {
     514 LBC           0 :                     Assert(!BufferIsValid(buffer));
     515               0 :                     return TM_Deleted;
     516                 :                 }
     517                 : 
     518                 :                 /*
     519                 :                  * As above, if xmin isn't what we're expecting, do nothing.
     520                 :                  */
     521 GIC          27 :                 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
     522                 :                                          priorXmax))
     523 ECB             :                 {
     524 UIC           0 :                     ReleaseBuffer(buffer);
     525 UBC           0 :                     return TM_Deleted;
     526 EUB             :                 }
     527                 : 
     528                 :                 /*
     529                 :                  * If we get here, the tuple was found but failed
     530                 :                  * SnapshotDirty. Assuming the xmin is either a committed xact
     531                 :                  * or our own xact (as it certainly should be if we're trying
     532 ECB             :                  * to modify the tuple), this must mean that the row was
     533                 :                  * updated or deleted by either a committed xact or our own
     534                 :                  * xact.  If it was deleted, we can ignore it; if it was
     535 EUB             :                  * updated then chain up to the next version and repeat the
     536                 :                  * whole process.
     537                 :                  *
     538                 :                  * As above, it should be safe to examine xmax and t_ctid
     539                 :                  * without the buffer content lock, because they can't be
     540                 :                  * changing.  We'd better hold a buffer pin though.
     541                 :                  */
     542 GIC          27 :                 if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
     543                 :                 {
     544                 :                     /* deleted, so forget about it */
     545               3 :                     ReleaseBuffer(buffer);
     546               3 :                     return TM_Deleted;
     547                 :                 }
     548                 : 
     549                 :                 /* updated, so look at the updated row */
     550              24 :                 *tid = tuple->t_data->t_ctid;
     551                 :                 /* updated row should have xmin matching this xmax */
     552              24 :                 priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
     553 CBC          24 :                 ReleaseBuffer(buffer);
     554                 :                 /* loop back to fetch next in chain */
     555                 :             }
     556 ECB             :         }
     557                 :         else
     558                 :         {
     559                 :             /* tuple was deleted, so give up */
     560 UIC           0 :             return TM_Deleted;
     561 ECB             :         }
     562                 :     }
     563                 : 
     564 CBC       82343 :     slot->tts_tableOid = RelationGetRelid(relation);
     565 GIC       82343 :     tuple->t_tableOid = slot->tts_tableOid;
     566                 : 
     567                 :     /* store in slot, transferring existing pin */
     568           82343 :     ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
     569                 : 
     570           82343 :     return result;
     571 EUB             : }
     572                 : 
     573                 : 
     574                 : /* ------------------------------------------------------------------------
     575 ECB             :  * DDL related callbacks for heap AM.
     576                 :  * ------------------------------------------------------------------------
     577                 :  */
     578                 : 
     579                 : static void
     580 GNC       55537 : heapam_relation_set_new_filelocator(Relation rel,
     581                 :                                     const RelFileLocator *newrlocator,
     582                 :                                     char persistence,
     583                 :                                     TransactionId *freezeXid,
     584                 :                                     MultiXactId *minmulti)
     585                 : {
     586                 :     SMgrRelation srel;
     587                 : 
     588                 :     /*
     589                 :      * Initialize to the minimum XID that could put tuples in the table. We
     590                 :      * know that no xacts older than RecentXmin are still running, so that
     591 ECB             :      * will do.
     592                 :      */
     593 GIC       55537 :     *freezeXid = RecentXmin;
     594                 : 
     595                 :     /*
     596                 :      * Similarly, initialize the minimum Multixact to the first value that
     597                 :      * could possibly be stored in tuples in the table.  Running transactions
     598                 :      * could reuse values from their local cache, so we are careful to
     599                 :      * consider all currently running multis.
     600                 :      *
     601                 :      * XXX this could be refined further, but is it worth the hassle?
     602                 :      */
     603           55537 :     *minmulti = GetOldestMultiXactId();
     604 ECB             : 
     605 GNC       55537 :     srel = RelationCreateStorage(*newrlocator, persistence, true);
     606                 : 
     607                 :     /*
     608                 :      * If required, set up an init fork for an unlogged table so that it can
     609                 :      * be correctly reinitialized on restart.  An immediate sync is required
     610                 :      * even if the page has been logged, because the write did not go through
     611                 :      * shared_buffers and therefore a concurrent checkpoint may have moved the
     612                 :      * redo pointer past our xlog record.  Recovery may as well remove it
     613                 :      * while replaying, for example, XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE
     614 ECB             :      * record. Therefore, logging is necessary even if wal_level=minimal.
     615                 :      */
     616 CBC       55537 :     if (persistence == RELPERSISTENCE_UNLOGGED)
     617                 :     {
     618 GIC         103 :         Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
     619                 :                rel->rd_rel->relkind == RELKIND_MATVIEW ||
     620                 :                rel->rd_rel->relkind == RELKIND_TOASTVALUE);
     621             103 :         smgrcreate(srel, INIT_FORKNUM, false);
     622 GNC         103 :         log_smgrcreate(newrlocator, INIT_FORKNUM);
     623 GIC         103 :         smgrimmedsync(srel, INIT_FORKNUM);
     624                 :     }
     625                 : 
     626           55537 :     smgrclose(srel);
     627 CBC       55537 : }
     628                 : 
     629 ECB             : static void
     630 GIC         262 : heapam_relation_nontransactional_truncate(Relation rel)
     631                 : {
     632 CBC         262 :     RelationTruncate(rel, 0);
     633             262 : }
     634 ECB             : 
     635                 : static void
     636 GNC          49 : heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
     637 ECB             : {
     638                 :     SMgrRelation dstrel;
     639                 : 
     640 GNC          49 :     dstrel = smgropen(*newrlocator, rel->rd_backend);
     641 ECB             : 
     642                 :     /*
     643                 :      * Since we copy the file directly without looking at the shared buffers,
     644                 :      * we'd better first flush out any pages of the source relation that are
     645                 :      * in shared buffers.  We assume no new changes will be made while we are
     646                 :      * holding exclusive lock on the rel.
     647                 :      */
     648 GIC          49 :     FlushRelationBuffers(rel);
     649                 : 
     650                 :     /*
     651 ECB             :      * Create and copy all forks of the relation, and schedule unlinking of
     652                 :      * old physical files.
     653                 :      *
     654                 :      * NOTE: any conflict in relfilenumber value will be caught in
     655                 :      * RelationCreateStorage().
     656                 :      */
     657 GNC          49 :     RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true);
     658                 : 
     659 ECB             :     /* copy main fork */
     660 GIC          49 :     RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
     661              49 :                         rel->rd_rel->relpersistence);
     662                 : 
     663                 :     /* copy those extra forks that exist */
     664              49 :     for (ForkNumber forkNum = MAIN_FORKNUM + 1;
     665             196 :          forkNum <= MAX_FORKNUM; forkNum++)
     666                 :     {
     667             147 :         if (smgrexists(RelationGetSmgr(rel), forkNum))
     668 ECB             :         {
     669 GIC           6 :             smgrcreate(dstrel, forkNum, false);
     670                 : 
     671 ECB             :             /*
     672                 :              * WAL log creation if the relation is persistent, or this is the
     673                 :              * init fork of an unlogged relation.
     674                 :              */
     675 CBC           6 :             if (RelationIsPermanent(rel) ||
     676               3 :                 (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
     677                 :                  forkNum == INIT_FORKNUM))
     678 GNC           3 :                 log_smgrcreate(newrlocator, forkNum);
     679 GIC           6 :             RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
     680 CBC           6 :                                 rel->rd_rel->relpersistence);
     681                 :         }
     682                 :     }
     683                 : 
     684                 : 
     685                 :     /* drop old relation, and close new one */
     686              49 :     RelationDropStorage(rel);
     687              49 :     smgrclose(dstrel);
     688 GIC          49 : }
     689 ECB             : 
/*
 * Table AM callback used by CLUSTER and VACUUM FULL: copy all live and
 * recently-dead tuples from OldHeap into NewHeap, either in OldIndex order
 * (via an index scan or a sort) or in physical order.
 *
 * OldIndex may be NULL (VACUUM FULL).  If use_sort, tuples are fed through
 * a tuplesort keyed on OldIndex rather than fetched in index order.
 * OldestXmin / *xid_cutoff / *multi_cutoff drive visibility and freezing in
 * the rewrite module.  On return, *num_tuples, *tups_vacuumed and
 * *tups_recently_dead report copy/removal counts for the caller's stats.
 */
static void
heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
                                 Relation OldIndex, bool use_sort,
                                 TransactionId OldestXmin,
                                 TransactionId *xid_cutoff,
                                 MultiXactId *multi_cutoff,
                                 double *num_tuples,
                                 double *tups_vacuumed,
                                 double *tups_recently_dead)
{
    RewriteState rwstate;
    IndexScanDesc indexScan;
    TableScanDesc tableScan;
    HeapScanDesc heapScan;
    bool        is_system_catalog;
    Tuplesortstate *tuplesort;
    TupleDesc   oldTupDesc = RelationGetDescr(OldHeap);
    TupleDesc   newTupDesc = RelationGetDescr(NewHeap);
    TupleTableSlot *slot;
    int         natts;
    Datum      *values;
    bool       *isnull;
    BufferHeapTupleTableSlot *hslot;
    BlockNumber prev_cblock = InvalidBlockNumber;

    /* Remember if it's a system catalog */
    is_system_catalog = IsSystemRelation(OldHeap);

    /*
     * Valid smgr_targblock implies something already wrote to the relation.
     * This may be harmless, but this function hasn't planned for it.
     */
    Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);

    /* Preallocate values/isnull arrays (reused for every reformed tuple) */
    natts = newTupDesc->natts;
    values = (Datum *) palloc(natts * sizeof(Datum));
    isnull = (bool *) palloc(natts * sizeof(bool));

    /* Initialize the rewrite operation */
    rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
                                 *multi_cutoff);


    /*
     * Set up sorting if wanted. NewHeap is being passed to
     * tuplesort_begin_cluster(), it could have been OldHeap too. It does not
     * really matter, as the goal is to have a heap relation being passed to
     * _bt_log_reuse_page() (which should not be called from this code path).
     */
    if (use_sort)
        tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex, NewHeap,
                                            maintenance_work_mem,
                                            NULL, TUPLESORT_NONE);
    else
        tuplesort = NULL;

    /*
     * Prepare to scan the OldHeap.  To ensure we see recently-dead tuples
     * that still need to be copied, we scan with SnapshotAny and use
     * HeapTupleSatisfiesVacuum for the visibility test.
     */
    if (OldIndex != NULL && !use_sort)
    {
        const int   ci_index[] = {
            PROGRESS_CLUSTER_PHASE,
            PROGRESS_CLUSTER_INDEX_RELID
        };
        int64       ci_val[2];

        /* Set phase and OIDOldIndex to columns */
        ci_val[0] = PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP;
        ci_val[1] = RelationGetRelid(OldIndex);
        pgstat_progress_update_multi_param(2, ci_index, ci_val);

        tableScan = NULL;
        heapScan = NULL;
        indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
        index_rescan(indexScan, NULL, 0, NULL, 0);
    }
    else
    {
        /* In scan-and-sort mode and also VACUUM FULL, set phase */
        pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
                                     PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP);

        tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
        heapScan = (HeapScanDesc) tableScan;
        indexScan = NULL;

        /* Set total heap blocks */
        pgstat_progress_update_param(PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
                                     heapScan->rs_nblocks);
    }

    slot = table_slot_create(OldHeap, NULL);
    hslot = (BufferHeapTupleTableSlot *) slot;

    /*
     * Scan through the OldHeap, either in OldIndex order or sequentially;
     * copy each tuple into the NewHeap, or transiently to the tuplesort
     * module.  Note that we don't bother sorting dead tuples (they won't get
     * to the new table anyway).
     */
    for (;;)
    {
        HeapTuple   tuple;
        Buffer      buf;
        bool        isdead;

        CHECK_FOR_INTERRUPTS();

        if (indexScan != NULL)
        {
            if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
                break;

            /* Since we used no scan keys, should never need to recheck */
            if (indexScan->xs_recheck)
                elog(ERROR, "CLUSTER does not support lossy index conditions");
        }
        else
        {
            if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
            {
                /*
                 * If the last pages of the scan were empty, we would go to
                 * the next phase while heap_blks_scanned != heap_blks_total.
                 * Instead, to ensure that heap_blks_scanned is equivalent to
                 * total_heap_blks after the table scan phase, this parameter
                 * is manually updated to the correct value when the table
                 * scan finishes.
                 */
                pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
                                             heapScan->rs_nblocks);
                break;
            }

            /*
             * In scan-and-sort mode and also VACUUM FULL, set heap blocks
             * scanned
             *
             * Note that heapScan may start at an offset and wrap around, i.e.
             * rs_startblock may be >0, and rs_cblock may end with a number
             * below rs_startblock. To prevent showing this wraparound to the
             * user, we offset rs_cblock by rs_startblock (modulo rs_nblocks).
             */
            if (prev_cblock != heapScan->rs_cblock)
            {
                pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
                                             (heapScan->rs_cblock +
                                              heapScan->rs_nblocks -
                                              heapScan->rs_startblock
                                              ) % heapScan->rs_nblocks + 1);
                prev_cblock = heapScan->rs_cblock;
            }
        }

        tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
        buf = hslot->buffer;

        /* Share-lock the buffer so HeapTupleSatisfiesVacuum sees a stable view */
        LockBuffer(buf, BUFFER_LOCK_SHARE);

        switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
        {
            case HEAPTUPLE_DEAD:
                /* Definitely dead */
                isdead = true;
                break;
            case HEAPTUPLE_RECENTLY_DEAD:
                *tups_recently_dead += 1;
                /* fall through */
            case HEAPTUPLE_LIVE:
                /* Live or recently dead, must copy it */
                isdead = false;
                break;
            case HEAPTUPLE_INSERT_IN_PROGRESS:

                /*
                 * Since we hold exclusive lock on the relation, normally the
                 * only way to see this is if it was inserted earlier in our
                 * own transaction.  However, it can happen in system
                 * catalogs, since we tend to release write lock before commit
                 * there.  Give a warning if neither case applies; but in any
                 * case we had better copy it.
                 */
                if (!is_system_catalog &&
                    !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
                    elog(WARNING, "concurrent insert in progress within table \"%s\"",
                         RelationGetRelationName(OldHeap));
                /* treat as live */
                isdead = false;
                break;
            case HEAPTUPLE_DELETE_IN_PROGRESS:

                /*
                 * Similar situation to INSERT_IN_PROGRESS case.
                 */
                if (!is_system_catalog &&
                    !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
                    elog(WARNING, "concurrent delete in progress within table \"%s\"",
                         RelationGetRelationName(OldHeap));
                /* treat as recently dead */
                *tups_recently_dead += 1;
                isdead = false;
                break;
            default:
                elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                isdead = false; /* keep compiler quiet */
                break;
        }

        LockBuffer(buf, BUFFER_LOCK_UNLOCK);

        if (isdead)
        {
            *tups_vacuumed += 1;
            /* heap rewrite module still needs to see it... */
            if (rewrite_heap_dead_tuple(rwstate, tuple))
            {
                /* A previous recently-dead tuple is now known dead */
                *tups_vacuumed += 1;
                *tups_recently_dead -= 1;
            }
            continue;
        }

        *num_tuples += 1;
        if (tuplesort != NULL)
        {
            tuplesort_putheaptuple(tuplesort, tuple);

            /*
             * In scan-and-sort mode, report increase in number of tuples
             * scanned
             */
            pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
                                         *num_tuples);
        }
        else
        {
            const int   ct_index[] = {
                PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
                PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
            };
            int64       ct_val[2];

            reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
                                     values, isnull, rwstate);

            /*
             * In indexscan mode and also VACUUM FULL, report increase in
             * number of tuples scanned and written
             */
            ct_val[0] = *num_tuples;
            ct_val[1] = *num_tuples;
            pgstat_progress_update_multi_param(2, ct_index, ct_val);
        }
    }

    if (indexScan != NULL)
        index_endscan(indexScan);
    if (tableScan != NULL)
        table_endscan(tableScan);
    if (slot)
        ExecDropSingleTupleTableSlot(slot);

    /*
     * In scan-and-sort mode, complete the sort, then read out all live tuples
     * from the tuplestore and write them to the new relation.
     */
    if (tuplesort != NULL)
    {
        double      n_tuples = 0;

        /* Report that we are now sorting tuples */
        pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
                                     PROGRESS_CLUSTER_PHASE_SORT_TUPLES);

        tuplesort_performsort(tuplesort);

        /* Report that we are now writing new heap */
        pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
                                     PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP);

        for (;;)
        {
            HeapTuple   tuple;

            CHECK_FOR_INTERRUPTS();

            tuple = tuplesort_getheaptuple(tuplesort, true);
            if (tuple == NULL)
                break;

            n_tuples += 1;
            reform_and_rewrite_tuple(tuple,
                                     OldHeap, NewHeap,
                                     values, isnull,
                                     rwstate);
            /* Report n_tuples */
            pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
                                         n_tuples);
        }

        tuplesort_end(tuplesort);
    }

    /* Write out any remaining tuples, and fsync if needed */
    end_heap_rewrite(rwstate);

    /* Clean up */
    pfree(values);
    pfree(isnull);
}
    1005                 : 
    1006                 : static bool
    1007 CBC      147358 : heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
    1008                 :                                BufferAccessStrategy bstrategy)
    1009                 : {
    1010 GIC      147358 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    1011 ECB             : 
    1012                 :     /*
    1013                 :      * We must maintain a pin on the target page's buffer to ensure that
    1014                 :      * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
    1015                 :      * under us.  Hence, pin the page until we are done looking at it.  We
    1016                 :      * also choose to hold sharelock on the buffer throughout --- we could
    1017                 :      * release and re-acquire sharelock for each tuple, but since we aren't
    1018                 :      * doing much work per tuple, the extra lock traffic is probably better
    1019                 :      * avoided.
    1020                 :      */
    1021 GIC      147358 :     hscan->rs_cblock = blockno;
    1022          147358 :     hscan->rs_cindex = FirstOffsetNumber;
    1023 CBC      147358 :     hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
    1024                 :                                         blockno, RBM_NORMAL, bstrategy);
    1025 GIC      147358 :     LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1026 ECB             : 
    1027                 :     /* in heap all blocks can contain tuples, so always return true */
    1028 GIC      147358 :     return true;
    1029                 : }
    1030                 : 
/*
 * Return the next tuple for ANALYZE to sample from the page previously
 * pinned and share-locked by heapam_scan_analyze_next_block().
 *
 * Scans forward from hscan->rs_cindex over the page's line pointers,
 * classifying each tuple with HeapTupleSatisfiesVacuum() against OldestXmin
 * and updating *liverows / *deadrows accordingly.  When a sampleable tuple
 * is found it is stored in "slot" and we return true with the buffer STILL
 * share-locked (rs_cindex is advanced past it, so the next call resumes
 * after it).  When the page is exhausted we release the buffer lock and
 * pin, clear the slot, and return false.
 */
static bool
heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
                               double *liverows, double *deadrows,
                               TupleTableSlot *slot)
{
    HeapScanDesc hscan = (HeapScanDesc) scan;
    Page        targpage;
    OffsetNumber maxoffset;
    BufferHeapTupleTableSlot *hslot;

    Assert(TTS_IS_BUFFERTUPLE(slot));

    hslot = (BufferHeapTupleTableSlot *) slot;
    targpage = BufferGetPage(hscan->rs_cbuf);
    maxoffset = PageGetMaxOffsetNumber(targpage);

    /* Inner loop over all tuples on the selected page */
    for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
    {
        ItemId      itemid;
        /* reuse the slot's embedded tuple as scratch space for this item */
        HeapTuple   targtuple = &hslot->base.tupdata;
        bool        sample_it = false;

        itemid = PageGetItemId(targpage, hscan->rs_cindex);

        /*
         * We ignore unused and redirect line pointers.  DEAD line pointers
         * should be counted as dead, because we need vacuum to run to get rid
         * of them.  Note that this rule agrees with the way that
         * heap_page_prune() counts things.
         */
        if (!ItemIdIsNormal(itemid))
        {
            if (ItemIdIsDead(itemid))
                *deadrows += 1;
            continue;
        }

        /* Point the scratch tuple at this item's data on the page. */
        ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);

        targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
        targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
        targtuple->t_len = ItemIdGetLength(itemid);

        switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
                                         hscan->rs_cbuf))
        {
            case HEAPTUPLE_LIVE:
                /* Unconditionally live: sample it and count it as live. */
                sample_it = true;
                *liverows += 1;
                break;

            case HEAPTUPLE_DEAD:
            case HEAPTUPLE_RECENTLY_DEAD:
                /* Count dead and recently-dead rows */
                *deadrows += 1;
                break;

            case HEAPTUPLE_INSERT_IN_PROGRESS:

                /*
                 * Insert-in-progress rows are not counted.  We assume that
                 * when the inserting transaction commits or aborts, it will
                 * send a stats message to increment the proper count.  This
                 * works right only if that transaction ends after we finish
                 * analyzing the table; if things happen in the other order,
                 * its stats update will be overwritten by ours.  However, the
                 * error will be large only if the other transaction runs long
                 * enough to insert many tuples, so assuming it will finish
                 * after us is the safer option.
                 *
                 * A special case is that the inserting transaction might be
                 * our own.  In this case we should count and sample the row,
                 * to accommodate users who load a table and analyze it in one
                 * transaction.  (pgstat_report_analyze has to adjust the
                 * numbers we report to the cumulative stats system to make
                 * this come out right.)
                 */
                if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
                {
                    sample_it = true;
                    *liverows += 1;
                }
                break;

            case HEAPTUPLE_DELETE_IN_PROGRESS:

                /*
                 * We count and sample delete-in-progress rows the same as
                 * live ones, so that the stats counters come out right if the
                 * deleting transaction commits after us, per the same
                 * reasoning given above.
                 *
                 * If the delete was done by our own transaction, however, we
                 * must count the row as dead to make pgstat_report_analyze's
                 * stats adjustments come out right.  (Note: this works out
                 * properly when the row was both inserted and deleted in our
                 * xact.)
                 *
                 * The net effect of these choices is that we act as though an
                 * IN_PROGRESS transaction hasn't happened yet, except if it
                 * is our own transaction, which we assume has happened.
                 *
                 * This approach ensures that we behave sanely if we see both
                 * the pre-image and post-image rows for a row being updated
                 * by a concurrent transaction: we will sample the pre-image
                 * but not the post-image.  We also get sane results if the
                 * concurrent transaction never commits.
                 */
                if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
                    *deadrows += 1;
                else
                {
                    sample_it = true;
                    *liverows += 1;
                }
                break;

            default:
                elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                break;
        }

        if (sample_it)
        {
            ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
            /* advance past this item so the next call resumes after it */
            hscan->rs_cindex++;

            /* note that we leave the buffer locked here! */
            return true;
        }
    }

    /* Now release the lock and pin on the page */
    UnlockReleaseBuffer(hscan->rs_cbuf);
    hscan->rs_cbuf = InvalidBuffer;

    /* also prevent old slot contents from having pin on page */
    ExecClearTuple(slot);

    return false;
}
    1173                 : 
    1174                 : static double
    1175 GIC       66563 : heapam_index_build_range_scan(Relation heapRelation,
    1176 ECB             :                               Relation indexRelation,
    1177                 :                               IndexInfo *indexInfo,
    1178                 :                               bool allow_sync,
    1179                 :                               bool anyvisible,
    1180                 :                               bool progress,
    1181                 :                               BlockNumber start_blockno,
    1182                 :                               BlockNumber numblocks,
    1183                 :                               IndexBuildCallback callback,
    1184                 :                               void *callback_state,
    1185                 :                               TableScanDesc scan)
    1186                 : {
    1187                 :     HeapScanDesc hscan;
    1188                 :     bool        is_system_catalog;
    1189                 :     bool        checking_uniqueness;
    1190                 :     HeapTuple   heapTuple;
    1191                 :     Datum       values[INDEX_MAX_KEYS];
    1192                 :     bool        isnull[INDEX_MAX_KEYS];
    1193                 :     double      reltuples;
    1194                 :     ExprState  *predicate;
    1195                 :     TupleTableSlot *slot;
    1196                 :     EState     *estate;
    1197                 :     ExprContext *econtext;
    1198                 :     Snapshot    snapshot;
    1199 GIC       66563 :     bool        need_unregister_snapshot = false;
    1200                 :     TransactionId OldestXmin;
    1201           66563 :     BlockNumber previous_blkno = InvalidBlockNumber;
    1202           66563 :     BlockNumber root_blkno = InvalidBlockNumber;
    1203                 :     OffsetNumber root_offsets[MaxHeapTuplesPerPage];
    1204                 : 
    1205                 :     /*
    1206                 :      * sanity checks
    1207                 :      */
    1208           66563 :     Assert(OidIsValid(indexRelation->rd_rel->relam));
    1209                 : 
    1210                 :     /* Remember if it's a system catalog */
    1211           66563 :     is_system_catalog = IsSystemRelation(heapRelation);
    1212                 : 
    1213                 :     /* See whether we're verifying uniqueness/exclusion properties */
    1214           76455 :     checking_uniqueness = (indexInfo->ii_Unique ||
    1215 CBC        9892 :                            indexInfo->ii_ExclusionOps != NULL);
    1216                 : 
    1217 ECB             :     /*
    1218                 :      * "Any visible" mode is not compatible with uniqueness checks; make sure
    1219                 :      * only one of those is requested.
    1220                 :      */
    1221 GIC       66563 :     Assert(!(anyvisible && checking_uniqueness));
    1222                 : 
    1223                 :     /*
    1224 ECB             :      * Need an EState for evaluation of index expressions and partial-index
    1225                 :      * predicates.  Also a slot to hold the current tuple.
    1226                 :      */
    1227 CBC       66563 :     estate = CreateExecutorState();
    1228 GIC       66563 :     econtext = GetPerTupleExprContext(estate);
    1229           66563 :     slot = table_slot_create(heapRelation, NULL);
    1230 ECB             : 
    1231                 :     /* Arrange for econtext's scan tuple to be the tuple under test */
    1232 GIC       66563 :     econtext->ecxt_scantuple = slot;
    1233                 : 
    1234                 :     /* Set up execution state for predicate, if any. */
    1235           66563 :     predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
    1236                 : 
    1237 ECB             :     /*
    1238                 :      * Prepare for scan of the base relation.  In a normal index build, we use
    1239                 :      * SnapshotAny because we must retrieve all tuples and do our own time
    1240                 :      * qual checks (because we have to index RECENTLY_DEAD tuples). In a
    1241                 :      * concurrent build, or during bootstrap, we take a regular MVCC snapshot
    1242                 :      * and index whatever's live according to that.
    1243                 :      */
    1244 CBC       66563 :     OldestXmin = InvalidTransactionId;
    1245 ECB             : 
    1246                 :     /* okay to ignore lazy VACUUMs here */
    1247 GIC       66563 :     if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
    1248 CBC       17440 :         OldestXmin = GetOldestNonRemovableTransactionId(heapRelation);
    1249                 : 
    1250 GIC       66563 :     if (!scan)
    1251 ECB             :     {
    1252                 :         /*
    1253                 :          * Serial index build.
    1254                 :          *
    1255                 :          * Must begin our own heap scan in this case.  We may also need to
    1256                 :          * register a snapshot whose lifetime is under our direct control.
    1257                 :          */
    1258 GIC       66367 :         if (!TransactionIdIsValid(OldestXmin))
    1259                 :         {
    1260 CBC       49079 :             snapshot = RegisterSnapshot(GetTransactionSnapshot());
    1261 GIC       49079 :             need_unregister_snapshot = true;
    1262                 :         }
    1263 ECB             :         else
    1264 CBC       17288 :             snapshot = SnapshotAny;
    1265                 : 
    1266           66367 :         scan = table_beginscan_strat(heapRelation,  /* relation */
    1267                 :                                      snapshot,  /* snapshot */
    1268                 :                                      0, /* number of keys */
    1269                 :                                      NULL,  /* scan key */
    1270                 :                                      true,  /* buffer access strategy OK */
    1271                 :                                      allow_sync);   /* syncscan OK? */
    1272                 :     }
    1273                 :     else
    1274 ECB             :     {
    1275                 :         /*
    1276                 :          * Parallel index build.
    1277                 :          *
    1278                 :          * Parallel case never registers/unregisters own snapshot.  Snapshot
    1279                 :          * is taken from parallel heap scan, and is SnapshotAny or an MVCC
    1280                 :          * snapshot, based on same criteria as serial case.
    1281                 :          */
    1282 CBC         196 :         Assert(!IsBootstrapProcessingMode());
    1283 GIC         196 :         Assert(allow_sync);
    1284             196 :         snapshot = scan->rs_snapshot;
    1285                 :     }
    1286                 : 
    1287           66563 :     hscan = (HeapScanDesc) scan;
    1288                 : 
    1289                 :     /*
    1290                 :      * Must have called GetOldestNonRemovableTransactionId() if using
    1291                 :      * SnapshotAny.  Shouldn't have for an MVCC snapshot. (It's especially
    1292                 :      * worth checking this for parallel builds, since ambuild routines that
    1293                 :      * support parallel builds must work these details out for themselves.)
    1294                 :      */
    1295           66563 :     Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
    1296           66563 :     Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
    1297                 :            !TransactionIdIsValid(OldestXmin));
    1298 CBC       66563 :     Assert(snapshot == SnapshotAny || !anyvisible);
    1299 ECB             : 
    1300                 :     /* Publish number of blocks to scan */
    1301 GIC       66563 :     if (progress)
    1302                 :     {
    1303 ECB             :         BlockNumber nblocks;
    1304                 : 
    1305 GIC       64971 :         if (hscan->rs_base.rs_parallel != NULL)
    1306                 :         {
    1307                 :             ParallelBlockTableScanDesc pbscan;
    1308                 : 
    1309              71 :             pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
    1310              71 :             nblocks = pbscan->phs_nblocks;
    1311 ECB             :         }
    1312                 :         else
    1313 GIC       64900 :             nblocks = hscan->rs_nblocks;
    1314 ECB             : 
    1315 GIC       64971 :         pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
    1316                 :                                      nblocks);
    1317 ECB             :     }
    1318                 : 
    1319                 :     /* set our scan endpoints */
    1320 GIC       66563 :     if (!allow_sync)
    1321 CBC        1738 :         heap_setscanlimits(scan, start_blockno, numblocks);
    1322                 :     else
    1323                 :     {
    1324                 :         /* syncscan can only be requested on whole relation */
    1325           64825 :         Assert(start_blockno == 0);
    1326           64825 :         Assert(numblocks == InvalidBlockNumber);
    1327                 :     }
    1328                 : 
    1329           66563 :     reltuples = 0;
    1330                 : 
    1331 ECB             :     /*
    1332                 :      * Scan all tuples in the base relation.
    1333                 :      */
    1334 GIC    14096084 :     while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1335                 :     {
    1336 ECB             :         bool        tupleIsAlive;
    1337                 : 
    1338 GIC    14029524 :         CHECK_FOR_INTERRUPTS();
    1339                 : 
    1340                 :         /* Report scan progress, if asked to. */
    1341 CBC    14029524 :         if (progress)
    1342 ECB             :         {
    1343 GIC    12751582 :             BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
    1344                 : 
    1345 CBC    12751582 :             if (blocks_done != previous_blkno)
    1346                 :             {
    1347 GIC      190470 :                 pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
    1348                 :                                              blocks_done);
    1349          190470 :                 previous_blkno = blocks_done;
    1350 ECB             :             }
    1351                 :         }
    1352                 : 
    1353                 :         /*
    1354                 :          * When dealing with a HOT-chain of updated tuples, we want to index
    1355                 :          * the values of the live tuple (if any), but index it under the TID
    1356                 :          * of the chain's root tuple.  This approach is necessary to preserve
    1357                 :          * the HOT-chain structure in the heap. So we need to be able to find
    1358                 :          * the root item offset for every tuple that's in a HOT-chain.  When
    1359                 :          * first reaching a new page of the relation, call
    1360                 :          * heap_get_root_tuples() to build a map of root item offsets on the
    1361                 :          * page.
    1362                 :          *
    1363                 :          * It might look unsafe to use this information across buffer
    1364                 :          * lock/unlock.  However, we hold ShareLock on the table so no
    1365                 :          * ordinary insert/update/delete should occur; and we hold pin on the
    1366                 :          * buffer continuously while visiting the page, so no pruning
    1367                 :          * operation can occur either.
    1368                 :          *
    1369                 :          * In cases with only ShareUpdateExclusiveLock on the table, it's
    1370                 :          * possible for some HOT tuples to appear that we didn't know about
    1371                 :          * when we first read the page.  To handle that case, we re-obtain the
    1372                 :          * list of root offsets when a HOT tuple points to a root item that we
    1373                 :          * don't know about.
    1374                 :          *
    1375                 :          * Also, although our opinions about tuple liveness could change while
    1376                 :          * we scan the page (due to concurrent transaction commits/aborts),
    1377                 :          * the chain root locations won't, so this info doesn't need to be
    1378                 :          * rebuilt after waiting for another transaction.
    1379                 :          *
    1380                 :          * Note the implied assumption that there is no more than one live
    1381                 :          * tuple per HOT-chain --- else we could create more than one index
    1382                 :          * entry pointing to the same root tuple.
    1383                 :          */
    1384 GIC    14029524 :         if (hscan->rs_cblock != root_blkno)
    1385                 :         {
    1386          203661 :             Page        page = BufferGetPage(hscan->rs_cbuf);
    1387                 : 
    1388          203661 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1389          203661 :             heap_get_root_tuples(page, root_offsets);
    1390          203661 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1391                 : 
    1392          203661 :             root_blkno = hscan->rs_cblock;
    1393                 :         }
    1394                 : 
    1395        14029524 :         if (snapshot == SnapshotAny)
    1396                 :         {
    1397                 :             /* do our own time qual check */
    1398                 :             bool        indexIt;
    1399                 :             TransactionId xwait;
    1400 ECB             : 
    1401 GIC     7002533 :     recheck:
    1402 ECB             : 
    1403                 :             /*
    1404                 :              * We could possibly get away with not locking the buffer here,
    1405                 :              * since caller should hold ShareLock on the relation, but let's
    1406                 :              * be conservative about it.  (This remark is still correct even
    1407                 :              * with HOT-pruning: our pin on the buffer prevents pruning.)
    1408                 :              */
    1409 GIC     7002533 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1410                 : 
    1411 ECB             :             /*
    1412                 :              * The criteria for counting a tuple as live in this block need to
    1413                 :              * match what analyze.c's heapam_scan_analyze_next_tuple() does,
    1414                 :              * otherwise CREATE INDEX and ANALYZE may produce wildly different
    1415                 :              * reltuples values, e.g. when there are many recently-dead
    1416                 :              * tuples.
    1417                 :              */
    1418 GIC     7002533 :             switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
    1419                 :                                              hscan->rs_cbuf))
    1420                 :             {
    1421             334 :                 case HEAPTUPLE_DEAD:
    1422                 :                     /* Definitely dead, we can ignore it */
    1423             334 :                     indexIt = false;
    1424             334 :                     tupleIsAlive = false;
    1425 CBC         334 :                     break;
    1426 GIC     5116582 :                 case HEAPTUPLE_LIVE:
    1427                 :                     /* Normal case, index and unique-check it */
    1428         5116582 :                     indexIt = true;
    1429         5116582 :                     tupleIsAlive = true;
    1430                 :                     /* Count it as live, too */
    1431         5116582 :                     reltuples += 1;
    1432         5116582 :                     break;
    1433          116716 :                 case HEAPTUPLE_RECENTLY_DEAD:
    1434 ECB             : 
    1435                 :                     /*
    1436                 :                      * If tuple is recently deleted then we must index it
    1437                 :                      * anyway to preserve MVCC semantics.  (Pre-existing
    1438                 :                      * transactions could try to use the index after we finish
    1439                 :                      * building it, and may need to see such tuples.)
    1440                 :                      *
    1441                 :                      * However, if it was HOT-updated then we must only index
    1442                 :                      * the live tuple at the end of the HOT-chain.  Since this
    1443                 :                      * breaks semantics for pre-existing snapshots, mark the
    1444                 :                      * index as unusable for them.
    1445                 :                      *
    1446                 :                      * We don't count recently-dead tuples in reltuples, even
    1447                 :                      * if we index them; see heapam_scan_analyze_next_tuple().
    1448                 :                      */
    1449 CBC      116716 :                     if (HeapTupleIsHotUpdated(heapTuple))
    1450                 :                     {
    1451 GIC          22 :                         indexIt = false;
    1452                 :                         /* mark the index as unsafe for old snapshots */
    1453              22 :                         indexInfo->ii_BrokenHotChain = true;
    1454                 :                     }
    1455                 :                     else
    1456          116694 :                         indexIt = true;
    1457                 :                     /* In any case, exclude the tuple from unique-checking */
    1458          116716 :                     tupleIsAlive = false;
    1459          116716 :                     break;
    1460         1768826 :                 case HEAPTUPLE_INSERT_IN_PROGRESS:
    1461                 : 
    1462                 :                     /*
    1463                 :                      * In "anyvisible" mode, this tuple is visible and we
    1464                 :                      * don't need any further checks.
    1465 ECB             :                      */
    1466 GIC     1768826 :                     if (anyvisible)
    1467 ECB             :                     {
    1468 GIC       30736 :                         indexIt = true;
    1469 CBC       30736 :                         tupleIsAlive = true;
    1470 GIC       30736 :                         reltuples += 1;
    1471           30736 :                         break;
    1472 ECB             :                     }
    1473                 : 
    1474                 :                     /*
    1475                 :                      * Since caller should hold ShareLock or better, normally
    1476                 :                      * the only way to see this is if it was inserted earlier
    1477                 :                      * in our own transaction.  However, it can happen in
    1478                 :                      * system catalogs, since we tend to release write lock
    1479                 :                      * before commit there.  Give a warning if neither case
    1480                 :                      * applies.
    1481                 :                      */
    1482 CBC     1738090 :                     xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
    1483 GIC     1738090 :                     if (!TransactionIdIsCurrentTransactionId(xwait))
    1484 ECB             :                     {
    1485 CBC          27 :                         if (!is_system_catalog)
    1486 LBC           0 :                             elog(WARNING, "concurrent insert in progress within table \"%s\"",
    1487 ECB             :                                  RelationGetRelationName(heapRelation));
    1488                 : 
    1489                 :                         /*
    1490                 :                          * If we are performing uniqueness checks, indexing
    1491                 :                          * such a tuple could lead to a bogus uniqueness
    1492                 :                          * failure.  In that case we wait for the inserting
    1493                 :                          * transaction to finish and check again.
    1494                 :                          */
    1495 GIC          27 :                         if (checking_uniqueness)
    1496                 :                         {
    1497                 :                             /*
    1498 ECB             :                              * Must drop the lock on the buffer before we wait
    1499                 :                              */
    1500 UIC           0 :                             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1501 LBC           0 :                             XactLockTableWait(xwait, heapRelation,
    1502 EUB             :                                               &heapTuple->t_self,
    1503                 :                                               XLTW_InsertIndexUnique);
    1504 UIC           0 :                             CHECK_FOR_INTERRUPTS();
    1505               0 :                             goto recheck;
    1506                 :                         }
    1507                 :                     }
    1508                 :                     else
    1509                 :                     {
    1510                 :                         /*
    1511 ECB             :                          * For consistency with
    1512                 :                          * heapam_scan_analyze_next_tuple(), count
    1513                 :                          * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
    1514                 :                          * when inserted by our own transaction.
    1515                 :                          */
    1516 GBC     1738063 :                         reltuples += 1;
    1517 EUB             :                     }
    1518                 : 
    1519                 :                     /*
    1520                 :                      * We must index such tuples, since if the index build
    1521                 :                      * commits then they're good.
    1522                 :                      */
    1523 GIC     1738090 :                     indexIt = true;
    1524         1738090 :                     tupleIsAlive = true;
    1525         1738090 :                     break;
    1526              75 :                 case HEAPTUPLE_DELETE_IN_PROGRESS:
    1527                 : 
    1528                 :                     /*
    1529                 :                      * As with INSERT_IN_PROGRESS case, this is unexpected
    1530                 :                      * unless it's our own deletion or a system catalog; but
    1531                 :                      * in anyvisible mode, this tuple is visible.
    1532 ECB             :                      */
    1533 GIC          75 :                     if (anyvisible)
    1534                 :                     {
    1535 UIC           0 :                         indexIt = true;
    1536               0 :                         tupleIsAlive = false;
    1537               0 :                         reltuples += 1;
    1538               0 :                         break;
    1539 ECB             :                     }
    1540                 : 
    1541 CBC          75 :                     xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
    1542              75 :                     if (!TransactionIdIsCurrentTransactionId(xwait))
    1543                 :                     {
    1544 GIC          36 :                         if (!is_system_catalog)
    1545 UIC           0 :                             elog(WARNING, "concurrent delete in progress within table \"%s\"",
    1546                 :                                  RelationGetRelationName(heapRelation));
    1547                 : 
    1548                 :                         /*
    1549 ECB             :                          * If we are performing uniqueness checks, assuming
    1550                 :                          * the tuple is dead could lead to missing a
    1551 EUB             :                          * uniqueness violation.  In that case we wait for the
    1552                 :                          * deleting transaction to finish and check again.
    1553                 :                          *
    1554                 :                          * Also, if it's a HOT-updated tuple, we should not
    1555                 :                          * index it but rather the live tuple at the end of
    1556                 :                          * the HOT-chain.  However, the deleting transaction
    1557 ECB             :                          * could abort, possibly leaving this tuple as live
    1558                 :                          * after all, in which case it has to be indexed. The
    1559                 :                          * only way to know what to do is to wait for the
    1560                 :                          * deleting transaction to finish and check again.
    1561 EUB             :                          */
    1562 GIC          36 :                         if (checking_uniqueness ||
    1563              36 :                             HeapTupleIsHotUpdated(heapTuple))
    1564                 :                         {
    1565                 :                             /*
    1566                 :                              * Must drop the lock on the buffer before we wait
    1567                 :                              */
    1568 UIC           0 :                             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1569               0 :                             XactLockTableWait(xwait, heapRelation,
    1570                 :                                               &heapTuple->t_self,
    1571                 :                                               XLTW_InsertIndexUnique);
    1572               0 :                             CHECK_FOR_INTERRUPTS();
    1573               0 :                             goto recheck;
    1574                 :                         }
    1575                 : 
    1576                 :                         /*
    1577                 :                          * Otherwise index it but don't check for uniqueness,
    1578 ECB             :                          * the same as a RECENTLY_DEAD tuple.
    1579                 :                          */
    1580 GIC          36 :                         indexIt = true;
    1581                 : 
    1582                 :                         /*
    1583                 :                          * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
    1584 EUB             :                          * if they were not deleted by the current
    1585                 :                          * transaction.  That's what
    1586                 :                          * heapam_scan_analyze_next_tuple() does, and we want
    1587                 :                          * the behavior to be consistent.
    1588                 :                          */
    1589 GBC          36 :                         reltuples += 1;
    1590                 :                     }
    1591 GIC          39 :                     else if (HeapTupleIsHotUpdated(heapTuple))
    1592                 :                     {
    1593                 :                         /*
    1594                 :                          * It's a HOT-updated tuple deleted by our own xact.
    1595                 :                          * We can assume the deletion will commit (else the
    1596 ECB             :                          * index contents don't matter), so treat the same as
    1597                 :                          * RECENTLY_DEAD HOT-updated tuples.
    1598                 :                          */
    1599 UIC           0 :                         indexIt = false;
    1600                 :                         /* mark the index as unsafe for old snapshots */
    1601               0 :                         indexInfo->ii_BrokenHotChain = true;
    1602                 :                     }
    1603                 :                     else
    1604                 :                     {
    1605 ECB             :                         /*
    1606                 :                          * It's a regular tuple deleted by our own xact. Index
    1607                 :                          * it, but don't check for uniqueness nor count in
    1608                 :                          * reltuples, the same as a RECENTLY_DEAD tuple.
    1609                 :                          */
    1610 GIC          39 :                         indexIt = true;
    1611                 :                     }
    1612                 :                     /* In any case, exclude the tuple from unique-checking */
    1613              75 :                     tupleIsAlive = false;
    1614              75 :                     break;
    1615 UBC           0 :                 default:
    1616 UIC           0 :                     elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1617 EUB             :                     indexIt = tupleIsAlive = false; /* keep compiler quiet */
    1618                 :                     break;
    1619                 :             }
    1620                 : 
    1621 GIC     7002533 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1622                 : 
    1623         7002533 :             if (!indexIt)
    1624             356 :                 continue;
    1625                 :         }
    1626 ECB             :         else
    1627                 :         {
    1628                 :             /* heap_getnext did the time qual check */
    1629 CBC     7026991 :             tupleIsAlive = true;
    1630         7026991 :             reltuples += 1;
    1631 EUB             :         }
    1632                 : 
    1633 GIC    14029168 :         MemoryContextReset(econtext->ecxt_per_tuple_memory);
    1634                 : 
    1635                 :         /* Set up for predicate or expression evaluation */
    1636        14029168 :         ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
    1637 ECB             : 
    1638                 :         /*
    1639                 :          * In a partial index, discard tuples that don't satisfy the
    1640                 :          * predicate.
    1641                 :          */
    1642 GIC    14029168 :         if (predicate != NULL)
    1643                 :         {
    1644           41274 :             if (!ExecQual(predicate, econtext))
    1645 CBC       11793 :                 continue;
    1646 ECB             :         }
    1647                 : 
    1648                 :         /*
    1649                 :          * For the current heap tuple, extract all the attributes we use in
    1650                 :          * this index, and note which are null.  This also performs evaluation
    1651                 :          * of any expressions needed.
    1652                 :          */
    1653 GIC    14017375 :         FormIndexDatum(indexInfo,
    1654                 :                        slot,
    1655                 :                        estate,
    1656                 :                        values,
    1657                 :                        isnull);
    1658 ECB             : 
    1659                 :         /*
    1660                 :          * You'd think we should go ahead and build the index tuple here, but
    1661                 :          * some index AMs want to do further processing on the data first.  So
    1662                 :          * pass the values[] and isnull[] arrays, instead.
    1663                 :          */
    1664                 : 
    1665 GIC    14017372 :         if (HeapTupleIsHeapOnly(heapTuple))
    1666                 :         {
    1667                 :             /*
    1668                 :              * For a heap-only tuple, pretend its TID is that of the root. See
    1669 ECB             :              * src/backend/access/heap/README.HOT for discussion.
    1670                 :              */
    1671                 :             ItemPointerData tid;
    1672                 :             OffsetNumber offnum;
    1673                 : 
    1674 GIC        1589 :             offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
    1675                 : 
    1676                 :             /*
    1677                 :              * If a HOT tuple points to a root that we don't know about,
    1678                 :              * obtain root items afresh.  If that still fails, report it as
    1679                 :              * corruption.
    1680                 :              */
    1681 CBC        1589 :             if (root_offsets[offnum - 1] == InvalidOffsetNumber)
    1682                 :             {
    1683 UIC           0 :                 Page        page = BufferGetPage(hscan->rs_cbuf);
    1684                 : 
    1685               0 :                 LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1686               0 :                 heap_get_root_tuples(page, root_offsets);
    1687               0 :                 LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1688                 :             }
    1689                 : 
    1690 CBC        1589 :             if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
    1691 UIC           0 :                 ereport(ERROR,
    1692                 :                         (errcode(ERRCODE_DATA_CORRUPTED),
    1693                 :                          errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
    1694                 :                                          ItemPointerGetBlockNumber(&heapTuple->t_self),
    1695                 :                                          offnum,
    1696                 :                                          RelationGetRelationName(heapRelation))));
    1697 ECB             : 
    1698 GIC        1589 :             ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
    1699 GBC        1589 :                            root_offsets[offnum - 1]);
    1700                 : 
    1701 EUB             :             /* Call the AM's callback routine to process the tuple */
    1702 GBC        1589 :             callback(indexRelation, &tid, values, isnull, tupleIsAlive,
    1703 EUB             :                      callback_state);
    1704                 :         }
    1705                 :         else
    1706 ECB             :         {
    1707 EUB             :             /* Call the AM's callback routine to process the tuple */
    1708 GIC    14015783 :             callback(indexRelation, &heapTuple->t_self, values, isnull,
    1709                 :                      tupleIsAlive, callback_state);
    1710                 :         }
    1711                 :     }
    1712                 : 
    1713                 :     /* Report scan progress one last time. */
    1714 CBC       66560 :     if (progress)
    1715 ECB             :     {
    1716                 :         BlockNumber blks_done;
    1717                 : 
    1718 CBC       64968 :         if (hscan->rs_base.rs_parallel != NULL)
    1719                 :         {
    1720                 :             ParallelBlockTableScanDesc pbscan;
    1721                 : 
    1722 GIC          71 :             pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
    1723              71 :             blks_done = pbscan->phs_nblocks;
    1724 ECB             :         }
    1725                 :         else
    1726 GIC       64897 :             blks_done = hscan->rs_nblocks;
    1727                 : 
    1728           64968 :         pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
    1729                 :                                      blks_done);
    1730 ECB             :     }
    1731                 : 
    1732 GIC       66560 :     table_endscan(scan);
    1733                 : 
    1734 ECB             :     /* we can now forget our snapshot, if set and registered by us */
    1735 GIC       66560 :     if (need_unregister_snapshot)
    1736           49079 :         UnregisterSnapshot(snapshot);
    1737                 : 
    1738 CBC       66560 :     ExecDropSingleTupleTableSlot(slot);
    1739 ECB             : 
    1740 GIC       66560 :     FreeExecutorState(estate);
    1741                 : 
    1742 ECB             :     /* These may have been pointing to the now-gone estate */
    1743 GIC       66560 :     indexInfo->ii_ExpressionsState = NIL;
    1744 CBC       66560 :     indexInfo->ii_PredicateState = NULL;
    1745                 : 
    1746 GIC       66560 :     return reltuples;
    1747                 : }
    1748 ECB             : 
    1749                 : static void
    1750 GIC         270 : heapam_index_validate_scan(Relation heapRelation,
    1751 ECB             :                            Relation indexRelation,
    1752                 :                            IndexInfo *indexInfo,
    1753                 :                            Snapshot snapshot,
    1754                 :                            ValidateIndexState *state)
    1755                 : {
    1756                 :     TableScanDesc scan;
    1757                 :     HeapScanDesc hscan;
    1758                 :     HeapTuple   heapTuple;
    1759                 :     Datum       values[INDEX_MAX_KEYS];
    1760                 :     bool        isnull[INDEX_MAX_KEYS];
    1761                 :     ExprState  *predicate;
    1762                 :     TupleTableSlot *slot;
    1763                 :     EState     *estate;
    1764                 :     ExprContext *econtext;
    1765 GIC         270 :     BlockNumber root_blkno = InvalidBlockNumber;
    1766 ECB             :     OffsetNumber root_offsets[MaxHeapTuplesPerPage];
    1767                 :     bool        in_index[MaxHeapTuplesPerPage];
    1768 GIC         270 :     BlockNumber previous_blkno = InvalidBlockNumber;
    1769                 : 
    1770                 :     /* state variables for the merge */
    1771             270 :     ItemPointer indexcursor = NULL;
    1772                 :     ItemPointerData decoded;
    1773             270 :     bool        tuplesort_empty = false;
    1774                 : 
    1775                 :     /*
    1776                 :      * sanity checks
    1777                 :      */
    1778             270 :     Assert(OidIsValid(indexRelation->rd_rel->relam));
    1779                 : 
    1780                 :     /*
    1781 ECB             :      * Need an EState for evaluation of index expressions and partial-index
    1782                 :      * predicates.  Also a slot to hold the current tuple.
    1783                 :      */
    1784 CBC         270 :     estate = CreateExecutorState();
    1785 GIC         270 :     econtext = GetPerTupleExprContext(estate);
    1786             270 :     slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
    1787 ECB             :                                     &TTSOpsHeapTuple);
    1788                 : 
    1789                 :     /* Arrange for econtext's scan tuple to be the tuple under test */
    1790 GIC         270 :     econtext->ecxt_scantuple = slot;
    1791                 : 
    1792                 :     /* Set up execution state for predicate, if any. */
    1793             270 :     predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
    1794 ECB             : 
    1795                 :     /*
    1796                 :      * Prepare for scan of the base relation.  We need just those tuples
    1797                 :      * satisfying the passed-in reference snapshot.  We must disable syncscan
    1798                 :      * here, because it's critical that we read from block zero forward to
    1799                 :      * match the sorted TIDs.
    1800                 :      */
    1801 CBC         270 :     scan = table_beginscan_strat(heapRelation,  /* relation */
    1802 ECB             :                                  snapshot,  /* snapshot */
    1803                 :                                  0, /* number of keys */
    1804                 :                                  NULL,  /* scan key */
    1805                 :                                  true,  /* buffer access strategy OK */
    1806                 :                                  false);    /* syncscan not OK */
    1807 GIC         270 :     hscan = (HeapScanDesc) scan;
    1808                 : 
    1809 CBC         270 :     pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
    1810 GIC         270 :                                  hscan->rs_nblocks);
    1811                 : 
    1812                 :     /*
    1813                 :      * Scan all tuples matching the snapshot.
    1814                 :      */
    1815           16419 :     while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1816                 :     {
    1817 CBC       16149 :         ItemPointer heapcursor = &heapTuple->t_self;
    1818                 :         ItemPointerData rootTuple;
    1819                 :         OffsetNumber root_offnum;
    1820                 : 
    1821 GIC       16149 :         CHECK_FOR_INTERRUPTS();
    1822                 : 
    1823 CBC       16149 :         state->htups += 1;
    1824                 : 
    1825           16149 :         if ((previous_blkno == InvalidBlockNumber) ||
    1826           15974 :             (hscan->rs_cblock != previous_blkno))
    1827                 :         {
    1828 GIC         380 :             pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
    1829             380 :                                          hscan->rs_cblock);
    1830             380 :             previous_blkno = hscan->rs_cblock;
    1831 ECB             :         }
    1832                 : 
    1833                 :         /*
    1834                 :          * As commented in table_index_build_scan, we should index heap-only
    1835                 :          * tuples under the TIDs of their root tuples; so when we advance onto
    1836                 :          * a new heap page, build a map of root item offsets on the page.
    1837                 :          *
    1838                 :          * This complicates merging against the tuplesort output: we will
    1839                 :          * visit the live tuples in order by their offsets, but the root
    1840                 :          * offsets that we need to compare against the index contents might be
    1841                 :          * ordered differently.  So we might have to "look back" within the
    1842                 :          * tuplesort output, but only within the current page.  We handle that
    1843                 :          * by keeping a bool array in_index[] showing all the
    1844                 :          * already-passed-over tuplesort output TIDs of the current page. We
    1845                 :          * clear that array here, when advancing onto a new heap page.
    1846                 :          */
    1847 GIC       16149 :         if (hscan->rs_cblock != root_blkno)
    1848                 :         {
    1849             380 :             Page        page = BufferGetPage(hscan->rs_cbuf);
    1850                 : 
    1851             380 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1852             380 :             heap_get_root_tuples(page, root_offsets);
    1853             380 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1854                 : 
    1855             380 :             memset(in_index, 0, sizeof(in_index));
    1856                 : 
    1857             380 :             root_blkno = hscan->rs_cblock;
    1858                 :         }
    1859                 : 
    1860                 :         /* Convert actual tuple TID to root TID */
    1861           16149 :         rootTuple = *heapcursor;
    1862           16149 :         root_offnum = ItemPointerGetOffsetNumber(heapcursor);
    1863 ECB             : 
    1864 GIC       16149 :         if (HeapTupleIsHeapOnly(heapTuple))
    1865 ECB             :         {
    1866 GIC           4 :             root_offnum = root_offsets[root_offnum - 1];
    1867 CBC           4 :             if (!OffsetNumberIsValid(root_offnum))
    1868 LBC           0 :                 ereport(ERROR,
    1869 ECB             :                         (errcode(ERRCODE_DATA_CORRUPTED),
    1870                 :                          errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
    1871                 :                                          ItemPointerGetBlockNumber(heapcursor),
    1872                 :                                          ItemPointerGetOffsetNumber(heapcursor),
    1873                 :                                          RelationGetRelationName(heapRelation))));
    1874 GIC           4 :             ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
    1875                 :         }
    1876                 : 
    1877 ECB             :         /*
    1878                 :          * "merge" by skipping through the index tuples until we find or pass
    1879                 :          * the current root tuple.
    1880                 :          */
    1881 GIC       32251 :         while (!tuplesort_empty &&
    1882 CBC       32036 :                (!indexcursor ||
    1883           32036 :                 ItemPointerCompare(indexcursor, &rootTuple) < 0))
    1884 EUB             :         {
    1885                 :             Datum       ts_val;
    1886                 :             bool        ts_isnull;
    1887                 : 
    1888 GIC       16102 :             if (indexcursor)
    1889                 :             {
    1890 ECB             :                 /*
    1891                 :                  * Remember index items seen earlier on the current heap page
    1892                 :                  */
    1893 GIC       15927 :                 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
    1894           15722 :                     in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
    1895                 :             }
    1896                 : 
    1897 CBC       16102 :             tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
    1898                 :                                                   false, &ts_val, &ts_isnull,
    1899 GNC       16102 :                                                   NULL);
    1900 CBC       16102 :             Assert(tuplesort_empty || !ts_isnull);
    1901 GIC       16102 :             if (!tuplesort_empty)
    1902                 :             {
    1903           16085 :                 itemptr_decode(&decoded, DatumGetInt64(ts_val));
    1904           16085 :                 indexcursor = &decoded;
    1905 ECB             :             }
    1906                 :             else
    1907                 :             {
    1908                 :                 /* Be tidy */
    1909 CBC          17 :                 indexcursor = NULL;
    1910                 :             }
    1911 ECB             :         }
    1912                 : 
    1913                 :         /*
    1914                 :          * If the tuplesort has overshot *and* we didn't see a match earlier,
    1915                 :          * then this tuple is missing from the index, so insert it.
    1916                 :          */
    1917 GIC       32258 :         if ((tuplesort_empty ||
    1918           16109 :              ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
    1919              74 :             !in_index[root_offnum - 1])
    1920                 :         {
    1921 CBC          70 :             MemoryContextReset(econtext->ecxt_per_tuple_memory);
    1922                 : 
    1923                 :             /* Set up for predicate or expression evaluation */
    1924 GIC          70 :             ExecStoreHeapTuple(heapTuple, slot, false);
    1925                 : 
    1926                 :             /*
    1927                 :              * In a partial index, discard tuples that don't satisfy the
    1928                 :              * predicate.
    1929 ECB             :              */
    1930 CBC          70 :             if (predicate != NULL)
    1931 ECB             :             {
    1932 GIC          24 :                 if (!ExecQual(predicate, econtext))
    1933 CBC          24 :                     continue;
    1934                 :             }
    1935                 : 
    1936 ECB             :             /*
    1937                 :              * For the current heap tuple, extract all the attributes we use
    1938                 :              * in this index, and note which are null.  This also performs
    1939                 :              * evaluation of any expressions needed.
    1940                 :              */
    1941 GIC          46 :             FormIndexDatum(indexInfo,
    1942 ECB             :                            slot,
    1943                 :                            estate,
    1944                 :                            values,
    1945                 :                            isnull);
    1946                 : 
    1947                 :             /*
    1948                 :              * You'd think we should go ahead and build the index tuple here,
    1949                 :              * but some index AMs want to do further processing on the data
    1950                 :              * first. So pass the values[] and isnull[] arrays, instead.
    1951                 :              */
    1952                 : 
    1953                 :             /*
    1954                 :              * If the tuple is already committed dead, you might think we
    1955                 :              * could suppress uniqueness checking, but this is no longer true
    1956                 :              * in the presence of HOT, because the insert is actually a proxy
    1957                 :              * for a uniqueness check on the whole HOT-chain.  That is, the
    1958                 :              * tuple we have here could be dead because it was already
    1959                 :              * HOT-updated, and if so the updating transaction will not have
    1960                 :              * thought it should insert index entries.  The index AM will
    1961                 :              * check the whole HOT-chain and correctly detect a conflict if
    1962                 :              * there is one.
    1963                 :              */
    1964                 : 
    1965 GIC          46 :             index_insert(indexRelation,
    1966                 :                          values,
    1967                 :                          isnull,
    1968                 :                          &rootTuple,
    1969                 :                          heapRelation,
    1970              46 :                          indexInfo->ii_Unique ?
    1971                 :                          UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
    1972                 :                          false,
    1973                 :                          indexInfo);
    1974                 : 
    1975              46 :             state->tups_inserted += 1;
    1976                 :         }
    1977 ECB             :     }
    1978                 : 
    1979 GIC         270 :     table_endscan(scan);
    1980                 : 
    1981             270 :     ExecDropSingleTupleTableSlot(slot);
    1982 ECB             : 
    1983 GIC         270 :     FreeExecutorState(estate);
    1984                 : 
    1985                 :     /* These may have been pointing to the now-gone estate */
    1986             270 :     indexInfo->ii_ExpressionsState = NIL;
    1987 CBC         270 :     indexInfo->ii_PredicateState = NULL;
    1988 GIC         270 : }
    1989                 : 
    1990                 : /*
    1991 ECB             :  * Return the number of blocks that have been read by this scan since
    1992                 :  * starting.  This is meant for progress reporting rather than be fully
    1993                 :  * accurate: in a parallel scan, workers can be concurrently reading blocks
    1994                 :  * further ahead than what we report.
    1995                 :  */
    1996                 : static BlockNumber
    1997 GIC    12751582 : heapam_scan_get_blocks_done(HeapScanDesc hscan)
    1998 ECB             : {
    1999 CBC    12751582 :     ParallelBlockTableScanDesc bpscan = NULL;
    2000 ECB             :     BlockNumber startblock;
    2001                 :     BlockNumber blocks_done;
    2002                 : 
    2003 GIC    12751582 :     if (hscan->rs_base.rs_parallel != NULL)
    2004                 :     {
    2005         1013615 :         bpscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
    2006         1013615 :         startblock = bpscan->phs_startblock;
    2007                 :     }
    2008                 :     else
    2009 CBC    11737967 :         startblock = hscan->rs_startblock;
    2010                 : 
    2011 ECB             :     /*
    2012                 :      * Might have wrapped around the end of the relation, if startblock was
    2013                 :      * not zero.
    2014                 :      */
    2015 CBC    12751582 :     if (hscan->rs_cblock > startblock)
    2016 GIC    11876578 :         blocks_done = hscan->rs_cblock - startblock;
    2017 ECB             :     else
    2018                 :     {
    2019                 :         BlockNumber nblocks;
    2020                 : 
    2021 CBC      875004 :         nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
    2022 GIC      875004 :         blocks_done = nblocks - startblock +
    2023          875004 :             hscan->rs_cblock;
    2024                 :     }
    2025                 : 
    2026        12751582 :     return blocks_done;
    2027 ECB             : }
    2028                 : 
    2029                 : 
    2030                 : /* ------------------------------------------------------------------------
    2031                 :  * Miscellaneous callbacks for the heap AM
    2032                 :  * ------------------------------------------------------------------------
    2033                 :  */
    2034                 : 
    2035                 : /*
    2036                 :  * Check to see whether the table needs a TOAST table.  It does only if
    2037                 :  * (1) there are any toastable attributes, and (2) the maximum length
    2038                 :  * of a tuple could exceed TOAST_TUPLE_THRESHOLD.  (We don't want to
    2039                 :  * create a toast table for something like "f1 varchar(20)".)
    2040                 :  */
    2041                 : static bool
    2042 GIC       30583 : heapam_relation_needs_toast_table(Relation rel)
    2043                 : {
    2044           30583 :     int32       data_length = 0;
    2045           30583 :     bool        maxlength_unknown = false;
    2046           30583 :     bool        has_toastable_attrs = false;
    2047           30583 :     TupleDesc   tupdesc = rel->rd_att;
    2048                 :     int32       tuple_length;
    2049                 :     int         i;
    2050                 : 
    2051          190212 :     for (i = 0; i < tupdesc->natts; i++)
    2052                 :     {
    2053          159629 :         Form_pg_attribute att = TupleDescAttr(tupdesc, i);
    2054 ECB             : 
    2055 GIC      159629 :         if (att->attisdropped)
    2056 CBC         480 :             continue;
    2057          159149 :         data_length = att_align_nominal(data_length, att->attalign);
    2058          159149 :         if (att->attlen > 0)
    2059 ECB             :         {
    2060                 :             /* Fixed-length types are never toastable */
    2061 GIC      113551 :             data_length += att->attlen;
    2062                 :         }
    2063 ECB             :         else
    2064                 :         {
    2065 CBC       45598 :             int32       maxlen = type_maximum_size(att->atttypid,
    2066                 :                                                    att->atttypmod);
    2067 ECB             : 
    2068 CBC       45598 :             if (maxlen < 0)
    2069           44155 :                 maxlength_unknown = true;
    2070 ECB             :             else
    2071 GIC        1443 :                 data_length += maxlen;
    2072           45598 :             if (att->attstorage != TYPSTORAGE_PLAIN)
    2073 CBC       43419 :                 has_toastable_attrs = true;
    2074                 :         }
    2075                 :     }
    2076 GIC       30583 :     if (!has_toastable_attrs)
    2077 CBC       11659 :         return false;           /* nothing to toast? */
    2078 GIC       18924 :     if (maxlength_unknown)
    2079           17777 :         return true;            /* any unlimited-length attrs? */
    2080 CBC        1147 :     tuple_length = MAXALIGN(SizeofHeapTupleHeader +
    2081            1147 :                             BITMAPLEN(tupdesc->natts)) +
    2082 GIC        1147 :         MAXALIGN(data_length);
    2083 CBC        1147 :     return (tuple_length > TOAST_TUPLE_THRESHOLD);
    2084 ECB             : }
    2085                 : 
    2086                 : /*
    2087                 :  * TOAST tables for heap relations are just heap relations.
    2088                 :  */
    2089                 : static Oid
    2090 CBC       18026 : heapam_relation_toast_am(Relation rel)
    2091 ECB             : {
    2092 CBC       18026 :     return rel->rd_rel->relam;
    2093 ECB             : }
    2094                 : 
    2095                 : 
    2096                 : /* ------------------------------------------------------------------------
    2097                 :  * Planner related callbacks for the heap AM
    2098                 :  * ------------------------------------------------------------------------
    2099                 :  */
    2100                 : 
#define HEAP_OVERHEAD_BYTES_PER_TUPLE \
	(MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
#define HEAP_USABLE_BYTES_PER_PAGE \
	(BLCKSZ - SizeOfPageHeaderData)

/*
 * Estimate the relation's size for the planner (pages, row count, and
 * fraction of pages that are all-visible).
 *
 * Defers entirely to the generic block-based implementation, supplying
 * heap's per-tuple overhead (aligned tuple header plus a line pointer)
 * and the usable space per page (block size minus the page header).
 */
static void
heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
						 BlockNumber *pages, double *tuples,
						 double *allvisfrac)
{
	table_block_relation_estimate_size(rel, attr_widths, pages,
									   tuples, allvisfrac,
									   HEAP_OVERHEAD_BYTES_PER_TUPLE,
									   HEAP_USABLE_BYTES_PER_PAGE);
}
    2116                 : 
    2117                 : 
    2118                 : /* ------------------------------------------------------------------------
    2119 ECB             :  * Executor related callbacks for the heap AM
    2120                 :  * ------------------------------------------------------------------------
    2121                 :  */
    2122                 : 
/*
 * Prepare to return tuples from the heap page named by tbmres->blockno.
 *
 * Pins and share-locks the page, collects the offsets of all tuples on it
 * that are visible under the scan's snapshot into hscan->rs_vistuples
 * (count in hscan->rs_ntuples), then releases the content lock but keeps
 * the pin.  Returns true if at least one visible tuple was found; false
 * if none were, or if the block lies beyond what this scan considers the
 * end of the relation.
 */
static bool
heapam_scan_bitmap_next_block(TableScanDesc scan,
							  TBMIterateResult *tbmres)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;
	BlockNumber block = tbmres->blockno;
	Buffer		buffer;
	Snapshot	snapshot;
	int			ntup;

	/* Reset per-page state before anything that can return early. */
	hscan->rs_cindex = 0;
	hscan->rs_ntuples = 0;

	/*
	 * Ignore any claimed entries past what we think is the end of the
	 * relation. It may have been extended after the start of our scan (we
	 * only hold an AccessShareLock, and it could be inserts from this
	 * backend).
	 */
	if (block >= hscan->rs_nblocks)
		return false;

	/*
	 * Acquire pin on the target heap page, trading in any pin we held before.
	 */
	hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
										  scan->rs_rd,
										  block);
	hscan->rs_cblock = block;
	buffer = hscan->rs_cbuf;
	snapshot = scan->rs_snapshot;

	ntup = 0;

	/*
	 * Prune and repair fragmentation for the whole page, if possible.
	 */
	heap_page_prune_opt(scan->rs_rd, buffer);

	/*
	 * We must hold share lock on the buffer content while examining tuple
	 * visibility.  Afterwards, however, the tuples we have found to be
	 * visible are guaranteed good as long as we hold the buffer pin.
	 */
	LockBuffer(buffer, BUFFER_LOCK_SHARE);

	/*
	 * We need two separate strategies for lossy and non-lossy cases.
	 */
	if (tbmres->ntuples >= 0)
	{
		/*
		 * Bitmap is non-lossy, so we just look through the offsets listed in
		 * tbmres; but we have to follow any HOT chain starting at each such
		 * offset.
		 */
		int			curslot;

		for (curslot = 0; curslot < tbmres->ntuples; curslot++)
		{
			OffsetNumber offnum = tbmres->offsets[curslot];
			ItemPointerData tid;
			HeapTupleData heapTuple;

			ItemPointerSet(&tid, block, offnum);
			if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
									   &heapTuple, NULL, true))
				hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
		}
	}
	else
	{
		/*
		 * Bitmap is lossy, so we must examine each line pointer on the page.
		 * But we can ignore HOT chains, since we'll check each tuple anyway.
		 */
		Page		page = BufferGetPage(buffer);
		OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
		OffsetNumber offnum;

		for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
		{
			ItemId		lp;
			HeapTupleData loctup;
			bool		valid;

			/* Only normal line pointers carry a tuple worth checking. */
			lp = PageGetItemId(page, offnum);
			if (!ItemIdIsNormal(lp))
				continue;
			loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
			loctup.t_len = ItemIdGetLength(lp);
			loctup.t_tableOid = scan->rs_rd->rd_id;
			ItemPointerSet(&loctup.t_self, block, offnum);
			valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
			if (valid)
			{
				hscan->rs_vistuples[ntup++] = offnum;
				/* Predicate-lock each visible TID for SSI purposes. */
				PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
								 HeapTupleHeaderGetXmin(loctup.t_data));
			}
			/* Called for visible and invisible tuples alike. */
			HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
												buffer, snapshot);
		}
	}

	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

	Assert(ntup <= MaxHeapTuplesPerPage);
	hscan->rs_ntuples = ntup;

	return ntup > 0;
}
    2235 ECB             : 
/*
 * Return the next visible tuple from the page prepared by
 * heapam_scan_bitmap_next_block, storing it into *slot.
 *
 * Walks hscan->rs_vistuples[] via hscan->rs_cindex; returns false once the
 * visible tuples of the current page are exhausted (the slot is then left
 * untouched).  The stored slot pins the buffer, so the tuple stays valid
 * after we return.
 */
static bool
heapam_scan_bitmap_next_tuple(TableScanDesc scan,
							  TBMIterateResult *tbmres,
							  TupleTableSlot *slot)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;
	OffsetNumber targoffset;
	Page		page;
	ItemId		lp;

	/*
	 * Out of range?  If so, nothing more to look at on this page
	 */
	if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
		return false;

	targoffset = hscan->rs_vistuples[hscan->rs_cindex];
	page = BufferGetPage(hscan->rs_cbuf);
	lp = PageGetItemId(page, targoffset);
	/* next_block only recorded offsets of normal line pointers */
	Assert(ItemIdIsNormal(lp));

	hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
	hscan->rs_ctup.t_len = ItemIdGetLength(lp);
	hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
	ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);

	pgstat_count_heap_fetch(scan->rs_rd);

	/*
	 * Set up the result slot to point to this tuple.  Note that the slot
	 * acquires a pin on the buffer.
	 */
	ExecStoreBufferHeapTuple(&hscan->rs_ctup,
							 slot,
							 hscan->rs_cbuf);

	hscan->rs_cindex++;

	return true;
}
    2276                 : 
/*
 * Select the next block for a TABLESAMPLE scan and read it into the scan's
 * current buffer.
 *
 * If the sampling method supplies NextSampleBlock, that callback picks the
 * block; otherwise we scan sequentially from rs_startblock, wrapping at the
 * end of the relation and stopping when we come back around to the start.
 * Returns false (after releasing the buffer pin and resetting scan state)
 * when the relation is empty or the scan is complete.
 */
static bool
heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;
	TsmRoutine *tsm = scanstate->tsmroutine;
	BlockNumber blockno;

	/* return false immediately if relation is empty */
	if (hscan->rs_nblocks == 0)
		return false;

	if (tsm->NextSampleBlock)
	{
		/* The tablesample method chooses which block to visit next. */
		blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
		hscan->rs_cblock = blockno;
	}
	else
	{
		/* scanning table sequentially */

		if (hscan->rs_cblock == InvalidBlockNumber)
		{
			/* First call: begin at the (possibly syncscan-chosen) start. */
			Assert(!hscan->rs_inited);
			blockno = hscan->rs_startblock;
		}
		else
		{
			Assert(hscan->rs_inited);

			blockno = hscan->rs_cblock + 1;

			if (blockno >= hscan->rs_nblocks)
			{
				/* wrap to beginning of rel, might not have started at 0 */
				blockno = 0;
			}

			/*
			 * Report our new scan position for synchronization purposes.
			 *
			 * Note: we do this before checking for end of scan so that the
			 * final state of the position hint is back at the start of the
			 * rel.  That's not strictly necessary, but otherwise when you run
			 * the same query multiple times the starting position would shift
			 * a little bit backwards on every invocation, which is confusing.
			 * We don't guarantee any specific ordering in general, though.
			 */
			if (scan->rs_flags & SO_ALLOW_SYNC)
				ss_report_location(scan->rs_rd, blockno);

			if (blockno == hscan->rs_startblock)
			{
				/* Wrapped all the way around: the scan is done. */
				blockno = InvalidBlockNumber;
			}
		}
	}

	if (!BlockNumberIsValid(blockno))
	{
		/* End of scan: drop any remaining pin and reset scan state. */
		if (BufferIsValid(hscan->rs_cbuf))
			ReleaseBuffer(hscan->rs_cbuf);
		hscan->rs_cbuf = InvalidBuffer;
		hscan->rs_cblock = InvalidBlockNumber;
		hscan->rs_inited = false;

		return false;
	}

	heapgetpage(scan, blockno);
	hscan->rs_inited = true;

	return true;
}
    2350                 : 
/*
 * Fetch the next sampled tuple from the current block of a TABLESAMPLE
 * scan, as chosen by the sampling method's NextSampleTuple callback.
 *
 * Stores the tuple into *slot and returns true, or clears the slot and
 * returns false once the sampling method reports no more offsets for this
 * page.  In non-pagemode the buffer is share-locked around each visibility
 * check and unlocked again on every exit path from the loop.
 */
static bool
heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
							  TupleTableSlot *slot)
{
	HeapScanDesc hscan = (HeapScanDesc) scan;
	TsmRoutine *tsm = scanstate->tsmroutine;
	BlockNumber blockno = hscan->rs_cblock;
	bool		pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;

	Page		page;
	bool		all_visible;
	OffsetNumber maxoffset;

	/*
	 * When not using pagemode, we must lock the buffer during tuple
	 * visibility checks.
	 */
	if (!pagemode)
		LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);

	page = (Page) BufferGetPage(hscan->rs_cbuf);
	/* An all-visible page lets us skip per-tuple visibility checks below. */
	all_visible = PageIsAllVisible(page) &&
		!scan->rs_snapshot->takenDuringRecovery;
	maxoffset = PageGetMaxOffsetNumber(page);

	for (;;)
	{
		OffsetNumber tupoffset;

		CHECK_FOR_INTERRUPTS();

		/* Ask the tablesample method which tuples to check on this page. */
		tupoffset = tsm->NextSampleTuple(scanstate,
										 blockno,
										 maxoffset);

		if (OffsetNumberIsValid(tupoffset))
		{
			ItemId		itemid;
			bool		visible;
			HeapTuple	tuple = &(hscan->rs_ctup);

			/* Skip invalid tuple pointers. */
			itemid = PageGetItemId(page, tupoffset);
			if (!ItemIdIsNormal(itemid))
				continue;

			tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
			tuple->t_len = ItemIdGetLength(itemid);
			ItemPointerSet(&(tuple->t_self), blockno, tupoffset);


			if (all_visible)
				visible = true;
			else
				visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
												 tuple, tupoffset);

			/* in pagemode, heapgetpage did this for us */
			if (!pagemode)
				HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
													hscan->rs_cbuf, scan->rs_snapshot);

			/* Try next tuple from same page. */
			if (!visible)
				continue;

			/* Found visible tuple, return it. */
			if (!pagemode)
				LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);

			/* Count successfully-fetched tuples as heap fetches */
			pgstat_count_heap_getnext(scan->rs_rd);

			return true;
		}
		else
		{
			/*
			 * If we get here, it means we've exhausted the items on this page
			 * and it's time to move to the next.
			 */
			if (!pagemode)
				LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			ExecClearTuple(slot);
			return false;
		}
	}

	/* Not reached: the loop only exits via the returns above. */
	Assert(0);
}
    2445                 : 
    2446                 : 
    2447 ECB             : /* ----------------------------------------------------------------------------
    2448                 :  *  Helper functions for the above.
    2449                 :  * ----------------------------------------------------------------------------
    2450                 :  */
    2451                 : 
    2452                 : /*
    2453                 :  * Reconstruct and rewrite the given tuple
    2454                 :  *
    2455                 :  * We cannot simply copy the tuple as-is, for several reasons:
    2456                 :  *
    2457                 :  * 1. We'd like to squeeze out the values of any dropped columns, both
    2458                 :  * to save space and to ensure we have no corner-case failures. (It's
    2459                 :  * possible for example that the new table hasn't got a TOAST table
    2460                 :  * and so is unable to store any large values of dropped cols.)
    2461                 :  *
    2462                 :  * 2. The tuple might not even be legal for the new table; this is
    2463                 :  * currently only known to happen as an after-effect of ALTER TABLE
    2464                 :  * SET WITHOUT OIDS.
    2465                 :  *
    2466                 :  * So, we must reconstruct the tuple from component Datums.
    2467                 :  */
    2468                 : static void
    2469 GIC      384627 : reform_and_rewrite_tuple(HeapTuple tuple,
    2470                 :                          Relation OldHeap, Relation NewHeap,
    2471                 :                          Datum *values, bool *isnull, RewriteState rwstate)
    2472                 : {
    2473          384627 :     TupleDesc   oldTupDesc = RelationGetDescr(OldHeap);
    2474          384627 :     TupleDesc   newTupDesc = RelationGetDescr(NewHeap);
    2475                 :     HeapTuple   copiedTuple;
    2476                 :     int         i;
    2477                 : 
    2478          384627 :     heap_deform_tuple(tuple, oldTupDesc, values, isnull);
    2479                 : 
    2480                 :     /* Be sure to null out any dropped columns */
    2481 CBC     3083520 :     for (i = 0; i < newTupDesc->natts; i++)
    2482                 :     {
    2483 GIC     2698893 :         if (TupleDescAttr(newTupDesc, i)->attisdropped)
    2484 UIC           0 :             isnull[i] = true;
    2485 ECB             :     }
    2486                 : 
    2487 GIC      384627 :     copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
    2488                 : 
    2489                 :     /* The heap rewrite module does the rest */
    2490 CBC      384627 :     rewrite_heap_tuple(rwstate, tuple, copiedTuple);
    2491                 : 
    2492 GIC      384627 :     heap_freetuple(copiedTuple);
    2493 CBC      384627 : }
    2494                 : 
    2495 ECB             : /*
    2496 EUB             :  * Check visibility of the tuple.
    2497                 :  */
    2498                 : static bool
    2499 CBC         406 : SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
    2500                 :                        HeapTuple tuple,
    2501                 :                        OffsetNumber tupoffset)
    2502 ECB             : {
    2503 GIC         406 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    2504 ECB             : 
    2505 CBC         406 :     if (scan->rs_flags & SO_ALLOW_PAGEMODE)
    2506                 :     {
    2507                 :         /*
    2508                 :          * In pageatatime mode, heapgetpage() already did visibility checks,
    2509                 :          * so just look at the info it left in rs_vistuples[].
    2510                 :          *
    2511 ECB             :          * We use a binary search over the known-sorted array.  Note: we could
    2512                 :          * save some effort if we insisted that NextSampleTuple select tuples
    2513                 :          * in increasing order, but it's not clear that there would be enough
    2514                 :          * gain to justify the restriction.
    2515                 :          */
    2516 GIC         403 :         int         start = 0,
    2517 CBC         403 :                     end = hscan->rs_ntuples - 1;
    2518                 : 
    2519 GIC         776 :         while (start <= end)
    2520                 :         {
    2521             776 :             int         mid = (start + end) / 2;
    2522             776 :             OffsetNumber curoffset = hscan->rs_vistuples[mid];
    2523                 : 
    2524             776 :             if (tupoffset == curoffset)
    2525             403 :                 return true;
    2526             373 :             else if (tupoffset < curoffset)
    2527             145 :                 end = mid - 1;
    2528 ECB             :             else
    2529 CBC         228 :                 start = mid + 1;
    2530                 :         }
    2531 ECB             : 
    2532 UIC           0 :         return false;
    2533 ECB             :     }
    2534                 :     else
    2535                 :     {
    2536                 :         /* Otherwise, we have to check the tuple individually. */
    2537 CBC           3 :         return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
    2538 ECB             :                                             buffer);
    2539                 :     }
    2540                 : }
    2541                 : 
    2542                 : 
    2543                 : /* ------------------------------------------------------------------------
    2544 EUB             :  * Definition of the heap table access method.
    2545                 :  * ------------------------------------------------------------------------
    2546                 :  */
    2547                 : 
    2548                 : static const TableAmRoutine heapam_methods = {
    2549 ECB             :     .type = T_TableAmRoutine,
    2550                 : 
    2551                 :     .slot_callbacks = heapam_slot_callbacks,
    2552                 : 
    2553                 :     .scan_begin = heap_beginscan,
    2554                 :     .scan_end = heap_endscan,
    2555                 :     .scan_rescan = heap_rescan,
    2556                 :     .scan_getnextslot = heap_getnextslot,
    2557                 : 
    2558                 :     .scan_set_tidrange = heap_set_tidrange,
    2559                 :     .scan_getnextslot_tidrange = heap_getnextslot_tidrange,
    2560                 : 
    2561                 :     .parallelscan_estimate = table_block_parallelscan_estimate,
    2562                 :     .parallelscan_initialize = table_block_parallelscan_initialize,
    2563                 :     .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
    2564                 : 
    2565                 :     .index_fetch_begin = heapam_index_fetch_begin,
    2566                 :     .index_fetch_reset = heapam_index_fetch_reset,
    2567                 :     .index_fetch_end = heapam_index_fetch_end,
    2568                 :     .index_fetch_tuple = heapam_index_fetch_tuple,
    2569                 : 
    2570                 :     .tuple_insert = heapam_tuple_insert,
    2571                 :     .tuple_insert_speculative = heapam_tuple_insert_speculative,
    2572                 :     .tuple_complete_speculative = heapam_tuple_complete_speculative,
    2573                 :     .multi_insert = heap_multi_insert,
    2574                 :     .tuple_delete = heapam_tuple_delete,
    2575                 :     .tuple_update = heapam_tuple_update,
    2576                 :     .tuple_lock = heapam_tuple_lock,
    2577                 : 
    2578                 :     .tuple_fetch_row_version = heapam_fetch_row_version,
    2579                 :     .tuple_get_latest_tid = heap_get_latest_tid,
    2580                 :     .tuple_tid_valid = heapam_tuple_tid_valid,
    2581                 :     .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
    2582                 :     .index_delete_tuples = heap_index_delete_tuples,
    2583                 : 
    2584                 :     .relation_set_new_filelocator = heapam_relation_set_new_filelocator,
    2585                 :     .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
    2586                 :     .relation_copy_data = heapam_relation_copy_data,
    2587                 :     .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
    2588                 :     .relation_vacuum = heap_vacuum_rel,
    2589                 :     .scan_analyze_next_block = heapam_scan_analyze_next_block,
    2590                 :     .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
    2591                 :     .index_build_range_scan = heapam_index_build_range_scan,
    2592                 :     .index_validate_scan = heapam_index_validate_scan,
    2593                 : 
    2594                 :     .relation_size = table_block_relation_size,
    2595                 :     .relation_needs_toast_table = heapam_relation_needs_toast_table,
    2596                 :     .relation_toast_am = heapam_relation_toast_am,
    2597                 :     .relation_fetch_toast_slice = heap_fetch_toast_slice,
    2598                 : 
    2599                 :     .relation_estimate_size = heapam_estimate_rel_size,
    2600                 : 
    2601                 :     .scan_bitmap_next_block = heapam_scan_bitmap_next_block,
    2602                 :     .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
    2603                 :     .scan_sample_next_block = heapam_scan_sample_next_block,
    2604                 :     .scan_sample_next_tuple = heapam_scan_sample_next_tuple
    2605                 : };
    2606                 : 
    2607                 : 
    2608                 : const TableAmRoutine *
    2609 GIC    14674113 : GetHeapamTableAmRoutine(void)
    2610                 : {
    2611        14674113 :     return &heapam_methods;
    2612                 : }
    2613                 : 
/*
 * SQL-callable table access method handler for heap: returns a pointer to
 * the heapam_methods callback table, wrapped as a Datum.
 */
Datum
heap_tableam_handler(PG_FUNCTION_ARGS)
{
    PG_RETURN_POINTER(&heapam_methods);
}
        

Generated by: LCOV version v1.16-55-g56c0a2a