LCOV - differential code coverage report
Current view: top level - src/backend/storage/ipc - procarray.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 86.1 % 1439 1239 22 113 65 9 804 5 421 124 792 2 4
Current Date: 2023-04-08 15:15:32 Functions: 91.1 % 79 72 7 72 7 72
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * procarray.c
       4                 :  *    POSTGRES process array code.
       5                 :  *
       6                 :  *
       7                 :  * This module maintains arrays of PGPROC substructures, as well as associated
       8                 :  * arrays in ProcGlobal, for all active backends.  Although there are several
       9                 :  * uses for this, the principal one is as a means of determining the set of
      10                 :  * currently running transactions.
      11                 :  *
      12                 :  * Because of various subtle race conditions it is critical that a backend
      13                 :  * hold the correct locks while setting or clearing its xid (in
      14                 :  * ProcGlobal->xids[]/MyProc->xid).  See notes in
      15                 :  * src/backend/access/transam/README.
      16                 :  *
      17                 :  * The process arrays now also include structures representing prepared
      18                 :  * transactions.  The xid and subxids fields of these are valid, as are the
      19                 :  * myProcLocks lists.  They can be distinguished from regular backend PGPROCs
      20                 :  * at need by checking for pid == 0.
      21                 :  *
      22                 :  * During hot standby, we also keep a list of XIDs representing transactions
      23                 :  * that are known to be running on the primary (or more precisely, were running
      24                 :  * as of the current point in the WAL stream).  This list is kept in the
      25                 :  * KnownAssignedXids array, and is updated by watching the sequence of
      26                 :  * arriving XIDs.  This is necessary because if we leave those XIDs out of
      27                 :  * snapshots taken for standby queries, then they will appear to be already
      28                 :  * complete, leading to MVCC failures.  Note that in hot standby, the PGPROC
      29                 :  * array represents standby processes, which by definition are not running
      30                 :  * transactions that have XIDs.
      31                 :  *
      32                 :  * It is perhaps possible for a backend on the primary to terminate without
      33                 :  * writing an abort record for its transaction.  While that shouldn't really
      34                 :  * happen, it would tie up KnownAssignedXids indefinitely, so we protect
      35                 :  * ourselves by pruning the array when a valid list of running XIDs arrives.
      36                 :  *
      37                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      38                 :  * Portions Copyright (c) 1994, Regents of the University of California
      39                 :  *
      40                 :  *
      41                 :  * IDENTIFICATION
      42                 :  *    src/backend/storage/ipc/procarray.c
      43                 :  *
      44                 :  *-------------------------------------------------------------------------
      45                 :  */
      46                 : #include "postgres.h"
      47                 : 
      48                 : #include <signal.h>
      49                 : 
      50                 : #include "access/clog.h"
      51                 : #include "access/subtrans.h"
      52                 : #include "access/transam.h"
      53                 : #include "access/twophase.h"
      54                 : #include "access/xact.h"
      55                 : #include "access/xlogutils.h"
      56                 : #include "catalog/catalog.h"
      57                 : #include "catalog/pg_authid.h"
      58                 : #include "commands/dbcommands.h"
      59                 : #include "miscadmin.h"
      60                 : #include "pgstat.h"
      61                 : #include "port/pg_lfind.h"
      62                 : #include "storage/proc.h"
      63                 : #include "storage/procarray.h"
      64                 : #include "storage/spin.h"
      65                 : #include "utils/acl.h"
      66                 : #include "utils/builtins.h"
      67                 : #include "utils/rel.h"
      68                 : #include "utils/snapmgr.h"
      69                 : 
      70                 : #define UINT32_ACCESS_ONCE(var)      ((uint32)(*((volatile uint32 *)&(var))))
      71                 : 
      72                 : /* Our shared memory area */
      73                 : typedef struct ProcArrayStruct
      74                 : {
      75                 :     int         numProcs;       /* number of valid procs entries */
      76                 :     int         maxProcs;       /* allocated size of procs array */
      77                 : 
      78                 :     /*
      79                 :      * Known assigned XIDs handling
      80                 :      */
      81                 :     int         maxKnownAssignedXids;   /* allocated size of array */
      82                 :     int         numKnownAssignedXids;   /* current # of valid entries */
      83                 :     int         tailKnownAssignedXids;  /* index of oldest valid element */
      84                 :     int         headKnownAssignedXids;  /* index of newest element, + 1 */
      85                 :     slock_t     known_assigned_xids_lck;    /* protects head/tail pointers */
      86                 : 
      87                 :     /*
      88                 :      * Highest subxid that has been removed from KnownAssignedXids array to
      89                 :      * prevent overflow; or InvalidTransactionId if none.  We track this for
      90                 :      * similar reasons to tracking overflowing cached subxids in PGPROC
      91                 :      * entries.  Must hold exclusive ProcArrayLock to change this, and shared
      92                 :      * lock to read it.
      93                 :      */
      94                 :     TransactionId lastOverflowedXid;
      95                 : 
      96                 :     /* oldest xmin of any replication slot */
      97                 :     TransactionId replication_slot_xmin;
      98                 :     /* oldest catalog xmin of any replication slot */
      99                 :     TransactionId replication_slot_catalog_xmin;
     100                 : 
     101                 :     /* indexes into allProcs[], has PROCARRAY_MAXPROCS entries */
     102                 :     int         pgprocnos[FLEXIBLE_ARRAY_MEMBER];
     103                 : } ProcArrayStruct;
     104                 : 
     105                 : /*
     106                 :  * State for the GlobalVisTest* family of functions. Those functions can
     107                 :  * e.g. be used to decide if a deleted row can be removed without violating
     108                 :  * MVCC semantics: If the deleted row's xmax is not considered to be running
     109                 :  * by anyone, the row can be removed.
     110                 :  *
     111                 :  * To avoid slowing down GetSnapshotData(), we don't calculate a precise
     112                 :  * cutoff XID while building a snapshot (looking at the frequently changing
     113                 :  * xmins scales badly). Instead we compute two boundaries while building the
     114                 :  * snapshot:
     115                 :  *
     116                 :  * 1) definitely_needed, indicating that rows deleted by XIDs >=
     117                 :  *    definitely_needed are definitely still visible.
     118                 :  *
     119                 :  * 2) maybe_needed, indicating that rows deleted by XIDs < maybe_needed can
     120                 :  *    definitely be removed
     121                 :  *
     122                 :  * When testing an XID that falls in between the two (i.e. XID >= maybe_needed
     123                 :  * && XID < definitely_needed), the boundaries can be recomputed (using
     124                 :  * ComputeXidHorizons()) to get a more accurate answer. This is cheaper than
     125                 :  * maintaining an accurate value all the time.
     126                 :  *
     127                 :  * As it is not cheap to compute accurate boundaries, we limit the number of
     128                 :  * times that happens in short succession. See GlobalVisTestShouldUpdate().
     129                 :  *
     130                 :  *
     131                 :  * There are four backend lifetime instances of this struct, optimized for
     132                 :  * different types of relations. As e.g. a normal user defined table in one
     133                 :  * database is inaccessible to backends connected to another database, a test
     134                 :  * specific to a relation can be more aggressive than a test for a shared
     135                 :  * relation.  Currently we track four different states:
     136                 :  *
     137                 :  * 1) GlobalVisSharedRels, which only considers an XID's
     138                 :  *    effects visible-to-everyone if neither snapshots in any database, nor a
     139                 :  *    replication slot's xmin, nor a replication slot's catalog_xmin might
     140                 :  *    still consider XID as running.
     141                 :  *
     142                 :  * 2) GlobalVisCatalogRels, which only considers an XID's
     143                 :  *    effects visible-to-everyone if neither snapshots in the current
     144                 :  *    database, nor a replication slot's xmin, nor a replication slot's
     145                 :  *    catalog_xmin might still consider XID as running.
     146                 :  *
     147                 :  *    I.e. the difference to GlobalVisSharedRels is that
     148                 :  *    snapshots in other databases are ignored.
     149                 :  *
     150                 :  * 3) GlobalVisDataRels, which only considers an XID's
     151                 :  *    effects visible-to-everyone if neither snapshots in the current
     152                 :  *    database, nor a replication slot's xmin consider XID as running.
     153                 :  *
     154                 :  *    I.e. the difference to GlobalVisCatalogRels is that
     155                 :  *    replication slot's catalog_xmin is not taken into account.
     156                 :  *
     157                 :  * 4) GlobalVisTempRels, which only considers the current session, as temp
     158                 :  *    tables are not visible to other sessions.
     159                 :  *
     160                 :  * GlobalVisTestFor(relation) returns the appropriate state
     161                 :  * for the relation.
     162                 :  *
     163                 :  * The boundaries are FullTransactionIds instead of TransactionIds to avoid
     164                 :  * wraparound dangers. There e.g. would otherwise exist no procarray state to
     165                 :  * prevent maybe_needed from becoming old enough to wrap around after the
     166                 :  * GetSnapshotData() call.
     167                 :  *
     168                 :  * The typedef is in the header.
     169                 :  */
     170                 : struct GlobalVisState
     171                 : {
     172                 :     /* XIDs >= are considered running by some backend */
     173                 :     FullTransactionId definitely_needed;
     174                 : 
     175                 :     /* XIDs < are not considered to be running by any backend */
     176                 :     FullTransactionId maybe_needed;
     177                 : };
     178                 : 
     179                 : /*
     180                 :  * Result of ComputeXidHorizons().
     181                 :  */
     182                 : typedef struct ComputeXidHorizonsResult
     183                 : {
     184                 :     /*
     185                 :      * The value of ShmemVariableCache->latestCompletedXid when
     186                 :      * ComputeXidHorizons() held ProcArrayLock.
     187                 :      */
     188                 :     FullTransactionId latest_completed;
     189                 : 
     190                 :     /*
     191                 :      * The same for procArray->replication_slot_xmin and
     192                 :      * procArray->replication_slot_catalog_xmin.
     193                 :      */
     194                 :     TransactionId slot_xmin;
     195                 :     TransactionId slot_catalog_xmin;
     196                 : 
     197                 :     /*
     198                 :      * Oldest xid that any backend might still consider running. This needs to
     199                 :      * include processes running VACUUM, in contrast to the normal visibility
     200                 :      * cutoffs, as vacuum needs to be able to perform pg_subtrans lookups when
     201                 :      * determining visibility, but doesn't care about rows above its xmin
     202                 :      * being removed.
     203                 :      *
     204                 :      * This likely should only be needed to determine whether pg_subtrans can
     205                 :      * be truncated. It currently includes the effects of replication slots,
     206                 :      * for historical reasons. But that could likely be changed.
     207                 :      */
     208                 :     TransactionId oldest_considered_running;
     209                 : 
     210                 :     /*
     211                 :      * Oldest xid for which deleted tuples need to be retained in shared
     212                 :      * tables.
     213                 :      *
     214                 :      * This includes the effects of replication slots. If that's not desired,
     215                 :      * look at shared_oldest_nonremovable_raw.
     216                 :      */
     217                 :     TransactionId shared_oldest_nonremovable;
     218                 : 
     219                 :     /*
     220                 :      * Oldest xid that may be necessary to retain in shared tables. This is
     221                 :      * the same as shared_oldest_nonremovable, except that it is not affected by
     222                 :      * replication slot's catalog_xmin.
     223                 :      *
     224                 :      * This is mainly useful to be able to send the catalog_xmin to upstream
     225                 :      * streaming replication servers via hot_standby_feedback, so they can
     226                 :      * apply the limit only when accessing catalog tables.
     227                 :      */
     228                 :     TransactionId shared_oldest_nonremovable_raw;
     229                 : 
     230                 :     /*
     231                 :      * Oldest xid for which deleted tuples need to be retained in non-shared
     232                 :      * catalog tables.
     233                 :      */
     234                 :     TransactionId catalog_oldest_nonremovable;
     235                 : 
     236                 :     /*
     237                 :      * Oldest xid for which deleted tuples need to be retained in normal user
     238                 :      * defined tables.
     239                 :      */
     240                 :     TransactionId data_oldest_nonremovable;
     241                 : 
     242                 :     /*
     243                 :      * Oldest xid for which deleted tuples need to be retained in this
     244                 :      * session's temporary tables.
     245                 :      */
     246                 :     TransactionId temp_oldest_nonremovable;
     247                 : } ComputeXidHorizonsResult;
     248                 : 
     249                 : /*
     250                 :  * Return value for GlobalVisHorizonKindForRel().
     251                 :  */
     252                 : typedef enum GlobalVisHorizonKind
     253                 : {
     254                 :     VISHORIZON_SHARED,
     255                 :     VISHORIZON_CATALOG,
     256                 :     VISHORIZON_DATA,
     257                 :     VISHORIZON_TEMP
     258                 : } GlobalVisHorizonKind;
     259                 : 
     260                 : /*
     261                 :  * Reason codes for KnownAssignedXidsCompress().
     262                 :  */
     263                 : typedef enum KAXCompressReason
     264                 : {
     265                 :     KAX_NO_SPACE,               /* need to free up space at array end */
     266                 :     KAX_PRUNE,                  /* we just pruned old entries */
     267                 :     KAX_TRANSACTION_END,        /* we just committed/removed some XIDs */
     268                 :     KAX_STARTUP_PROCESS_IDLE    /* startup process is about to sleep */
     269                 : } KAXCompressReason;
     270                 : 
     271                 : 
     272                 : static ProcArrayStruct *procArray;
     273                 : 
     274                 : static PGPROC *allProcs;
     275                 : 
     276                 : /*
     277                 :  * Cache to reduce overhead of repeated calls to TransactionIdIsInProgress()
     278                 :  */
     279                 : static TransactionId cachedXidIsNotInProgress = InvalidTransactionId;
     280                 : 
     281                 : /*
     282                 :  * Bookkeeping for tracking emulated transactions in recovery
     283                 :  */
     284                 : static TransactionId *KnownAssignedXids;
     285                 : static bool *KnownAssignedXidsValid;
     286                 : static TransactionId latestObservedXid = InvalidTransactionId;
     287                 : 
     288                 : /*
     289                 :  * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
     290                 :  * the highest xid that might still be running that we don't have in
     291                 :  * KnownAssignedXids.
     292                 :  */
     293                 : static TransactionId standbySnapshotPendingXmin;
     294                 : 
     295                 : /*
     296                 :  * State for visibility checks on different types of relations. See struct
     297                 :  * GlobalVisState for details. As shared, catalog, normal and temporary
     298                 :  * relations can have different horizons, one such state exists for each.
     299                 :  */
     300                 : static GlobalVisState GlobalVisSharedRels;
     301                 : static GlobalVisState GlobalVisCatalogRels;
     302                 : static GlobalVisState GlobalVisDataRels;
     303                 : static GlobalVisState GlobalVisTempRels;
     304                 : 
     305                 : /*
     306                 :  * This backend's RecentXmin at the last time the accurate xmin horizon was
     307                 :  * recomputed, or InvalidTransactionId if it has not. Used to limit how many
     308                 :  * times accurate horizons are recomputed. See GlobalVisTestShouldUpdate().
     309                 :  */
     310                 : static TransactionId ComputeXidHorizonsResultLastXmin;
     311                 : 
     312                 : #ifdef XIDCACHE_DEBUG
     313                 : 
     314                 : /* counters for XidCache measurement */
     315                 : static long xc_by_recent_xmin = 0;
     316                 : static long xc_by_known_xact = 0;
     317                 : static long xc_by_my_xact = 0;
     318                 : static long xc_by_latest_xid = 0;
     319                 : static long xc_by_main_xid = 0;
     320                 : static long xc_by_child_xid = 0;
     321                 : static long xc_by_known_assigned = 0;
     322                 : static long xc_no_overflow = 0;
     323                 : static long xc_slow_answer = 0;
     324                 : 
     325                 : #define xc_by_recent_xmin_inc()     (xc_by_recent_xmin++)
     326                 : #define xc_by_known_xact_inc()      (xc_by_known_xact++)
     327                 : #define xc_by_my_xact_inc()         (xc_by_my_xact++)
     328                 : #define xc_by_latest_xid_inc()      (xc_by_latest_xid++)
     329                 : #define xc_by_main_xid_inc()        (xc_by_main_xid++)
     330                 : #define xc_by_child_xid_inc()       (xc_by_child_xid++)
     331                 : #define xc_by_known_assigned_inc()  (xc_by_known_assigned++)
     332                 : #define xc_no_overflow_inc()        (xc_no_overflow++)
     333                 : #define xc_slow_answer_inc()        (xc_slow_answer++)
     334                 : 
     335                 : static void DisplayXidCache(void);
     336                 : #else                           /* !XIDCACHE_DEBUG */
     337                 : 
     338                 : #define xc_by_recent_xmin_inc()     ((void) 0)
     339                 : #define xc_by_known_xact_inc()      ((void) 0)
     340                 : #define xc_by_my_xact_inc()         ((void) 0)
     341                 : #define xc_by_latest_xid_inc()      ((void) 0)
     342                 : #define xc_by_main_xid_inc()        ((void) 0)
     343                 : #define xc_by_child_xid_inc()       ((void) 0)
     344                 : #define xc_by_known_assigned_inc()  ((void) 0)
     345                 : #define xc_no_overflow_inc()        ((void) 0)
     346                 : #define xc_slow_answer_inc()        ((void) 0)
     347                 : #endif                          /* XIDCACHE_DEBUG */
     348                 : 
     349                 : /* Primitives for KnownAssignedXids array handling for standby */
     350                 : static void KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock);
     351                 : static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
     352                 :                                  bool exclusive_lock);
     353                 : static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
     354                 : static bool KnownAssignedXidExists(TransactionId xid);
     355                 : static void KnownAssignedXidsRemove(TransactionId xid);
     356                 : static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
     357                 :                                         TransactionId *subxids);
     358                 : static void KnownAssignedXidsRemovePreceding(TransactionId removeXid);
     359                 : static int  KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
     360                 : static int  KnownAssignedXidsGetAndSetXmin(TransactionId *xarray,
     361                 :                                            TransactionId *xmin,
     362                 :                                            TransactionId xmax);
     363                 : static TransactionId KnownAssignedXidsGetOldestXmin(void);
     364                 : static void KnownAssignedXidsDisplay(int trace_level);
     365                 : static void KnownAssignedXidsReset(void);
     366                 : static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid);
     367                 : static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
     368                 : static void MaintainLatestCompletedXid(TransactionId latestXid);
     369                 : static void MaintainLatestCompletedXidRecovery(TransactionId latestXid);
     370                 : static void TransactionIdRetreatSafely(TransactionId *xid,
     371                 :                                        int retreat_by,
     372                 :                                        FullTransactionId rel);
     373                 : 
     374                 : static inline FullTransactionId FullXidRelativeTo(FullTransactionId rel,
     375                 :                                                   TransactionId xid);
     376                 : static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons);
     377                 : 
     378                 : /*
     379                 :  * Report shared-memory space needed by CreateSharedProcArray.
     380                 :  */
     381                 : Size
     382 GIC        2738 : ProcArrayShmemSize(void)
     383 ECB             : {
     384                 :     Size        size;
     385                 : 
     386                 :     /* Size of the ProcArray structure itself */
     387                 : #define PROCARRAY_MAXPROCS  (MaxBackends + max_prepared_xacts)
     388                 : 
     389 GIC        2738 :     size = offsetof(ProcArrayStruct, pgprocnos);
     390 CBC        2738 :     size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS));
     391 ECB             : 
     392                 :     /*
     393                 :      * During Hot Standby processing we have a data structure called
     394                 :      * KnownAssignedXids, created in shared memory. Local data structures are
     395                 :      * also created in various backends during GetSnapshotData(),
     396                 :      * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the
     397                 :      * main structures created in those functions must be identically sized,
     398                 :      * since we may at times copy the whole of the data structures around. We
     399                 :      * refer to this size as TOTAL_MAX_CACHED_SUBXIDS.
     400                 :      *
     401                 :      * Ideally we'd only create this structure if we were actually doing hot
     402                 :      * standby in the current run, but we don't know that yet at the time
     403                 :      * shared memory is being set up.
     404                 :      */
     405                 : #define TOTAL_MAX_CACHED_SUBXIDS \
     406                 :     ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
     407                 : 
     408 GIC        2738 :     if (EnableHotStandby)
     409 ECB             :     {
     410 GIC        2728 :         size = add_size(size,
     411 ECB             :                         mul_size(sizeof(TransactionId),
     412 GIC        2728 :                                  TOTAL_MAX_CACHED_SUBXIDS));
     413 CBC        2728 :         size = add_size(size,
     414            2728 :                         mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
     415 ECB             :     }
     416                 : 
     417 GIC        2738 :     return size;
     418 ECB             : }
     419                 : 
     420                 : /*
     421                 :  * Initialize the shared PGPROC array during postmaster startup.
     422                 :  */
     423                 : void
     424 GIC        1826 : CreateSharedProcArray(void)
     425 ECB             : {
     426                 :     bool        found;
     427                 : 
     428                 :     /* Create or attach to the ProcArray shared structure */
     429 GIC        1826 :     procArray = (ProcArrayStruct *)
     430 CBC        1826 :         ShmemInitStruct("Proc Array",
     431 ECB             :                         add_size(offsetof(ProcArrayStruct, pgprocnos),
     432                 :                                  mul_size(sizeof(int),
     433 GIC        1826 :                                           PROCARRAY_MAXPROCS)),
     434 ECB             :                         &found);
     435                 : 
     436 GIC        1826 :     if (!found)
     437 ECB             :     {
     438                 :         /*
     439                 :          * We're the first - initialize.
     440                 :          */
     441 GIC        1826 :         procArray->numProcs = 0;
     442 CBC        1826 :         procArray->maxProcs = PROCARRAY_MAXPROCS;
     443            1826 :         procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS;
     444            1826 :         procArray->numKnownAssignedXids = 0;
     445            1826 :         procArray->tailKnownAssignedXids = 0;
     446            1826 :         procArray->headKnownAssignedXids = 0;
     447            1826 :         SpinLockInit(&procArray->known_assigned_xids_lck);
     448            1826 :         procArray->lastOverflowedXid = InvalidTransactionId;
     449            1826 :         procArray->replication_slot_xmin = InvalidTransactionId;
     450            1826 :         procArray->replication_slot_catalog_xmin = InvalidTransactionId;
     451            1826 :         ShmemVariableCache->xactCompletionCount = 1;
     452 ECB             :     }
     453                 : 
     454 GIC        1826 :     allProcs = ProcGlobal->allProcs;
     455 ECB             : 
     456                 :     /* Create or attach to the KnownAssignedXids arrays too, if needed */
     457 GIC        1826 :     if (EnableHotStandby)
     458 ECB             :     {
     459 GIC        1821 :         KnownAssignedXids = (TransactionId *)
     460 CBC        1821 :             ShmemInitStruct("KnownAssignedXids",
     461 ECB             :                             mul_size(sizeof(TransactionId),
     462 GIC        1821 :                                      TOTAL_MAX_CACHED_SUBXIDS),
     463 ECB             :                             &found);
     464 GIC        1821 :         KnownAssignedXidsValid = (bool *)
     465 CBC        1821 :             ShmemInitStruct("KnownAssignedXidsValid",
     466            1821 :                             mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
     467 ECB             :                             &found);
     468                 :     }
     469 GIC        1826 : }
     470 ECB             : 
     471                 : /*
     472                 :  * Add the specified PGPROC to the shared array.
     473                 :  */
     474                 : void
     475 GIC       11890 : ProcArrayAdd(PGPROC *proc)
     476 ECB             : {
     477 GIC       11890 :     ProcArrayStruct *arrayP = procArray;
     478 ECB             :     int         index;
     479                 :     int         movecount;
     480                 : 
     481                 :     /* See ProcGlobal comment explaining why both locks are held */
     482 GIC       11890 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
     483 CBC       11890 :     LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
     484 ECB             : 
     485 GIC       11890 :     if (arrayP->numProcs >= arrayP->maxProcs)
     486 ECB             :     {
     487                 :         /*
     488                 :          * Oops, no room.  (This really shouldn't happen, since there is a
     489                 :          * fixed supply of PGPROC structs too, and so we should have failed
     490                 :          * earlier.)
     491                 :          */
     492 UIC           0 :         ereport(FATAL,
     493 EUB             :                 (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
     494                 :                  errmsg("sorry, too many clients already")));
     495                 :     }
     496                 : 
     497                 :     /*
     498                 :      * Keep the procs array sorted by (PGPROC *) so that we can utilize
     499                 :      * locality of references much better. This is useful while traversing the
     500                 :      * ProcArray because there is an increased likelihood of finding the next
     501                 :      * PGPROC structure in the cache.
     502                 :      *
     503                 :      * Since the occurrence of adding/removing a proc is much lower than the
     504                 :      * access to the ProcArray itself, the overhead should be marginal
     505                 :      */
     506 GIC       23473 :     for (index = 0; index < arrayP->numProcs; index++)
     507 ECB             :     {
     508 GIC       21448 :         int         procno PG_USED_FOR_ASSERTS_ONLY = arrayP->pgprocnos[index];
     509 ECB             : 
     510 GIC       21448 :         Assert(procno >= 0 && procno < (arrayP->maxProcs + NUM_AUXILIARY_PROCS));
     511 CBC       21448 :         Assert(allProcs[procno].pgxactoff == index);
     512 ECB             : 
     513                 :         /* If we have found our right position in the array, break */
     514 GIC       21448 :         if (arrayP->pgprocnos[index] > proc->pgprocno)
     515 CBC        9865 :             break;
     516 ECB             :     }
     517                 : 
     518 GIC       11890 :     movecount = arrayP->numProcs - index;
     519 CBC       11890 :     memmove(&arrayP->pgprocnos[index + 1],
     520           11890 :             &arrayP->pgprocnos[index],
     521 ECB             :             movecount * sizeof(*arrayP->pgprocnos));
     522 GIC       11890 :     memmove(&ProcGlobal->xids[index + 1],
     523 CBC       11890 :             &ProcGlobal->xids[index],
     524 ECB             :             movecount * sizeof(*ProcGlobal->xids));
     525 GIC       11890 :     memmove(&ProcGlobal->subxidStates[index + 1],
     526 CBC       11890 :             &ProcGlobal->subxidStates[index],
     527 ECB             :             movecount * sizeof(*ProcGlobal->subxidStates));
     528 GIC       11890 :     memmove(&ProcGlobal->statusFlags[index + 1],
     529 CBC       11890 :             &ProcGlobal->statusFlags[index],
     530 ECB             :             movecount * sizeof(*ProcGlobal->statusFlags));
     531                 : 
     532 GIC       11890 :     arrayP->pgprocnos[index] = proc->pgprocno;
     533 CBC       11890 :     proc->pgxactoff = index;
     534           11890 :     ProcGlobal->xids[index] = proc->xid;
     535           11890 :     ProcGlobal->subxidStates[index] = proc->subxidStatus;
     536           11890 :     ProcGlobal->statusFlags[index] = proc->statusFlags;
     537 ECB             : 
     538 GIC       11890 :     arrayP->numProcs++;
     539 ECB             : 
     540                 :     /* adjust pgxactoff for all following PGPROCs */
     541 GIC       11890 :     index++;
     542 CBC       38685 :     for (; index < arrayP->numProcs; index++)
     543 ECB             :     {
     544 GIC       26795 :         int         procno = arrayP->pgprocnos[index];
     545 ECB             : 
     546 GIC       26795 :         Assert(procno >= 0 && procno < (arrayP->maxProcs + NUM_AUXILIARY_PROCS));
     547 CBC       26795 :         Assert(allProcs[procno].pgxactoff == index - 1);
     548 ECB             : 
     549 GIC       26795 :         allProcs[procno].pgxactoff = index;
     550 ECB             :     }
     551                 : 
     552                 :     /*
     553                 :      * Release in reversed acquisition order, to reduce frequency of having to
     554                 :      * wait for XidGenLock while holding ProcArrayLock.
     555                 :      */
     556 GIC       11890 :     LWLockRelease(XidGenLock);
     557 CBC       11890 :     LWLockRelease(ProcArrayLock);
     558           11890 : }
     559 ECB             : 
     560                 : /*
     561                 :  * Remove the specified PGPROC from the shared array.
     562                 :  *
     563                 :  * When latestXid is a valid XID, we are removing a live 2PC gxact from the
     564                 :  * array, and thus causing it to appear as "not running" anymore.  In this
     565                 :  * case we must advance latestCompletedXid.  (This is essentially the same
     566                 :  * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take
     567                 :  * the ProcArrayLock only once, and don't damage the content of the PGPROC;
     568                 :  * twophase.c depends on the latter.)
     569                 :  */
     570                 : void
     571 GIC       11864 : ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
     572 ECB             : {
     573 GIC       11864 :     ProcArrayStruct *arrayP = procArray;
     574 ECB             :     int         myoff;
     575                 :     int         movecount;
     576                 : 
     577                 : #ifdef XIDCACHE_DEBUG
     578                 :     /* dump stats at backend shutdown, but not prepared-xact end */
     579                 :     if (proc->pid != 0)
     580                 :         DisplayXidCache();
     581                 : #endif
     582                 : 
     583                 :     /* See ProcGlobal comment explaining why both locks are held */
     584 GIC       11864 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
     585 CBC       11864 :     LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
     586 ECB             : 
     587 GIC       11864 :     myoff = proc->pgxactoff;
     588 ECB             : 
     589 GIC       11864 :     Assert(myoff >= 0 && myoff < arrayP->numProcs);
     590 CBC       11864 :     Assert(ProcGlobal->allProcs[arrayP->pgprocnos[myoff]].pgxactoff == myoff);
     591 ECB             : 
     592 GIC       11864 :     if (TransactionIdIsValid(latestXid))
     593 ECB             :     {
     594 GIC         365 :         Assert(TransactionIdIsValid(ProcGlobal->xids[myoff]));
     595 ECB             : 
     596                 :         /* Advance global latestCompletedXid while holding the lock */
     597 GIC         365 :         MaintainLatestCompletedXid(latestXid);
     598 ECB             : 
     599                 :         /* Same with xactCompletionCount  */
     600 GIC         365 :         ShmemVariableCache->xactCompletionCount++;
     601 ECB             : 
     602 GIC         365 :         ProcGlobal->xids[myoff] = InvalidTransactionId;
     603 CBC         365 :         ProcGlobal->subxidStates[myoff].overflowed = false;
     604             365 :         ProcGlobal->subxidStates[myoff].count = 0;
     605 ECB             :     }
     606                 :     else
     607                 :     {
     608                 :         /* Shouldn't be trying to remove a live transaction here */
     609 GIC       11499 :         Assert(!TransactionIdIsValid(ProcGlobal->xids[myoff]));
     610 ECB             :     }
     611                 : 
     612 GIC       11864 :     Assert(!TransactionIdIsValid(ProcGlobal->xids[myoff]));
     613 CBC       11864 :     Assert(ProcGlobal->subxidStates[myoff].count == 0);
     614           11864 :     Assert(ProcGlobal->subxidStates[myoff].overflowed == false);
     615 ECB             : 
     616 GIC       11864 :     ProcGlobal->statusFlags[myoff] = 0;
     617 ECB             : 
     618                 :     /* Keep the PGPROC array sorted. See notes above */
     619 GIC       11864 :     movecount = arrayP->numProcs - myoff - 1;
     620 CBC       11864 :     memmove(&arrayP->pgprocnos[myoff],
     621           11864 :             &arrayP->pgprocnos[myoff + 1],
     622 ECB             :             movecount * sizeof(*arrayP->pgprocnos));
     623 GIC       11864 :     memmove(&ProcGlobal->xids[myoff],
     624 CBC       11864 :             &ProcGlobal->xids[myoff + 1],
     625 ECB             :             movecount * sizeof(*ProcGlobal->xids));
     626 GIC       11864 :     memmove(&ProcGlobal->subxidStates[myoff],
     627 CBC       11864 :             &ProcGlobal->subxidStates[myoff + 1],
     628 ECB             :             movecount * sizeof(*ProcGlobal->subxidStates));
     629 GIC       11864 :     memmove(&ProcGlobal->statusFlags[myoff],
     630 CBC       11864 :             &ProcGlobal->statusFlags[myoff + 1],
     631 ECB             :             movecount * sizeof(*ProcGlobal->statusFlags));
     632                 : 
     633 GIC       11864 :     arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */
     634 CBC       11864 :     arrayP->numProcs--;
     635 ECB             : 
     636                 :     /*
     637                 :      * Adjust pgxactoff of following procs for removed PGPROC (note that
     638                 :      * numProcs already has been decremented).
     639                 :      */
     640 GIC       36833 :     for (int index = myoff; index < arrayP->numProcs; index++)
     641 ECB             :     {
     642 GIC       24969 :         int         procno = arrayP->pgprocnos[index];
     643 ECB             : 
     644 GIC       24969 :         Assert(procno >= 0 && procno < (arrayP->maxProcs + NUM_AUXILIARY_PROCS));
     645 CBC       24969 :         Assert(allProcs[procno].pgxactoff - 1 == index);
     646 ECB             : 
     647 GIC       24969 :         allProcs[procno].pgxactoff = index;
     648 ECB             :     }
     649                 : 
     650                 :     /*
     651                 :      * Release in reversed acquisition order, to reduce frequency of having to
     652                 :      * wait for XidGenLock while holding ProcArrayLock.
     653                 :      */
     654 GIC       11864 :     LWLockRelease(XidGenLock);
     655 CBC       11864 :     LWLockRelease(ProcArrayLock);
     656           11864 : }
     657 ECB             : 
     658                 : 
     659                 : /*
     660                 :  * ProcArrayEndTransaction -- mark a transaction as no longer running
     661                 :  *
     662                 :  * This is used interchangeably for commit and abort cases.  The transaction
     663                 :  * commit/abort must already be reported to WAL and pg_xact.
     664                 :  *
     665                 :  * proc is currently always MyProc, but we pass it explicitly for flexibility.
     666                 :  * latestXid is the latest Xid among the transaction's main XID and
     667                 :  * subtransactions, or InvalidTransactionId if it has no XID.  (We must ask
     668                 :  * the caller to pass latestXid, instead of computing it from the PGPROC's
     669                 :  * contents, because the subxid information in the PGPROC might be
     670                 :  * incomplete.)
     671                 :  */
     672                 : void
     673 GIC      485806 : ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
     674 ECB             : {
     675 GIC      485806 :     if (TransactionIdIsValid(latestXid))
     676 ECB             :     {
     677                 :         /*
     678                 :          * We must lock ProcArrayLock while clearing our advertised XID, so
     679                 :          * that we do not exit the set of "running" transactions while someone
     680                 :          * else is taking a snapshot.  See discussion in
     681                 :          * src/backend/access/transam/README.
     682                 :          */
     683 GIC      297704 :         Assert(TransactionIdIsValid(proc->xid));
     684 ECB             : 
     685                 :         /*
     686                 :          * If we can immediately acquire ProcArrayLock, we clear our own XID
     687                 :          * and release the lock.  If not, use group XID clearing to improve
     688                 :          * efficiency.
     689                 :          */
     690 GIC      297704 :         if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
     691 ECB             :         {
     692 GIC      297529 :             ProcArrayEndTransactionInternal(proc, latestXid);
     693 CBC      297529 :             LWLockRelease(ProcArrayLock);
     694 ECB             :         }
     695                 :         else
     696 GIC         175 :             ProcArrayGroupClearXid(proc, latestXid);
     697 ECB             :     }
     698                 :     else
     699                 :     {
     700                 :         /*
     701                 :          * If we have no XID, we don't need to lock, since we won't affect
     702                 :          * anyone else's calculation of a snapshot.  We might change their
     703                 :          * estimate of global xmin, but that's OK.
     704                 :          */
     705 GIC      188102 :         Assert(!TransactionIdIsValid(proc->xid));
     706 CBC      188102 :         Assert(proc->subxidStatus.count == 0);
     707          188102 :         Assert(!proc->subxidStatus.overflowed);
     708 ECB             : 
     709 GIC      188102 :         proc->lxid = InvalidLocalTransactionId;
     710 CBC      188102 :         proc->xmin = InvalidTransactionId;
     711 ECB             : 
     712                 :         /* be sure this is cleared in abort */
     713 GIC      188102 :         proc->delayChkptFlags = 0;
     714 ECB             : 
     715 GIC      188102 :         proc->recoveryConflictPending = false;
     716 ECB             : 
     717                 :         /* must be cleared with xid/xmin: */
     718                 :         /* avoid unnecessarily dirtying shared cachelines */
     719 GIC      188102 :         if (proc->statusFlags & PROC_VACUUM_STATE_MASK)
     720 ECB             :         {
     721 GIC       37331 :             Assert(!LWLockHeldByMe(ProcArrayLock));
     722 CBC       37331 :             LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
     723           37331 :             Assert(proc->statusFlags == ProcGlobal->statusFlags[proc->pgxactoff]);
     724           37331 :             proc->statusFlags &= ~PROC_VACUUM_STATE_MASK;
     725           37331 :             ProcGlobal->statusFlags[proc->pgxactoff] = proc->statusFlags;
     726           37331 :             LWLockRelease(ProcArrayLock);
     727 ECB             :         }
     728                 :     }
     729 GIC      485806 : }
     730 ECB             : 
     731                 : /*
     732                 :  * Mark a write transaction as no longer running.
     733                 :  *
     734                 :  * We don't do any locking here; caller must handle that.
     735                 :  */
     736                 : static inline void
     737 GIC      297704 : ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
     738 ECB             : {
     739 GIC      297704 :     int         pgxactoff = proc->pgxactoff;
     740 ECB             : 
     741                 :     /*
     742                 :      * Note: we need exclusive lock here because we're going to change other
     743                 :      * processes' PGPROC entries.
     744                 :      */
     745 GIC      297704 :     Assert(LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE));
     746 CBC      297704 :     Assert(TransactionIdIsValid(ProcGlobal->xids[pgxactoff]));
     747          297704 :     Assert(ProcGlobal->xids[pgxactoff] == proc->xid);
     748 ECB             : 
     749 GIC      297704 :     ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
     750 CBC      297704 :     proc->xid = InvalidTransactionId;
     751          297704 :     proc->lxid = InvalidLocalTransactionId;
     752          297704 :     proc->xmin = InvalidTransactionId;
     753 ECB             : 
     754                 :     /* be sure this is cleared in abort */
     755 GIC      297704 :     proc->delayChkptFlags = 0;
     756 ECB             : 
     757 GIC      297704 :     proc->recoveryConflictPending = false;
     758 ECB             : 
     759                 :     /* must be cleared with xid/xmin: */
     760                 :     /* avoid unnecessarily dirtying shared cachelines */
     761 GIC      297704 :     if (proc->statusFlags & PROC_VACUUM_STATE_MASK)
     762 ECB             :     {
     763 GIC         534 :         proc->statusFlags &= ~PROC_VACUUM_STATE_MASK;
     764 CBC         534 :         ProcGlobal->statusFlags[proc->pgxactoff] = proc->statusFlags;
     765 ECB             :     }
     766                 : 
     767                 :     /* Clear the subtransaction-XID cache too while holding the lock */
     768 GIC      297704 :     Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count &&
     769 ECB             :            ProcGlobal->subxidStates[pgxactoff].overflowed == proc->subxidStatus.overflowed);
     770 GIC      297704 :     if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed)
     771 ECB             :     {
     772 GIC         466 :         ProcGlobal->subxidStates[pgxactoff].count = 0;
     773 CBC         466 :         ProcGlobal->subxidStates[pgxactoff].overflowed = false;
     774             466 :         proc->subxidStatus.count = 0;
     775             466 :         proc->subxidStatus.overflowed = false;
     776 ECB             :     }
     777                 : 
     778                 :     /* Also advance global latestCompletedXid while holding the lock */
     779 GIC      297704 :     MaintainLatestCompletedXid(latestXid);
     780 ECB             : 
     781                 :     /* Same with xactCompletionCount  */
     782 GIC      297704 :     ShmemVariableCache->xactCompletionCount++;
     783 CBC      297704 : }
     784 ECB             : 
     785                 : /*
     786                 :  * ProcArrayGroupClearXid -- group XID clearing
     787                 :  *
     788                 :  * When we cannot immediately acquire ProcArrayLock in exclusive mode at
     789                 :  * commit time, add ourselves to a list of processes that need their XIDs
     790                 :  * cleared.  The first process to add itself to the list will acquire
     791                 :  * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
     792                 :  * on behalf of all group members.  This avoids a great deal of contention
     793                 :  * around ProcArrayLock when many processes are trying to commit at once,
     794                 :  * since the lock need not be repeatedly handed off from one committing
     795                 :  * process to the next.
     796                 :  */
     797                 : static void
     798 GIC         175 : ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
     799 ECB             : {
     800 GIC         175 :     PROC_HDR   *procglobal = ProcGlobal;
     801 ECB             :     uint32      nextidx;
     802                 :     uint32      wakeidx;
     803                 : 
     804                 :     /* We should definitely have an XID to clear. */
     805 GIC         175 :     Assert(TransactionIdIsValid(proc->xid));
     806 ECB             : 
     807                 :     /* Add ourselves to the list of processes needing a group XID clear. */
     808 GIC         175 :     proc->procArrayGroupMember = true;
     809 CBC         175 :     proc->procArrayGroupMemberXid = latestXid;
     810             175 :     nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
     811 ECB             :     while (true)
     812                 :     {
     813 GIC         175 :         pg_atomic_write_u32(&proc->procArrayGroupNext, nextidx);
     814 ECB             : 
     815 GIC         175 :         if (pg_atomic_compare_exchange_u32(&procglobal->procArrayGroupFirst,
     816 ECB             :                                            &nextidx,
     817 GIC         175 :                                            (uint32) proc->pgprocno))
     818 CBC         175 :             break;
     819 ECB             :     }
     820                 : 
     821                 :     /*
     822                 :      * If the list was not empty, the leader will clear our XID.  It is
     823                 :      * impossible to have followers without a leader because the first process
     824                 :      * that has added itself to the list will always have nextidx as
     825                 :      * INVALID_PGPROCNO.
     826                 :      */
     827 GIC         175 :     if (nextidx != INVALID_PGPROCNO)
     828 ECB             :     {
     829 GIC          11 :         int         extraWaits = 0;
     830 ECB             : 
     831                 :         /* Sleep until the leader clears our XID. */
     832 GIC          11 :         pgstat_report_wait_start(WAIT_EVENT_PROCARRAY_GROUP_UPDATE);
     833 ECB             :         for (;;)
     834                 :         {
     835                 :             /* acts as a read barrier */
     836 GIC          11 :             PGSemaphoreLock(proc->sem);
     837 CBC          11 :             if (!proc->procArrayGroupMember)
     838              11 :                 break;
     839 LBC           0 :             extraWaits++;
     840 EUB             :         }
     841 GIC          11 :         pgstat_report_wait_end();
     842 ECB             : 
     843 GIC          11 :         Assert(pg_atomic_read_u32(&proc->procArrayGroupNext) == INVALID_PGPROCNO);
     844 ECB             : 
     845                 :         /* Fix semaphore count for any absorbed wakeups */
     846 GIC          11 :         while (extraWaits-- > 0)
     847 LBC           0 :             PGSemaphoreUnlock(proc->sem);
     848 GBC          11 :         return;
     849 ECB             :     }
     850                 : 
     851                 :     /* We are the leader.  Acquire the lock on behalf of everyone. */
     852 GIC         164 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
     853 ECB             : 
     854                 :     /*
     855                 :      * Now that we've got the lock, clear the list of processes waiting for
     856                 :      * group XID clearing, saving a pointer to the head of the list.  Trying
     857                 :      * to pop elements one at a time could lead to an ABA problem.
     858                 :      */
     859 GIC         164 :     nextidx = pg_atomic_exchange_u32(&procglobal->procArrayGroupFirst,
     860 ECB             :                                      INVALID_PGPROCNO);
     861                 : 
     862                 :     /* Remember head of list so we can perform wakeups after dropping lock. */
     863 GIC         164 :     wakeidx = nextidx;
     864 ECB             : 
     865                 :     /* Walk the list and clear all XIDs. */
     866 GIC         339 :     while (nextidx != INVALID_PGPROCNO)
     867 ECB             :     {
     868 GIC         175 :         PGPROC     *nextproc = &allProcs[nextidx];
     869 ECB             : 
     870 GIC         175 :         ProcArrayEndTransactionInternal(nextproc, nextproc->procArrayGroupMemberXid);
     871 ECB             : 
     872                 :         /* Move to next proc in list. */
     873 GIC         175 :         nextidx = pg_atomic_read_u32(&nextproc->procArrayGroupNext);
     874 ECB             :     }
     875                 : 
     876                 :     /* We're done with the lock now. */
     877 GIC         164 :     LWLockRelease(ProcArrayLock);
     878 ECB             : 
     879                 :     /*
     880                 :      * Now that we've released the lock, go back and wake everybody up.  We
     881                 :      * don't do this under the lock so as to keep lock hold times to a
     882                 :      * minimum.  The system calls we need to perform to wake other processes
     883                 :      * up are probably much slower than the simple memory writes we did while
     884                 :      * holding the lock.
     885                 :      */
     886 GIC         339 :     while (wakeidx != INVALID_PGPROCNO)
     887 ECB             :     {
     888 GIC         175 :         PGPROC     *nextproc = &allProcs[wakeidx];
     889 ECB             : 
     890 GIC         175 :         wakeidx = pg_atomic_read_u32(&nextproc->procArrayGroupNext);
     891 CBC         175 :         pg_atomic_write_u32(&nextproc->procArrayGroupNext, INVALID_PGPROCNO);
     892 ECB             : 
     893                 :         /* ensure all previous writes are visible before follower continues. */
     894 GIC         175 :         pg_write_barrier();
     895 ECB             : 
     896 GIC         175 :         nextproc->procArrayGroupMember = false;
     897 ECB             : 
     898 GIC         175 :         if (nextproc != MyProc)
     899 CBC          11 :             PGSemaphoreUnlock(nextproc->sem);
     900 ECB             :     }
     901                 : }
     902                 : 
     903                 : /*
     904                 :  * ProcArrayClearTransaction -- clear the transaction fields
     905                 :  *
     906                 :  * This is used after successfully preparing a 2-phase transaction.  We are
     907                 :  * not actually reporting the transaction's XID as no longer running --- it
     908                 :  * will still appear as running because the 2PC's gxact is in the ProcArray
     909                 :  * too.  We just have to clear out our own PGPROC.
     910                 :  */
     911                 : void
     912 GIC         361 : ProcArrayClearTransaction(PGPROC *proc)
     913 ECB             : {
     914                 :     int         pgxactoff;
     915                 : 
     916                 :     /*
     917                 :      * Currently we need to lock ProcArrayLock exclusively here, as we
     918                 :      * increment xactCompletionCount below. We also need it at least in shared
     919                 :      * mode for pgproc->pgxactoff to stay the same below.
     920                 :      *
     921                 :      * We could however, as this action does not actually change anyone's view
     922                 :      * of the set of running XIDs (our entry is duplicate with the gxact that
     923                 :      * has already been inserted into the ProcArray), lower the lock level to
     924                 :      * shared if we were to make xactCompletionCount an atomic variable. But
     925                 :      * that doesn't seem worth it currently, as a 2PC commit is heavyweight
     926                 :      * enough for this not to be the bottleneck.  If it ever becomes a
     927                 :      * bottleneck it may also be worth considering to combine this with the
     928                 :      * subsequent ProcArrayRemove()
     929                 :      */
     930 GIC         361 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
     931 ECB             : 
     932 GIC         361 :     pgxactoff = proc->pgxactoff;
     933 ECB             : 
     934 GIC         361 :     ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
     935 CBC         361 :     proc->xid = InvalidTransactionId;
     936 ECB             : 
     937 GIC         361 :     proc->lxid = InvalidLocalTransactionId;
     938 CBC         361 :     proc->xmin = InvalidTransactionId;
     939             361 :     proc->recoveryConflictPending = false;
     940 ECB             : 
     941 GIC         361 :     Assert(!(proc->statusFlags & PROC_VACUUM_STATE_MASK));
     942 CBC         361 :     Assert(!proc->delayChkptFlags);
     943 ECB             : 
     944                 :     /*
     945                 :      * Need to increment completion count even though transaction hasn't
     946                 :      * really committed yet. The reason for that is that GetSnapshotData()
     947                 :      * omits the xid of the current transaction, thus without the increment we
     948                 :      * otherwise could end up reusing the snapshot later. Which would be bad,
     949                 :      * because it might not count the prepared transaction as running.
     950                 :      */
     951 GIC         361 :     ShmemVariableCache->xactCompletionCount++;
     952 ECB             : 
     953                 :     /* Clear the subtransaction-XID cache too */
     954 GIC         361 :     Assert(ProcGlobal->subxidStates[pgxactoff].count == proc->subxidStatus.count &&
     955 ECB             :            ProcGlobal->subxidStates[pgxactoff].overflowed == proc->subxidStatus.overflowed);
     956 GIC         361 :     if (proc->subxidStatus.count > 0 || proc->subxidStatus.overflowed)
     957 ECB             :     {
     958 GIC         149 :         ProcGlobal->subxidStates[pgxactoff].count = 0;
     959 CBC         149 :         ProcGlobal->subxidStates[pgxactoff].overflowed = false;
     960             149 :         proc->subxidStatus.count = 0;
     961             149 :         proc->subxidStatus.overflowed = false;
     962 ECB             :     }
     963                 : 
     964 GIC         361 :     LWLockRelease(ProcArrayLock);
     965 CBC         361 : }
     966 ECB             : 
     967                 : /*
     968                 :  * Update ShmemVariableCache->latestCompletedXid to point to latestXid if
     969                 :  * currently older.
     970                 :  */
     971                 : static void
     972 GIC      298679 : MaintainLatestCompletedXid(TransactionId latestXid)
     973 ECB             : {
     974 GIC      298679 :     FullTransactionId cur_latest = ShmemVariableCache->latestCompletedXid;
     975 ECB             : 
     976 GIC      298679 :     Assert(FullTransactionIdIsValid(cur_latest));
     977 CBC      298679 :     Assert(!RecoveryInProgress());
     978          298679 :     Assert(LWLockHeldByMe(ProcArrayLock));
     979 ECB             : 
     980 GIC      298679 :     if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
     981 ECB             :     {
     982 GIC      288173 :         ShmemVariableCache->latestCompletedXid =
     983 CBC      288173 :             FullXidRelativeTo(cur_latest, latestXid);
     984 ECB             :     }
     985                 : 
     986 GIC      298679 :     Assert(IsBootstrapProcessingMode() ||
     987 ECB             :            FullTransactionIdIsNormal(ShmemVariableCache->latestCompletedXid));
     988 GIC      298679 : }
     989 ECB             : 
     990                 : /*
     991                 :  * Same as MaintainLatestCompletedXid, except for use during WAL replay.
     992                 :  */
     993                 : static void
     994 GIC       18250 : MaintainLatestCompletedXidRecovery(TransactionId latestXid)
     995 ECB             : {
     996 GIC       18250 :     FullTransactionId cur_latest = ShmemVariableCache->latestCompletedXid;
     997 ECB             :     FullTransactionId rel;
     998                 : 
     999 GIC       18250 :     Assert(AmStartupProcess() || !IsUnderPostmaster);
    1000 CBC       18250 :     Assert(LWLockHeldByMe(ProcArrayLock));
    1001 ECB             : 
    1002                 :     /*
    1003                 :      * Need a FullTransactionId to compare latestXid with. Can't rely on
    1004                 :      * latestCompletedXid to be initialized in recovery. But in recovery it's
    1005                 :      * safe to access nextXid without a lock for the startup process.
    1006                 :      */
    1007 GIC       18250 :     rel = ShmemVariableCache->nextXid;
    1008 CBC       18250 :     Assert(FullTransactionIdIsValid(ShmemVariableCache->nextXid));
    1009 ECB             : 
    1010 GIC       36429 :     if (!FullTransactionIdIsValid(cur_latest) ||
    1011 CBC       18179 :         TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
    1012 ECB             :     {
    1013 GIC       14716 :         ShmemVariableCache->latestCompletedXid =
    1014 CBC       14716 :             FullXidRelativeTo(rel, latestXid);
    1015 ECB             :     }
    1016                 : 
    1017 GIC       18250 :     Assert(FullTransactionIdIsNormal(ShmemVariableCache->latestCompletedXid));
    1018 CBC       18250 : }
    1019 ECB             : 
    1020                 : /*
    1021                 :  * ProcArrayInitRecovery -- initialize recovery xid mgmt environment
    1022                 :  *
    1023                 :  * Remember up to where the startup process initialized the CLOG and subtrans
    1024                 :  * so we can ensure it's initialized gaplessly up to the point where necessary
    1025                 :  * while in recovery.
    1026                 :  */
    1027                 : void
    1028 GIC          71 : ProcArrayInitRecovery(TransactionId initializedUptoXID)
    1029 ECB             : {
    1030 GIC          71 :     Assert(standbyState == STANDBY_INITIALIZED);
    1031 CBC          71 :     Assert(TransactionIdIsNormal(initializedUptoXID));
    1032 ECB             : 
    1033                 :     /*
    1034                 :      * We set latestObservedXid to the xid SUBTRANS has been initialized up
    1035                 :      * to, so we can extend it from that point onwards in
    1036                 :      * RecordKnownAssignedTransactionIds, and when we get consistent in
    1037                 :      * ProcArrayApplyRecoveryInfo().
    1038                 :      */
    1039 GIC          71 :     latestObservedXid = initializedUptoXID;
    1040 CBC          71 :     TransactionIdRetreat(latestObservedXid);
    1041              71 : }
    1042 ECB             : 
    1043                 : /*
    1044                 :  * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
    1045                 :  *
    1046                 :  * Takes us through 3 states: Initialized, Pending and Ready.
    1047                 :  * Normal case is to go all the way to Ready straight away, though there
    1048                 :  * are atypical cases where we need to take it in steps.
    1049                 :  *
    1050                 :  * Use the data about running transactions on the primary to create the initial
    1051                 :  * state of KnownAssignedXids. We also use these records to regularly prune
    1052                 :  * KnownAssignedXids because we know it is possible that some transactions
    1053                 :  * with FATAL errors fail to write abort records, which could cause eventual
    1054                 :  * overflow.
    1055                 :  *
    1056                 :  * See comments for LogStandbySnapshot().
    1057                 :  */
    1058                 : void
    1059 GIC         195 : ProcArrayApplyRecoveryInfo(RunningTransactions running)
    1060 ECB             : {
    1061                 :     TransactionId *xids;
    1062                 :     int         nxids;
    1063                 :     int         i;
    1064                 : 
    1065 GIC         195 :     Assert(standbyState >= STANDBY_INITIALIZED);
    1066 CBC         195 :     Assert(TransactionIdIsValid(running->nextXid));
    1067             195 :     Assert(TransactionIdIsValid(running->oldestRunningXid));
    1068             195 :     Assert(TransactionIdIsNormal(running->latestCompletedXid));
    1069 ECB             : 
    1070                 :     /*
    1071                 :      * Remove stale transactions, if any.
    1072                 :      */
    1073 GIC         195 :     ExpireOldKnownAssignedTransactionIds(running->oldestRunningXid);
    1074 ECB             : 
    1075                 :     /*
    1076                 :      * Remove stale locks, if any.
    1077                 :      */
    1078 GIC         195 :     StandbyReleaseOldLocks(running->oldestRunningXid);
    1079 ECB             : 
    1080                 :     /*
    1081                 :      * If our snapshot is already valid, nothing else to do...
    1082                 :      */
    1083 GIC         195 :     if (standbyState == STANDBY_SNAPSHOT_READY)
    1084 CBC         124 :         return;
    1085 ECB             : 
    1086                 :     /*
    1087                 :      * If our initial RunningTransactionsData had an overflowed snapshot then
    1088                 :      * we knew we were missing some subxids from our snapshot. If we continue
    1089                 :      * to see overflowed snapshots then we might never be able to start up, so
    1090                 :      * we make another test to see if our snapshot is now valid. We know that
    1091                 :      * the missing subxids are equal to or earlier than nextXid. After we
    1092                 :      * initialise we continue to apply changes during recovery, so once the
    1093                 :      * oldestRunningXid is later than the nextXid from the initial snapshot we
    1094                 :      * know that we no longer have missing information and can mark the
    1095                 :      * snapshot as valid.
    1096                 :      */
    1097 GIC          71 :     if (standbyState == STANDBY_SNAPSHOT_PENDING)
    1098 ECB             :     {
    1099                 :         /*
    1100                 :          * If the snapshot isn't overflowed or if it's empty we can reset our
    1101                 :          * pending state and use this snapshot instead.
    1102                 :          */
    1103 UIC           0 :         if (!running->subxid_overflow || running->xcnt == 0)
    1104 EUB             :         {
    1105                 :             /*
    1106                 :              * If we have already collected known assigned xids, we need to
    1107                 :              * throw them away before we apply the recovery snapshot.
    1108                 :              */
    1109 UIC           0 :             KnownAssignedXidsReset();
    1110 UBC           0 :             standbyState = STANDBY_INITIALIZED;
    1111 EUB             :         }
    1112                 :         else
    1113                 :         {
    1114 UIC           0 :             if (TransactionIdPrecedes(standbySnapshotPendingXmin,
    1115 EUB             :                                       running->oldestRunningXid))
    1116                 :             {
    1117 UIC           0 :                 standbyState = STANDBY_SNAPSHOT_READY;
    1118 UBC           0 :                 elog(trace_recovery(DEBUG1),
    1119 EUB             :                      "recovery snapshots are now enabled");
    1120                 :             }
    1121                 :             else
    1122 UIC           0 :                 elog(trace_recovery(DEBUG1),
    1123 EUB             :                      "recovery snapshot waiting for non-overflowed snapshot or "
    1124                 :                      "until oldest active xid on standby is at least %u (now %u)",
    1125                 :                      standbySnapshotPendingXmin,
    1126                 :                      running->oldestRunningXid);
    1127 UIC           0 :             return;
    1128 EUB             :         }
    1129                 :     }
    1130                 : 
    1131 GIC          71 :     Assert(standbyState == STANDBY_INITIALIZED);
    1132 ECB             : 
    1133                 :     /*
    1134                 :      * NB: this can be reached at least twice, so make sure new code can deal
    1135                 :      * with that.
    1136                 :      */
    1137                 : 
    1138                 :     /*
    1139                 :      * Nobody else is running yet, but take locks anyhow
    1140                 :      */
    1141 GIC          71 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    1142 ECB             : 
    1143                 :     /*
    1144                 :      * KnownAssignedXids is sorted so we cannot just add the xids, we have to
    1145                 :      * sort them first.
    1146                 :      *
    1147                 :      * Some of the new xids are top-level xids and some are subtransactions.
    1148                 :      * We don't call SubTransSetParent because it doesn't matter yet. If we
    1149                 :      * aren't overflowed then all xids will fit in snapshot and so we don't
    1150                 :      * need subtrans. If we later overflow, an xid assignment record will add
    1151                 :      * xids to subtrans. If RunningTransactionsData is overflowed then we
    1152                 :      * don't have enough information to correctly update subtrans anyway.
    1153                 :      */
    1154                 : 
    1155                 :     /*
    1156                 :      * Allocate a temporary array to avoid modifying the array passed as
    1157                 :      * argument.
    1158                 :      */
    1159 GIC          71 :     xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt));
    1160 ECB             : 
    1161                 :     /*
    1162                 :      * Add to the temp array any xids which have not already completed.
    1163                 :      */
    1164 GIC          71 :     nxids = 0;
    1165 CBC          73 :     for (i = 0; i < running->xcnt + running->subxcnt; i++)
    1166 ECB             :     {
    1167 GIC           2 :         TransactionId xid = running->xids[i];
    1168 ECB             : 
    1169                 :         /*
    1170                 :          * The running-xacts snapshot can contain xids that were still visible
    1171                 :          * in the procarray when the snapshot was taken, but were already
    1172                 :          * WAL-logged as completed. They're not running anymore, so ignore
    1173                 :          * them.
    1174                 :          */
    1175 GIC           2 :         if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
    1176 LBC           0 :             continue;
    1177 EUB             : 
    1178 GIC           2 :         xids[nxids++] = xid;
    1179 ECB             :     }
    1180                 : 
    1181 GIC          71 :     if (nxids > 0)
    1182 ECB             :     {
    1183 GIC           2 :         if (procArray->numKnownAssignedXids != 0)
    1184 ECB             :         {
    1185 UIC           0 :             LWLockRelease(ProcArrayLock);
    1186 UBC           0 :             elog(ERROR, "KnownAssignedXids is not empty");
    1187 EUB             :         }
    1188                 : 
    1189                 :         /*
    1190                 :          * Sort the array so that we can add them safely into
    1191                 :          * KnownAssignedXids.
    1192                 :          *
    1193                 :          * We have to sort them logically, because in KnownAssignedXidsAdd we
    1194                 :          * call TransactionIdFollowsOrEquals and so on. But we know these XIDs
    1195                 :          * come from RUNNING_XACTS, which means there are only normal XIDs
    1196                 :          * from the same epoch, so this is safe.
    1197                 :          */
    1198 GIC           2 :         qsort(xids, nxids, sizeof(TransactionId), xidLogicalComparator);
    1199 ECB             : 
    1200                 :         /*
    1201                 :          * Add the sorted snapshot into KnownAssignedXids.  The running-xacts
    1202                 :          * snapshot may include duplicated xids because of prepared
    1203                 :          * transactions, so ignore them.
    1204                 :          */
    1205 GIC           4 :         for (i = 0; i < nxids; i++)
    1206 ECB             :         {
    1207 GIC           2 :             if (i > 0 && TransactionIdEquals(xids[i - 1], xids[i]))
    1208 ECB             :             {
    1209 UIC           0 :                 elog(DEBUG1,
    1210 EUB             :                      "found duplicated transaction %u for KnownAssignedXids insertion",
    1211                 :                      xids[i]);
    1212 UIC           0 :                 continue;
    1213 EUB             :             }
    1214 GIC           2 :             KnownAssignedXidsAdd(xids[i], xids[i], true);
    1215 ECB             :         }
    1216                 : 
    1217 GIC           2 :         KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
    1218 ECB             :     }
    1219                 : 
    1220 GIC          71 :     pfree(xids);
    1221 ECB             : 
    1222                 :     /*
    1223                 :      * latestObservedXid is at least set to the point where SUBTRANS was
    1224                 :      * started up to (cf. ProcArrayInitRecovery()) or to the biggest xid
    1225                 :      * RecordKnownAssignedTransactionIds() was called for.  Initialize
    1226                 :      * subtrans from thereon, up to nextXid - 1.
    1227                 :      *
    1228                 :      * We need to duplicate parts of RecordKnownAssignedTransactionIds() here,
    1229                 :      * because we've just added xids to the known assigned xids machinery that
    1230                 :      * haven't gone through RecordKnownAssignedTransactionIds().
    1231                 :      */
    1232 GIC          71 :     Assert(TransactionIdIsNormal(latestObservedXid));
    1233 CBC          71 :     TransactionIdAdvance(latestObservedXid);
    1234             142 :     while (TransactionIdPrecedes(latestObservedXid, running->nextXid))
    1235 ECB             :     {
    1236 UIC           0 :         ExtendSUBTRANS(latestObservedXid);
    1237 UBC           0 :         TransactionIdAdvance(latestObservedXid);
    1238 EUB             :     }
    1239 GIC          71 :     TransactionIdRetreat(latestObservedXid);    /* = running->nextXid - 1 */
    1240 ECB             : 
    1241                 :     /* ----------
    1242                 :      * Now we've got the running xids we need to set the global values that
    1243                 :      * are used to track snapshots as they evolve further.
    1244                 :      *
    1245                 :      * - latestCompletedXid which will be the xmax for snapshots
    1246                 :      * - lastOverflowedXid which shows whether snapshots overflow
    1247                 :      * - nextXid
    1248                 :      *
    1249                 :      * If the snapshot overflowed, then we still initialise with what we know,
    1250                 :      * but the recovery snapshot isn't fully valid yet because we know there
    1251                 :      * are some subxids missing. We don't know the specific subxids that are
    1252                 :      * missing, so conservatively assume the last one is latestObservedXid.
    1253                 :      * ----------
    1254                 :      */
    1255 GIC          71 :     if (running->subxid_overflow)
    1256 ECB             :     {
    1257 UIC           0 :         standbyState = STANDBY_SNAPSHOT_PENDING;
    1258 EUB             : 
    1259 UIC           0 :         standbySnapshotPendingXmin = latestObservedXid;
    1260 UBC           0 :         procArray->lastOverflowedXid = latestObservedXid;
    1261 EUB             :     }
    1262                 :     else
    1263                 :     {
    1264 GIC          71 :         standbyState = STANDBY_SNAPSHOT_READY;
    1265 ECB             : 
    1266 GIC          71 :         standbySnapshotPendingXmin = InvalidTransactionId;
    1267 ECB             :     }
    1268                 : 
    1269                 :     /*
    1270                 :      * If a transaction wrote a commit record in the gap between taking and
    1271                 :      * logging the snapshot then latestCompletedXid may already be higher than
    1272                 :      * the value from the snapshot, so check before we use the incoming value.
    1273                 :      * It also might not yet be set at all.
    1274                 :      */
    1275 GIC          71 :     MaintainLatestCompletedXidRecovery(running->latestCompletedXid);
    1276 ECB             : 
    1277                 :     /*
    1278                 :      * NB: No need to increment ShmemVariableCache->xactCompletionCount here,
    1279                 :      * nobody can see it yet.
    1280                 :      */
    1281                 : 
    1282 GIC          71 :     LWLockRelease(ProcArrayLock);
    1283 ECB             : 
    1284                 :     /* ShmemVariableCache->nextXid must be beyond any observed xid. */
    1285 GIC          71 :     AdvanceNextFullTransactionIdPastXid(latestObservedXid);
    1286 ECB             : 
    1287 GIC          71 :     Assert(FullTransactionIdIsValid(ShmemVariableCache->nextXid));
    1288 ECB             : 
    1289 GIC          71 :     KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
    1290 CBC          71 :     if (standbyState == STANDBY_SNAPSHOT_READY)
    1291              71 :         elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled");
    1292 ECB             :     else
    1293 UIC           0 :         elog(trace_recovery(DEBUG1),
    1294 EUB             :              "recovery snapshot waiting for non-overflowed snapshot or "
    1295                 :              "until oldest active xid on standby is at least %u (now %u)",
    1296                 :              standbySnapshotPendingXmin,
    1297                 :              running->oldestRunningXid);
    1298                 : }
    1299                 : 
    1300                 : /*
    1301                 :  * ProcArrayApplyXidAssignment
    1302                 :  *      Process an XLOG_XACT_ASSIGNMENT WAL record
    1303                 :  */
    1304                 : void
    1305 GIC          21 : ProcArrayApplyXidAssignment(TransactionId topxid,
    1306 ECB             :                             int nsubxids, TransactionId *subxids)
    1307                 : {
    1308                 :     TransactionId max_xid;
    1309                 :     int         i;
    1310                 : 
    1311 GIC          21 :     Assert(standbyState >= STANDBY_INITIALIZED);
    1312 ECB             : 
    1313 GIC          21 :     max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
    1314 ECB             : 
    1315                 :     /*
    1316                 :      * Mark all the subtransactions as observed.
    1317                 :      *
    1318                 :      * NOTE: This will fail if the subxids contain too many previously
    1319                 :      * unobserved xids to fit into known-assigned-xids. That shouldn't happen
    1320                 :      * as the code stands, because xid-assignment records should never contain
    1321                 :      * more than PGPROC_MAX_CACHED_SUBXIDS entries.
    1322                 :      */
    1323 GIC          21 :     RecordKnownAssignedTransactionIds(max_xid);
    1324 ECB             : 
    1325                 :     /*
    1326                 :      * Notice that we update pg_subtrans with the top-level xid, rather than
    1327                 :      * the parent xid. This is a difference between normal processing and
    1328                 :      * recovery, yet is still correct in all cases. The reason is that
    1329                 :      * subtransaction commit is not marked in clog until commit processing, so
    1330                 :      * all aborted subtransactions have already been clearly marked in clog.
    1331                 :      * As a result we are able to refer directly to the top-level
    1332                 :      * transaction's state rather than skipping through all the intermediate
    1333                 :      * states in the subtransaction tree. This should be the first time we
    1334                 :      * have attempted to SubTransSetParent().
    1335                 :      */
    1336 GIC        1365 :     for (i = 0; i < nsubxids; i++)
    1337 CBC        1344 :         SubTransSetParent(subxids[i], topxid);
    1338 ECB             : 
    1339                 :     /* KnownAssignedXids isn't maintained yet, so we're done for now */
    1340 GIC          21 :     if (standbyState == STANDBY_INITIALIZED)
    1341 LBC           0 :         return;
    1342 EUB             : 
    1343                 :     /*
    1344                 :      * Uses same locking as transaction commit
    1345                 :      */
    1346 GIC          21 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    1347 ECB             : 
    1348                 :     /*
    1349                 :      * Remove subxids from known-assigned-xacts.
    1350                 :      */
    1351 GIC          21 :     KnownAssignedXidsRemoveTree(InvalidTransactionId, nsubxids, subxids);
    1352 ECB             : 
    1353                 :     /*
    1354                 :      * Advance lastOverflowedXid to be at least the last of these subxids.
    1355                 :      */
    1356 GIC          21 :     if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid))
    1357 CBC          21 :         procArray->lastOverflowedXid = max_xid;
    1358 ECB             : 
    1359 GIC          21 :     LWLockRelease(ProcArrayLock);
    1360 ECB             : }
    1361                 : 
    1362                 : /*
    1363                 :  * TransactionIdIsInProgress -- is given transaction running in some backend
    1364                 :  *
    1365                 :  * Aside from some shortcuts such as checking RecentXmin and our own Xid,
    1366                 :  * there are four possibilities for finding a running transaction:
    1367                 :  *
    1368                 :  * 1. The given Xid is a main transaction Id.  We will find this out cheaply
    1369                 :  * by looking at ProcGlobal->xids.
    1370                 :  *
    1371                 :  * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
    1372                 :  * We can find this out cheaply too.
    1373                 :  *
    1374                 :  * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
    1375                 :  * if the Xid is running on the primary.
    1376                 :  *
    1377                 :  * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
    1378                 :  * if that is running according to ProcGlobal->xids[] or KnownAssignedXids.
    1379                 :  * This is the slowest way, but sadly it has to be done always if the others
    1380                 :  * failed, unless we see that the cached subxact sets are complete (none have
    1381                 :  * overflowed).
    1382                 :  *
    1383                 :  * ProcArrayLock has to be held while we do 1, 2, 3.  If we save the top Xids
    1384                 :  * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
    1385                 :  * This buys back some concurrency (and we can't retrieve the main Xids from
    1386                 :  * ProcGlobal->xids[] again anyway; see GetNewTransactionId).
    1387                 :  */
    1388                 : bool
    1389 GIC     7228314 : TransactionIdIsInProgress(TransactionId xid)
    1390 ECB             : {
    1391                 :     static TransactionId *xids = NULL;
    1392                 :     static TransactionId *other_xids;
    1393                 :     XidCacheStatus *other_subxidstates;
    1394 GIC     7228314 :     int         nxids = 0;
    1395 CBC     7228314 :     ProcArrayStruct *arrayP = procArray;
    1396 ECB             :     TransactionId topxid;
    1397                 :     TransactionId latestCompletedXid;
    1398                 :     int         mypgxactoff;
    1399                 :     int         numProcs;
    1400                 :     int         j;
    1401                 : 
    1402                 :     /*
    1403                 :      * Don't bother checking a transaction older than RecentXmin; it could not
    1404                 :      * possibly still be running.  (Note: in particular, this guarantees that
    1405                 :      * we reject InvalidTransactionId, FrozenTransactionId, etc as not
    1406                 :      * running.)
    1407                 :      */
    1408 GIC     7228314 :     if (TransactionIdPrecedes(xid, RecentXmin))
    1409 ECB             :     {
    1410                 :         xc_by_recent_xmin_inc();
    1411 GIC     5968169 :         return false;
    1412 ECB             :     }
    1413                 : 
    1414                 :     /*
    1415                 :      * We may have just checked the status of this transaction, so if it is
    1416                 :      * already known to be completed, we can fall out without any access to
    1417                 :      * shared memory.
    1418                 :      */
    1419 GIC     1260145 :     if (TransactionIdEquals(cachedXidIsNotInProgress, xid))
    1420 ECB             :     {
    1421                 :         xc_by_known_xact_inc();
    1422 GIC      953515 :         return false;
    1423 ECB             :     }
    1424                 : 
    1425                 :     /*
    1426                 :      * Also, we can handle our own transaction (and subtransactions) without
    1427                 :      * any access to shared memory.
    1428                 :      */
    1429 GIC      306630 :     if (TransactionIdIsCurrentTransactionId(xid))
    1430 ECB             :     {
    1431                 :         xc_by_my_xact_inc();
    1432 GIC      287949 :         return true;
    1433 ECB             :     }
    1434                 : 
    1435                 :     /*
    1436                 :      * If first time through, get workspace to remember main XIDs in. We
    1437                 :      * malloc it permanently to avoid repeated palloc/pfree overhead.
    1438                 :      */
    1439 GIC       18681 :     if (xids == NULL)
    1440 ECB             :     {
    1441                 :         /*
    1442                 :          * In hot standby mode, reserve enough space to hold all xids in the
    1443                 :          * known-assigned list. If we later finish recovery, we no longer need
    1444                 :          * the bigger array, but we don't bother to shrink it.
    1445                 :          */
    1446 GIC         600 :         int         maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs;
    1447 ECB             : 
    1448 GIC         600 :         xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId));
    1449 CBC         600 :         if (xids == NULL)
    1450 LBC           0 :             ereport(ERROR,
    1451 EUB             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
    1452                 :                      errmsg("out of memory")));
    1453                 :     }
    1454                 : 
    1455 GIC       18681 :     other_xids = ProcGlobal->xids;
    1456 CBC       18681 :     other_subxidstates = ProcGlobal->subxidStates;
    1457 ECB             : 
    1458 GIC       18681 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    1459 ECB             : 
    1460                 :     /*
    1461                 :      * Now that we have the lock, we can check latestCompletedXid; if the
    1462                 :      * target Xid is after that, it's surely still running.
    1463                 :      */
    1464 GIC       18681 :     latestCompletedXid =
    1465 CBC       18681 :         XidFromFullTransactionId(ShmemVariableCache->latestCompletedXid);
    1466           18681 :     if (TransactionIdPrecedes(latestCompletedXid, xid))
    1467 ECB             :     {
    1468 GIC        4590 :         LWLockRelease(ProcArrayLock);
    1469 ECB             :         xc_by_latest_xid_inc();
    1470 GIC        4590 :         return true;
    1471 ECB             :     }
    1472                 : 
    1473                 :     /* No shortcuts, gotta grovel through the array */
    1474 GIC       14091 :     mypgxactoff = MyProc->pgxactoff;
    1475 CBC       14091 :     numProcs = arrayP->numProcs;
    1476          130983 :     for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
    1477 ECB             :     {
    1478                 :         int         pgprocno;
    1479                 :         PGPROC     *proc;
    1480                 :         TransactionId pxid;
    1481                 :         int         pxids;
    1482                 : 
    1483                 :         /* Ignore ourselves --- dealt with it above */
    1484 GIC      119817 :         if (pgxactoff == mypgxactoff)
    1485 CBC       12122 :             continue;
    1486 ECB             : 
    1487                 :         /* Fetch xid just once - see GetNewTransactionId */
    1488 GIC      107695 :         pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
    1489 ECB             : 
    1490 GIC      107695 :         if (!TransactionIdIsValid(pxid))
    1491 CBC       75489 :             continue;
    1492 ECB             : 
    1493                 :         /*
    1494                 :          * Step 1: check the main Xid
    1495                 :          */
    1496 GIC       32206 :         if (TransactionIdEquals(pxid, xid))
    1497 ECB             :         {
    1498 GIC        2387 :             LWLockRelease(ProcArrayLock);
    1499 ECB             :             xc_by_main_xid_inc();
    1500 GIC        2387 :             return true;
    1501 ECB             :         }
    1502                 : 
    1503                 :         /*
    1504                 :          * We can ignore main Xids that are younger than the target Xid, since
    1505                 :          * the target could not possibly be their child.
    1506                 :          */
    1507 GIC       29819 :         if (TransactionIdPrecedes(xid, pxid))
    1508 CBC       13120 :             continue;
    1509 ECB             : 
    1510                 :         /*
    1511                 :          * Step 2: check the cached child-Xids arrays
    1512                 :          */
    1513 GIC       16699 :         pxids = other_subxidstates[pgxactoff].count;
    1514 CBC       16699 :         pg_read_barrier();      /* pairs with barrier in GetNewTransactionId() */
    1515           16699 :         pgprocno = arrayP->pgprocnos[pgxactoff];
    1516           16699 :         proc = &allProcs[pgprocno];
    1517           52181 :         for (j = pxids - 1; j >= 0; j--)
    1518 ECB             :         {
    1519                 :             /* Fetch xid just once - see GetNewTransactionId */
    1520 GIC       36020 :             TransactionId cxid = UINT32_ACCESS_ONCE(proc->subxids.xids[j]);
    1521 ECB             : 
    1522 GIC       36020 :             if (TransactionIdEquals(cxid, xid))
    1523 ECB             :             {
    1524 GIC         538 :                 LWLockRelease(ProcArrayLock);
    1525 ECB             :                 xc_by_child_xid_inc();
    1526 GIC         538 :                 return true;
    1527 ECB             :             }
    1528                 :         }
    1529                 : 
    1530                 :         /*
    1531                 :          * Save the main Xid for step 4.  We only need to remember main Xids
    1532                 :          * that have uncached children.  (Note: there is no race condition
    1533                 :          * here because the overflowed flag cannot be cleared, only set, while
    1534                 :          * we hold ProcArrayLock.  So we can't miss an Xid that we need to
    1535                 :          * worry about.)
    1536                 :          */
    1537 GIC       16161 :         if (other_subxidstates[pgxactoff].overflowed)
    1538 CBC         157 :             xids[nxids++] = pxid;
    1539 ECB             :     }
    1540                 : 
    1541                 :     /*
    1542                 :      * Step 3: in hot standby mode, check the known-assigned-xids list.  XIDs
    1543                 :      * in the list must be treated as running.
    1544                 :      */
    1545 GIC       11166 :     if (RecoveryInProgress())
    1546 ECB             :     {
    1547                 :         /* none of the PGPROC entries should have XIDs in hot standby mode */
    1548 UIC           0 :         Assert(nxids == 0);
    1549 EUB             : 
    1550 UIC           0 :         if (KnownAssignedXidExists(xid))
    1551 EUB             :         {
    1552 UIC           0 :             LWLockRelease(ProcArrayLock);
    1553 EUB             :             xc_by_known_assigned_inc();
    1554 UIC           0 :             return true;
    1555 EUB             :         }
    1556                 : 
    1557                 :         /*
    1558                 :          * If the KnownAssignedXids overflowed, we have to check pg_subtrans
    1559                 :          * too.  Fetch all xids from KnownAssignedXids that are lower than
    1560                 :          * xid, since if xid is a subtransaction its parent will always have a
    1561                 :          * lower value.  Note we will collect both main and subXIDs here, but
    1562                 :          * there's no help for it.
    1563                 :          */
    1564 UIC           0 :         if (TransactionIdPrecedesOrEquals(xid, procArray->lastOverflowedXid))
    1565 UBC           0 :             nxids = KnownAssignedXidsGet(xids, xid);
    1566 EUB             :     }
    1567                 : 
    1568 GIC       11166 :     LWLockRelease(ProcArrayLock);
    1569 ECB             : 
    1570                 :     /*
    1571                 :      * If none of the relevant caches overflowed, we know the Xid is not
    1572                 :      * running without even looking at pg_subtrans.
    1573                 :      */
    1574 GIC       11166 :     if (nxids == 0)
    1575 ECB             :     {
    1576                 :         xc_no_overflow_inc();
    1577 GIC       11009 :         cachedXidIsNotInProgress = xid;
    1578 CBC       11009 :         return false;
    1579 ECB             :     }
    1580                 : 
    1581                 :     /*
    1582                 :      * Step 4: have to check pg_subtrans.
    1583                 :      *
    1584                 :      * At this point, we know it's either a subtransaction of one of the Xids
    1585                 :      * in xids[], or it's not running.  If it's an already-failed
    1586                 :      * subtransaction, we want to say "not running" even though its parent may
    1587                 :      * still be running.  So first, check pg_xact to see if it's been aborted.
    1588                 :      */
    1589                 :     xc_slow_answer_inc();
    1590                 : 
    1591 GIC         157 :     if (TransactionIdDidAbort(xid))
    1592 ECB             :     {
    1593 UIC           0 :         cachedXidIsNotInProgress = xid;
    1594 UBC           0 :         return false;
    1595 EUB             :     }
    1596                 : 
    1597                 :     /*
    1598                 :      * It isn't aborted, so check whether the transaction tree it belongs to
    1599                 :      * is still running (or, more precisely, whether it was running when we
    1600                 :      * held ProcArrayLock).
    1601                 :      */
    1602 GIC         157 :     topxid = SubTransGetTopmostTransaction(xid);
    1603 CBC         157 :     Assert(TransactionIdIsValid(topxid));
    1604 GNC         314 :     if (!TransactionIdEquals(topxid, xid) &&
    1605             157 :         pg_lfind32(topxid, xids, nxids))
    1606             157 :         return true;
    1607                 : 
    1608 UIC           0 :     cachedXidIsNotInProgress = xid;
    1609               0 :     return false;
    1610                 : }
    1611                 : 
    1612                 : /*
    1613                 :  * TransactionIdIsActive -- is xid the top-level XID of an active backend?
    1614                 :  *
    1615                 :  * This differs from TransactionIdIsInProgress in that it ignores prepared
    1616                 :  * transactions, as well as transactions running on the primary if we're in
    1617 EUB             :  * hot standby.  Also, we ignore subtransactions since that's not needed
    1618                 :  * for current uses.
    1619                 :  */
    1620                 : bool
    1621 UBC           0 : TransactionIdIsActive(TransactionId xid)
    1622                 : {
    1623 UIC           0 :     bool        result = false;
    1624               0 :     ProcArrayStruct *arrayP = procArray;
    1625               0 :     TransactionId *other_xids = ProcGlobal->xids;
    1626                 :     int         i;
    1627                 : 
    1628 EUB             :     /*
    1629                 :      * Don't bother checking a transaction older than RecentXmin; it could not
    1630                 :      * possibly still be running.
    1631                 :      */
    1632 UIC           0 :     if (TransactionIdPrecedes(xid, RecentXmin))
    1633 UBC           0 :         return false;
    1634                 : 
    1635               0 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    1636 EUB             : 
    1637 UIC           0 :     for (i = 0; i < arrayP->numProcs; i++)
    1638                 :     {
    1639               0 :         int         pgprocno = arrayP->pgprocnos[i];
    1640 UBC           0 :         PGPROC     *proc = &allProcs[pgprocno];
    1641                 :         TransactionId pxid;
    1642 EUB             : 
    1643                 :         /* Fetch xid just once - see GetNewTransactionId */
    1644 UIC           0 :         pxid = UINT32_ACCESS_ONCE(other_xids[i]);
    1645 EUB             : 
    1646 UBC           0 :         if (!TransactionIdIsValid(pxid))
    1647 UIC           0 :             continue;
    1648 EUB             : 
    1649 UIC           0 :         if (proc->pid == 0)
    1650 UBC           0 :             continue;           /* ignore prepared transactions */
    1651 EUB             : 
    1652 UIC           0 :         if (TransactionIdEquals(pxid, xid))
    1653                 :         {
    1654               0 :             result = true;
    1655 UBC           0 :             break;
    1656                 :         }
    1657 EUB             :     }
    1658                 : 
    1659 UIC           0 :     LWLockRelease(ProcArrayLock);
    1660                 : 
    1661               0 :     return result;
    1662                 : }
    1663                 : 
    1664                 : 
    1665                 : /*
    1666                 :  * Determine XID horizons.
    1667                 :  *
    1668                 :  * This is used by wrapper functions like GetOldestNonRemovableTransactionId()
    1669                 :  * (for VACUUM), GetReplicationHorizons() (for hot_standby_feedback), etc as
    1670                 :  * well as "internally" by GlobalVisUpdate() (see comment above struct
    1671                 :  * GlobalVisState).
    1672                 :  *
    1673                 :  * See the definition of ComputeXidHorizonsResult for the various computed
    1674                 :  * horizons.
    1675                 :  *
    1676                 :  * For VACUUM, separate horizons (used to decide which deleted tuples must
    1677                 :  * be preserved) are computed for shared and non-shared tables.  For shared
    1678                 :  * relations backends in all databases must be considered, but for non-shared
    1679                 :  * relations that's not required, since only backends in my own database could
    1680                 :  * ever see the tuples in them. Also, we can ignore concurrently running lazy
    1681                 :  * VACUUMs because (a) they must be working on other tables, and (b) they
    1682                 :  * don't need to do snapshot-based lookups.
    1683                 :  *
    1684                 :  * This also computes a horizon used to truncate pg_subtrans. For that
    1685                 :  * backends in all databases have to be considered, and concurrently running
    1686                 :  * lazy VACUUMs cannot be ignored, as they still may perform pg_subtrans
    1687                 :  * accesses.
    1688                 :  *
    1689                 :  * Note: we include all currently running xids in the set of considered xids.
    1690                 :  * This ensures that if a just-started xact has not yet set its snapshot,
    1691                 :  * when it does set the snapshot it cannot set xmin less than what we compute.
    1692                 :  * See notes in src/backend/access/transam/README.
    1693                 :  *
    1694                 :  * Note: despite the above, it's possible for the calculated values to move
    1695                 :  * backwards on repeated calls. The calculated values are conservative, so
    1696                 :  * that anything older is definitely not considered as running by anyone
    1697                 :  * anymore, but the exact values calculated depend on a number of things. For
    1698                 :  * example, if there are no transactions running in the current database, the
    1699                 :  * horizon for normal tables will be latestCompletedXid. If a transaction
    1700                 :  * begins after that, its xmin will include in-progress transactions in other
    1701                 :  * databases that started earlier, so another call will return a lower value.
    1702                 :  * Nonetheless it is safe to vacuum a table in the current database with the
    1703                 :  * first result.  There are also replication-related effects: a walsender
    1704                 :  * process can set its xmin based on transactions that are no longer running
    1705                 :  * on the primary but are still being replayed on the standby, thus possibly
    1706                 :  * making the values go backwards.  In this case there is a possibility that
    1707                 :  * we lose data that the standby would like to have, but unless the standby
    1708                 :  * uses a replication slot to make its xmin persistent there is little we can
    1709                 :  * do about that --- data is only protected if the walsender runs continuously
    1710                 :  * while queries are executed on the standby.  (The Hot Standby code deals
    1711                 :  * with such cases by failing standby queries that needed to access
    1712                 :  * already-removed data, so there's no integrity bug.)  The computed values
    1713                 :  * are also adjusted with vacuum_defer_cleanup_age, so increasing that setting
    1714                 :  * on the fly is another easy way to make horizons move backwards, with no
    1715                 :  * consequences for data integrity.
    1716                 :  *
    1717                 :  * Note: the approximate horizons (see definition of GlobalVisState) are
    1718                 :  * updated by the computations done here. That's currently required for
    1719                 :  * correctness and a small optimization. Without doing so it's possible that
    1720                 :  * heap vacuum's call to heap_page_prune() uses a more conservative horizon
    1721 ECB             :  * than later when deciding which tuples can be removed - which the code
    1722                 :  * doesn't expect (breaking HOT).
    1723                 :  */
    1724                 : static void
    1725 CBC      204530 : ComputeXidHorizons(ComputeXidHorizonsResult *h)
    1726 ECB             : {
    1727 GIC      204530 :     ProcArrayStruct *arrayP = procArray;
    1728                 :     TransactionId kaxmin;
    1729 CBC      204530 :     bool        in_recovery = RecoveryInProgress();
    1730 GIC      204530 :     TransactionId *other_xids = ProcGlobal->xids;
    1731 ECB             : 
    1732                 :     /* inferred after ProcArrayLock is released */
    1733 CBC      204530 :     h->catalog_oldest_nonremovable = InvalidTransactionId;
    1734                 : 
    1735 GIC      204530 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    1736                 : 
    1737          204530 :     h->latest_completed = ShmemVariableCache->latestCompletedXid;
    1738                 : 
    1739                 :     /*
    1740                 :      * We initialize the MIN() calculation with latestCompletedXid + 1. This
    1741                 :      * is a lower bound for the XIDs that might appear in the ProcArray later,
    1742                 :      * and so protects us against overestimating the result due to future
    1743                 :      * additions.
    1744 ECB             :      */
    1745                 :     {
    1746                 :         TransactionId initial;
    1747                 : 
    1748 CBC      204530 :         initial = XidFromFullTransactionId(h->latest_completed);
    1749          204530 :         Assert(TransactionIdIsValid(initial));
    1750          204530 :         TransactionIdAdvance(initial);
    1751                 : 
    1752 GIC      204530 :         h->oldest_considered_running = initial;
    1753          204530 :         h->shared_oldest_nonremovable = initial;
    1754          204530 :         h->data_oldest_nonremovable = initial;
    1755                 : 
    1756                 :         /*
    1757                 :          * Only modifications made by this backend affect the horizon for
    1758                 :          * temporary relations. Instead of a check in each iteration of the
    1759                 :          * loop over all PGPROCs it is cheaper to just initialize to the
    1760                 :          * current top-level xid, if any.
    1761                 :          *
    1762                 :          * Without an assigned xid we could use a horizon as aggressive as
    1763                 :          * ReadNewTransactionid(), but we can get away with the much cheaper
    1764 ECB             :          * latestCompletedXid + 1: if this backend has no xid, there can't,
    1765                 :          * by definition, be any newer changes in the temp table than
    1766                 :          * latestCompletedXid.
    1767                 :          */
    1768 GIC      204530 :         if (TransactionIdIsValid(MyProc->xid))
    1769           53112 :             h->temp_oldest_nonremovable = MyProc->xid;
    1770                 :         else
    1771          151418 :             h->temp_oldest_nonremovable = initial;
    1772                 :     }
    1773                 : 
    1774                 :     /*
    1775 ECB             :      * Fetch slot horizons while ProcArrayLock is held - the
    1776                 :      * LWLockAcquire/LWLockRelease are a barrier, ensuring this happens inside
    1777                 :      * the lock.
    1778                 :      */
    1779 GIC      204530 :     h->slot_xmin = procArray->replication_slot_xmin;
    1780 CBC      204530 :     h->slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
    1781 ECB             : 
    1782 CBC      712499 :     for (int index = 0; index < arrayP->numProcs; index++)
    1783                 :     {
    1784 GIC      507969 :         int         pgprocno = arrayP->pgprocnos[index];
    1785          507969 :         PGPROC     *proc = &allProcs[pgprocno];
    1786          507969 :         int8        statusFlags = ProcGlobal->statusFlags[index];
    1787 ECB             :         TransactionId xid;
    1788                 :         TransactionId xmin;
    1789                 : 
    1790                 :         /* Fetch xid just once - see GetNewTransactionId */
    1791 GIC      507969 :         xid = UINT32_ACCESS_ONCE(other_xids[index]);
    1792          507969 :         xmin = UINT32_ACCESS_ONCE(proc->xmin);
    1793                 : 
    1794                 :         /*
    1795                 :          * Consider both the transaction's Xmin, and its Xid.
    1796                 :          *
    1797 ECB             :          * We must check both because a transaction might have an Xmin but not
    1798                 :          * (yet) an Xid; conversely, if it has an Xid, that could determine
    1799                 :          * some not-yet-set Xmin.
    1800                 :          */
    1801 CBC      507969 :         xmin = TransactionIdOlder(xmin, xid);
    1802                 : 
    1803                 :         /* if neither is set, this proc doesn't influence the horizon */
    1804 GIC      507969 :         if (!TransactionIdIsValid(xmin))
    1805          205291 :             continue;
    1806                 : 
    1807                 :         /*
    1808                 :          * Don't ignore any procs when determining which transactions might be
    1809 ECB             :          * considered running.  While slots should ensure logical decoding
    1810                 :          * backends are protected even without this check, it can't hurt to
    1811                 :          * include them here as well.
    1812                 :          */
    1813 GIC      302678 :         h->oldest_considered_running =
    1814          302678 :             TransactionIdOlder(h->oldest_considered_running, xmin);
    1815                 : 
    1816                 :         /*
    1817 ECB             :          * Skip over backends either vacuuming (which is ok with rows being
    1818                 :          * removed, as long as pg_subtrans is not truncated) or doing logical
    1819                 :          * decoding (which manages xmin separately, check below).
    1820                 :          */
    1821 CBC      302678 :         if (statusFlags & (PROC_IN_VACUUM | PROC_IN_LOGICAL_DECODING))
    1822           37947 :             continue;
    1823                 : 
    1824                 :         /* shared tables need to take backends in all databases into account */
    1825 GIC      264731 :         h->shared_oldest_nonremovable =
    1826          264731 :             TransactionIdOlder(h->shared_oldest_nonremovable, xmin);
    1827                 : 
    1828                 :         /*
    1829                 :          * Normally sessions in other databases are ignored for anything but
    1830                 :          * the shared horizon.
    1831                 :          *
    1832                 :          * However, include them when MyDatabaseId is not (yet) set.  A
    1833                 :          * backend in the process of starting up must not compute a "too
    1834                 :          * aggressive" horizon, otherwise we could end up using it to prune
    1835                 :          * still-needed data away.  If the current backend never connects to a
    1836                 :          * database this is harmless, because data_oldest_nonremovable will
    1837                 :          * never be utilized.
    1838                 :          *
    1839                 :          * Also, sessions marked with PROC_AFFECTS_ALL_HORIZONS should always
    1840                 :          * be included.  (This flag is used for hot standby feedback, which
    1841                 :          * can't be tied to a specific database.)
    1842                 :          *
    1843 ECB             :          * Also, while in recovery we cannot compute an accurate per-database
    1844                 :          * horizon, as all xids are managed via the KnownAssignedXids
    1845                 :          * machinery.
    1846                 :          */
    1847 GIC      264731 :         if (proc->databaseId == MyDatabaseId ||
    1848 CBC        6263 :             MyDatabaseId == InvalidOid ||
    1849             300 :             (statusFlags & PROC_AFFECTS_ALL_HORIZONS) ||
    1850                 :             in_recovery)
    1851                 :         {
    1852 GIC      264431 :             h->data_oldest_nonremovable =
    1853          264431 :                 TransactionIdOlder(h->data_oldest_nonremovable, xmin);
    1854                 :         }
    1855                 :     }
    1856                 : 
    1857 ECB             :     /*
    1858                 :      * If in recovery fetch oldest xid in KnownAssignedXids, will be applied
    1859                 :      * after lock is released.
    1860                 :      */
    1861 GIC      204530 :     if (in_recovery)
    1862             116 :         kaxmin = KnownAssignedXidsGetOldestXmin();
    1863                 : 
    1864 ECB             :     /*
    1865                 :      * No other information from shared state is needed, release the lock
    1866                 :      * immediately. The rest of the computations can be done without a lock.
    1867                 :      */
    1868 CBC      204530 :     LWLockRelease(ProcArrayLock);
    1869 ECB             : 
    1870 CBC      204530 :     if (in_recovery)
    1871 ECB             :     {
    1872 CBC         116 :         h->oldest_considered_running =
    1873             116 :             TransactionIdOlder(h->oldest_considered_running, kaxmin);
    1874 GIC         116 :         h->shared_oldest_nonremovable =
    1875             116 :             TransactionIdOlder(h->shared_oldest_nonremovable, kaxmin);
    1876             116 :         h->data_oldest_nonremovable =
    1877             116 :             TransactionIdOlder(h->data_oldest_nonremovable, kaxmin);
    1878                 :         /* temp relations cannot be accessed in recovery */
    1879                 :     }
    1880                 :     else
    1881                 :     {
    1882                 :         /*
    1883                 :          * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age.
    1884                 :          *
    1885                 :          * vacuum_defer_cleanup_age provides some additional "slop" for the
    1886                 :          * benefit of hot standby queries on standby servers.  This is quick
    1887                 :          * and dirty, and perhaps not all that useful unless the primary has a
    1888                 :          * predictable transaction rate, but it offers some protection when
    1889                 :          * there's no walsender connection.  Note that we are assuming
    1890                 :          * vacuum_defer_cleanup_age isn't large enough to cause wraparound ---
    1891                 :          * so guc.c should limit it to no more than the xidStopLimit threshold
    1892                 :          * in varsup.c.  Also note that we intentionally don't apply
    1893                 :          * vacuum_defer_cleanup_age on standby servers.
    1894                 :          *
    1895 ECB             :          * Need to use TransactionIdRetreatSafely() instead of open-coding the
    1896                 :          * subtraction, to prevent creating an xid before
    1897                 :          * FirstNormalTransactionId.
    1898                 :          */
    1899 GIC      204414 :         Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
    1900 ECB             :                                              h->shared_oldest_nonremovable));
    1901 GIC      204414 :         Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable,
    1902 EUB             :                                              h->data_oldest_nonremovable));
    1903                 : 
    1904 GIC      204414 :         if (vacuum_defer_cleanup_age > 0)
    1905 EUB             :         {
    1906 UIC           0 :             TransactionIdRetreatSafely(&h->oldest_considered_running,
    1907                 :                                        vacuum_defer_cleanup_age,
    1908 EUB             :                                        h->latest_completed);
    1909 UIC           0 :             TransactionIdRetreatSafely(&h->shared_oldest_nonremovable,
    1910                 :                                        vacuum_defer_cleanup_age,
    1911                 :                                        h->latest_completed);
    1912               0 :             TransactionIdRetreatSafely(&h->data_oldest_nonremovable,
    1913                 :                                        vacuum_defer_cleanup_age,
    1914 EUB             :                                        h->latest_completed);
    1915                 :             /* defer doesn't apply to temp relations */
    1916                 : 
    1917                 : 
    1918 UIC           0 :             Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
    1919                 :                                                  h->shared_oldest_nonremovable));
    1920               0 :             Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable,
    1921                 :                                                  h->data_oldest_nonremovable));
    1922                 :         }
    1923                 :     }
    1924 ECB             : 
    1925                 :     /*
    1926                 :      * Check whether there are replication slots requiring an older xmin.
    1927                 :      */
    1928 GIC      204530 :     h->shared_oldest_nonremovable =
    1929          204530 :         TransactionIdOlder(h->shared_oldest_nonremovable, h->slot_xmin);
    1930          204530 :     h->data_oldest_nonremovable =
    1931          204530 :         TransactionIdOlder(h->data_oldest_nonremovable, h->slot_xmin);
    1932                 : 
    1933                 :     /*
    1934                 :      * The only difference between catalog / data horizons is that the slot's
    1935                 :      * catalog xmin is applied to the catalog one (so catalogs can be accessed
    1936 ECB             :      * for logical decoding). Initialize with data horizon, and then back up
    1937                 :      * further if necessary. Have to back up the shared horizon as well, since
    1938                 :      * that also can contain catalogs.
    1939                 :      */
    1940 CBC      204530 :     h->shared_oldest_nonremovable_raw = h->shared_oldest_nonremovable;
    1941          204530 :     h->shared_oldest_nonremovable =
    1942          204530 :         TransactionIdOlder(h->shared_oldest_nonremovable,
    1943                 :                            h->slot_catalog_xmin);
    1944 GIC      204530 :     h->catalog_oldest_nonremovable = h->data_oldest_nonremovable;
    1945          204530 :     h->catalog_oldest_nonremovable =
    1946          204530 :         TransactionIdOlder(h->catalog_oldest_nonremovable,
    1947                 :                            h->slot_catalog_xmin);
    1948                 : 
    1949 ECB             :     /*
    1950                 :      * It's possible that slots / vacuum_defer_cleanup_age backed up the
    1951                 :      * horizons further than oldest_considered_running. Fix.
    1952                 :      */
    1953 CBC      204530 :     h->oldest_considered_running =
    1954 GIC      204530 :         TransactionIdOlder(h->oldest_considered_running,
    1955 ECB             :                            h->shared_oldest_nonremovable);
    1956 CBC      204530 :     h->oldest_considered_running =
    1957 GIC      204530 :         TransactionIdOlder(h->oldest_considered_running,
    1958                 :                            h->catalog_oldest_nonremovable);
    1959          204530 :     h->oldest_considered_running =
    1960          204530 :         TransactionIdOlder(h->oldest_considered_running,
    1961                 :                            h->data_oldest_nonremovable);
    1962                 : 
    1963 ECB             :     /*
    1964                 :      * shared horizons have to be at least as old as the oldest visible in
    1965                 :      * current db
    1966                 :      */
    1967 GIC      204530 :     Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable,
    1968                 :                                          h->data_oldest_nonremovable));
    1969          204530 :     Assert(TransactionIdPrecedesOrEquals(h->shared_oldest_nonremovable,
    1970                 :                                          h->catalog_oldest_nonremovable));
    1971                 : 
    1972 ECB             :     /*
    1973                 :      * Horizons need to ensure that pg_subtrans access is still possible for
    1974                 :      * the relevant backends.
    1975                 :      */
    1976 CBC      204530 :     Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
    1977                 :                                          h->shared_oldest_nonremovable));
    1978          204530 :     Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
    1979                 :                                          h->catalog_oldest_nonremovable));
    1980          204530 :     Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
    1981                 :                                          h->data_oldest_nonremovable));
    1982 GIC      204530 :     Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
    1983 ECB             :                                          h->temp_oldest_nonremovable));
    1984 GIC      204530 :     Assert(!TransactionIdIsValid(h->slot_xmin) ||
    1985                 :            TransactionIdPrecedesOrEquals(h->oldest_considered_running,
    1986                 :                                          h->slot_xmin));
    1987          204530 :     Assert(!TransactionIdIsValid(h->slot_catalog_xmin) ||
    1988 ECB             :            TransactionIdPrecedesOrEquals(h->oldest_considered_running,
    1989                 :                                          h->slot_catalog_xmin));
    1990                 : 
    1991                 :     /* update approximate horizons with the computed horizons */
    1992 GIC      204530 :     GlobalVisUpdateApply(h);
    1993          204530 : }
    1994                 : 
    1995                 : /*
    1996 ECB             :  * Determine what kind of visibility horizon needs to be used for a
    1997                 :  * relation. If rel is NULL, the most conservative horizon is used.
    1998                 :  */
    1999                 : static inline GlobalVisHorizonKind
    2000 GIC    14117213 : GlobalVisHorizonKindForRel(Relation rel)
    2001                 : {
    2002 ECB             :     /*
    2003                 :      * Other relkinds currently don't contain xids, nor always the necessary
    2004                 :      * logical decoding markers.
    2005                 :      */
    2006 GIC    14117213 :     Assert(!rel ||
    2007 ECB             :            rel->rd_rel->relkind == RELKIND_RELATION ||
    2008                 :            rel->rd_rel->relkind == RELKIND_MATVIEW ||
    2009                 :            rel->rd_rel->relkind == RELKIND_TOASTVALUE);
    2010                 : 
    2011 CBC    14117213 :     if (rel == NULL || rel->rd_rel->relisshared || RecoveryInProgress())
    2012           68019 :         return VISHORIZON_SHARED;
    2013        14049194 :     else if (IsCatalogRelation(rel) ||
    2014 GIC    11358891 :              RelationIsAccessibleInLogicalDecoding(rel))
    2015 CBC     2690307 :         return VISHORIZON_CATALOG;
    2016 GIC    11358887 :     else if (!RELATION_IS_LOCAL(rel))
    2017        11305763 :         return VISHORIZON_DATA;
    2018                 :     else
    2019           53124 :         return VISHORIZON_TEMP;
    2020                 : }
    2021                 : 
    2022                 : /*
    2023                 :  * Return the oldest XID for which deleted tuples must be preserved in the
    2024                 :  * passed table.
    2025                 :  *
    2026                 :  * If rel is not NULL the horizon may be considerably more recent than
    2027                 :      * otherwise (i.e. more tuples will be removable). In the NULL case a horizon
    2028                 :  * that is correct (but not optimal) for all relations will be returned.
    2029                 :  *
    2030 ECB             :  * This is used by VACUUM to decide which deleted tuples must be preserved in
    2031                 :  * the passed in table.
    2032                 :  */
    2033                 : TransactionId
    2034 CBC       80228 : GetOldestNonRemovableTransactionId(Relation rel)
    2035                 : {
    2036 ECB             :     ComputeXidHorizonsResult horizons;
    2037                 : 
    2038 CBC       80228 :     ComputeXidHorizons(&horizons);
    2039 ECB             : 
    2040 CBC       80228 :     switch (GlobalVisHorizonKindForRel(rel))
    2041 ECB             :     {
    2042 CBC       12107 :         case VISHORIZON_SHARED:
    2043           12107 :             return horizons.shared_oldest_nonremovable;
    2044           43630 :         case VISHORIZON_CATALOG:
    2045           43630 :             return horizons.catalog_oldest_nonremovable;
    2046 GIC       13134 :         case VISHORIZON_DATA:
    2047           13134 :             return horizons.data_oldest_nonremovable;
    2048           11357 :         case VISHORIZON_TEMP:
    2049 GBC       11357 :             return horizons.temp_oldest_nonremovable;
    2050                 :     }
    2051                 : 
    2052                 :     /* just to prevent compiler warnings */
    2053 UIC           0 :     return InvalidTransactionId;
    2054                 : }
    2055                 : 
    2056                 : /*
    2057                 :  * Return the oldest transaction id any currently running backend might still
    2058                 :  * consider running. This should not be used for visibility / pruning
    2059 ECB             :  * determinations (see GetOldestNonRemovableTransactionId()), but for
    2060                 :  * decisions like up to where pg_subtrans can be truncated.
    2061                 :  */
    2062                 : TransactionId
    2063 CBC        2336 : GetOldestTransactionIdConsideredRunning(void)
    2064                 : {
    2065 ECB             :     ComputeXidHorizonsResult horizons;
    2066                 : 
    2067 GIC        2336 :     ComputeXidHorizons(&horizons);
    2068                 : 
    2069            2336 :     return horizons.oldest_considered_running;
    2070                 : }
    2071                 : 
    2072 ECB             : /*
    2073                 :  * Return the visibility horizons for a hot standby feedback message.
    2074                 :  */
    2075                 : void
    2076 CBC          21 : GetReplicationHorizons(TransactionId *xmin, TransactionId *catalog_xmin)
    2077                 : {
    2078                 :     ComputeXidHorizonsResult horizons;
    2079                 : 
    2080 GIC          21 :     ComputeXidHorizons(&horizons);
    2081                 : 
    2082                 :     /*
    2083                 :      * Don't want to use shared_oldest_nonremovable here, as that contains the
    2084 ECB             :      * effect of replication slot's catalog_xmin. We want to send a separate
    2085                 :      * feedback for the catalog horizon, so the primary can remove data table
    2086                 :      * contents more aggressively.
    2087                 :      */
    2088 GIC          21 :     *xmin = horizons.shared_oldest_nonremovable_raw;
    2089              21 :     *catalog_xmin = horizons.slot_catalog_xmin;
    2090              21 : }
    2091                 : 
    2092                 : /*
    2093                 :  * GetMaxSnapshotXidCount -- get max size for snapshot XID array
    2094 ECB             :  *
    2095                 :  * We have to export this for use by snapmgr.c.
    2096                 :  */
    2097                 : int
    2098 GIC       25076 : GetMaxSnapshotXidCount(void)
    2099                 : {
    2100           25076 :     return procArray->maxProcs;
    2101                 : }
    2102                 : 
    2103                 : /*
    2104                 :  * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array
    2105 ECB             :  *
    2106                 :  * We have to export this for use by snapmgr.c.
    2107                 :  */
    2108                 : int
    2109 GIC       24930 : GetMaxSnapshotSubxidCount(void)
    2110                 : {
    2111           24930 :     return TOTAL_MAX_CACHED_SUBXIDS;
    2112                 : }
    2113                 : 
    2114 ECB             : /*
    2115                 :  * Initialize old_snapshot_threshold specific parts of a newly built snapshot.
    2116                 :  */
    2117                 : static void
    2118 GIC     2597176 : GetSnapshotDataInitOldSnapshot(Snapshot snapshot)
    2119                 : {
    2120         2597176 :     if (!OldSnapshotThresholdActive())
    2121                 :     {
    2122 ECB             :         /*
    2123                 :          * If not using "snapshot too old" feature, fill related fields with
    2124                 :          * dummy values that don't require any locking.
    2125                 :          */
    2126 GIC     2593616 :         snapshot->lsn = InvalidXLogRecPtr;
    2127         2593616 :         snapshot->whenTaken = 0;
    2128                 :     }
    2129                 :     else
    2130                 :     {
    2131                 :         /*
    2132 ECB             :          * Capture the current time and WAL stream location in case this
    2133                 :          * snapshot becomes old enough to need to fall back on the special
    2134                 :          * "old snapshot" logic.
    2135                 :          */
    2136 CBC        3560 :         snapshot->lsn = GetXLogInsertRecPtr();
    2137 GIC        3560 :         snapshot->whenTaken = GetSnapshotCurrentTimestamp();
    2138            3560 :         MaintainOldSnapshotTimeMapping(snapshot->whenTaken, snapshot->xmin);
    2139                 :     }
    2140         2597176 : }
    2141                 : 
    2142                 : /*
    2143                 :  * Helper function for GetSnapshotData() that checks if the bulk of the
    2144                 :  * visibility information in the snapshot is still valid. If so, it updates
    2145                 :  * the fields that need to change and returns true. Otherwise it returns
    2146                 :  * false.
    2147                 :  *
    2148 ECB             :  * This very likely can be evolved to not need ProcArrayLock held (at the very
    2149                 :  * least in the case we already hold a snapshot), but that's for another day.
    2150                 :  */
    2151                 : static bool
    2152 CBC     2597176 : GetSnapshotDataReuse(Snapshot snapshot)
    2153                 : {
    2154 ECB             :     uint64      curXactCompletionCount;
    2155                 : 
    2156 GIC     2597176 :     Assert(LWLockHeldByMe(ProcArrayLock));
    2157 ECB             : 
    2158 CBC     2597176 :     if (unlikely(snapshot->snapXactCompletionCount == 0))
    2159           23627 :         return false;
    2160                 : 
    2161 GIC     2573549 :     curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
    2162         2573549 :     if (curXactCompletionCount != snapshot->snapXactCompletionCount)
    2163          672660 :         return false;
    2164                 : 
    2165                 :     /*
    2166                 :      * If the current xactCompletionCount is still the same as it was at the
    2167                 :      * time the snapshot was built, we can be sure that rebuilding the
    2168                 :      * contents of the snapshot the hard way would result in the same snapshot
    2169                 :      * contents:
    2170                 :      *
    2171                 :      * As explained in transam/README, the set of xids considered running by
    2172                 :      * GetSnapshotData() cannot change while ProcArrayLock is held. Snapshot
    2173                 :      * contents only depend on transactions with xids and xactCompletionCount
    2174                 :      * is incremented whenever a transaction with an xid finishes (while
    2175                 :      * holding ProcArrayLock exclusively). Thus the xactCompletionCount check
    2176                 :      * ensures we would detect if the snapshot would have changed.
    2177                 :      *
    2178                 :      * As the snapshot contents are the same as they were before, it is safe to
    2179                 :      * re-enter the snapshot's xmin into the PGPROC array. None of the rows
    2180                 :      * visible under the snapshot could already have been removed (that'd
    2181 ECB             :      * require the set of running transactions to change) and it fulfills the
    2182                 :      * requirement that concurrent GetSnapshotData() calls yield the same
    2183                 :      * xmin.
    2184                 :      */
    2185 CBC     1900889 :     if (!TransactionIdIsValid(MyProc->xmin))
    2186 GIC      581099 :         MyProc->xmin = TransactionXmin = snapshot->xmin;
    2187 ECB             : 
    2188 CBC     1900889 :     RecentXmin = snapshot->xmin;
    2189         1900889 :     Assert(TransactionIdPrecedesOrEquals(TransactionXmin, RecentXmin));
    2190 ECB             : 
    2191 GIC     1900889 :     snapshot->curcid = GetCurrentCommandId(false);
    2192 CBC     1900889 :     snapshot->active_count = 0;
    2193 GIC     1900889 :     snapshot->regd_count = 0;
    2194 CBC     1900889 :     snapshot->copied = false;
    2195                 : 
    2196 GIC     1900889 :     GetSnapshotDataInitOldSnapshot(snapshot);
    2197                 : 
    2198         1900889 :     return true;
    2199                 : }
    2200                 : 
    2201                 : /*
    2202                 :  * GetSnapshotData -- returns information about running transactions.
    2203                 :  *
    2204                 :  * The returned snapshot includes xmin (lowest still-running xact ID),
    2205                 :  * xmax (highest completed xact ID + 1), and a list of running xact IDs
    2206                 :  * in the range xmin <= xid < xmax.  It is used as follows:
    2207                 :  *      All xact IDs < xmin are considered finished.
    2208                 :  *      All xact IDs >= xmax are considered still running.
    2209                 :  *      For an xact ID xmin <= xid < xmax, consult list to see whether
    2210                 :  *      it is considered running or not.
    2211                 :  * This ensures that the set of transactions seen as "running" by the
    2212                 :  * current xact will not change after it takes the snapshot.
    2213                 :  *
    2214                 :  * All running top-level XIDs are included in the snapshot, except for lazy
    2215                 :  * VACUUM processes.  We also try to include running subtransaction XIDs,
    2216                 :  * but since PGPROC has only a limited cache area for subxact XIDs, full
    2217                 :  * information may not be available.  If we find any overflowed subxid arrays,
    2218                 :  * we have to mark the snapshot's subxid data as overflowed, and extra work
    2219                 :  * *may* need to be done to determine what's running (see XidInMVCCSnapshot()
    2220                 :  * in heapam_visibility.c).
    2221                 :  *
    2222                 :  * We also update the following backend-global variables:
    2223                 :  *      TransactionXmin: the oldest xmin of any snapshot in use in the
    2224                 :  *          current transaction (this is the same as MyProc->xmin).
    2225                 :  *      RecentXmin: the xmin computed for the most recent snapshot.  XIDs
    2226                 :  *          older than this are known not running any more.
    2227                 :  *
    2228                 :  * And try to advance the bounds of GlobalVis{Shared,Catalog,Data,Temp}Rels
    2229                 :  * for the benefit of the GlobalVisTest* family of functions.
    2230                 :  *
    2231 ECB             :  * Note: this function should probably not be called with an argument that's
    2232                 :  * not statically allocated (see xip allocation below).
    2233                 :  */
    2234                 : Snapshot
    2235 GIC     2597176 : GetSnapshotData(Snapshot snapshot)
    2236                 : {
    2237 CBC     2597176 :     ProcArrayStruct *arrayP = procArray;
    2238         2597176 :     TransactionId *other_xids = ProcGlobal->xids;
    2239 ECB             :     TransactionId xmin;
    2240                 :     TransactionId xmax;
    2241 GIC     2597176 :     int         count = 0;
    2242         2597176 :     int         subcount = 0;
    2243         2597176 :     bool        suboverflowed = false;
    2244                 :     FullTransactionId latest_completed;
    2245                 :     TransactionId oldestxid;
    2246 ECB             :     int         mypgxactoff;
    2247                 :     TransactionId myxid;
    2248                 :     uint64      curXactCompletionCount;
    2249                 : 
    2250 GIC     2597176 :     TransactionId replication_slot_xmin = InvalidTransactionId;
    2251         2597176 :     TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
    2252                 : 
    2253         2597176 :     Assert(snapshot != NULL);
    2254                 : 
    2255                 :     /*
    2256                 :      * Allocating space for maxProcs xids is usually overkill; numProcs would
    2257                 :      * be sufficient.  But it seems better to do the malloc while not holding
    2258                 :      * the lock, so we can't look at numProcs.  Likewise, we allocate much
    2259                 :      * more subxip storage than is probably needed.
    2260                 :      *
    2261                 :      * This does open a possibility for avoiding repeated malloc/free: since
    2262 ECB             :      * maxProcs does not change at runtime, we can simply reuse the previous
    2263                 :      * xip arrays if any.  (This relies on the fact that all callers pass
    2264                 :      * static SnapshotData structs.)
    2265                 :      */
    2266 GIC     2597176 :     if (snapshot->xip == NULL)
    2267                 :     {
    2268 ECB             :         /*
    2269                 :          * First call for this snapshot. Snapshot is same size whether or not
    2270                 :          * we are in recovery, see later comments.
    2271 EUB             :          */
    2272 GIC       23432 :         snapshot->xip = (TransactionId *)
    2273           23432 :             malloc(GetMaxSnapshotXidCount() * sizeof(TransactionId));
    2274 CBC       23432 :         if (snapshot->xip == NULL)
    2275 LBC           0 :             ereport(ERROR,
    2276 ECB             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
    2277                 :                      errmsg("out of memory")));
    2278 GBC       23432 :         Assert(snapshot->subxip == NULL);
    2279 GIC       23432 :         snapshot->subxip = (TransactionId *)
    2280           23432 :             malloc(GetMaxSnapshotSubxidCount() * sizeof(TransactionId));
    2281           23432 :         if (snapshot->subxip == NULL)
    2282 UIC           0 :             ereport(ERROR,
    2283                 :                     (errcode(ERRCODE_OUT_OF_MEMORY),
    2284                 :                      errmsg("out of memory")));
    2285                 :     }
    2286                 : 
    2287 ECB             :     /*
    2288                 :      * It is sufficient to get shared lock on ProcArrayLock, even if we are
    2289                 :      * going to set MyProc->xmin.
    2290                 :      */
    2291 CBC     2597176 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    2292 ECB             : 
    2293 GIC     2597176 :     if (GetSnapshotDataReuse(snapshot))
    2294                 :     {
    2295 CBC     1900889 :         LWLockRelease(ProcArrayLock);
    2296         1900889 :         return snapshot;
    2297 ECB             :     }
    2298                 : 
    2299 GIC      696287 :     latest_completed = ShmemVariableCache->latestCompletedXid;
    2300 CBC      696287 :     mypgxactoff = MyProc->pgxactoff;
    2301          696287 :     myxid = other_xids[mypgxactoff];
    2302 GIC      696287 :     Assert(myxid == MyProc->xid);
    2303                 : 
    2304 CBC      696287 :     oldestxid = ShmemVariableCache->oldestXid;
    2305          696287 :     curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
    2306 ECB             : 
    2307                 :     /* xmax is always latestCompletedXid + 1 */
    2308 GIC      696287 :     xmax = XidFromFullTransactionId(latest_completed);
    2309 CBC      696287 :     TransactionIdAdvance(xmax);
    2310 GIC      696287 :     Assert(TransactionIdIsNormal(xmax));
    2311                 : 
    2312 ECB             :     /* initialize xmin calculation with xmax */
    2313 CBC      696287 :     xmin = xmax;
    2314                 : 
    2315 ECB             :     /* take own xid into account, saves a check inside the loop */
    2316 GIC      696287 :     if (TransactionIdIsNormal(myxid) && NormalTransactionIdPrecedes(myxid, xmin))
    2317 CBC       22367 :         xmin = myxid;
    2318                 : 
    2319          696287 :     snapshot->takenDuringRecovery = RecoveryInProgress();
    2320 ECB             : 
    2321 CBC      696287 :     if (!snapshot->takenDuringRecovery)
    2322 ECB             :     {
    2323 CBC      695461 :         int         numProcs = arrayP->numProcs;
    2324 GIC      695461 :         TransactionId *xip = snapshot->xip;
    2325          695461 :         int        *pgprocnos = arrayP->pgprocnos;
    2326          695461 :         XidCacheStatus *subxidStates = ProcGlobal->subxidStates;
    2327          695461 :         uint8      *allStatusFlags = ProcGlobal->statusFlags;
    2328                 : 
    2329 ECB             :         /*
    2330                 :          * First collect set of pgxactoff/xids that need to be included in the
    2331                 :          * snapshot.
    2332                 :          */
    2333 GIC     3559582 :         for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
    2334                 :         {
    2335 ECB             :             /* Fetch xid just once - see GetNewTransactionId */
    2336 GIC     2864121 :             TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
    2337                 :             uint8       statusFlags;
    2338                 : 
    2339         2864121 :             Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff);
    2340                 : 
    2341 ECB             :             /*
    2342                 :              * If the transaction has no XID assigned, we can skip it; it
    2343                 :              * won't have sub-XIDs either.
    2344                 :              */
    2345 GIC     2864121 :             if (likely(xid == InvalidTransactionId))
    2346         2324866 :                 continue;
    2347                 : 
    2348                 :             /*
    2349 ECB             :              * We don't include our own XIDs (if any) in the snapshot. It
    2350                 :              * needs to be included in the xmin computation, but we did so
    2351                 :              * outside the loop.
    2352                 :              */
    2353 GIC      539255 :             if (pgxactoff == mypgxactoff)
    2354           70899 :                 continue;
    2355                 : 
    2356                 :             /*
    2357                 :              * The only way we are able to get here with a non-normal xid is
    2358 ECB             :              * during bootstrap - with this backend using
    2359                 :              * BootstrapTransactionId. But the above test should filter that
    2360                 :              * out.
    2361                 :              */
    2362 GIC      468356 :             Assert(TransactionIdIsNormal(xid));
    2363                 : 
    2364                 :             /*
    2365 ECB             :              * If the XID is >= xmax, we can skip it; such transactions will
    2366                 :              * be treated as running anyway (and any sub-XIDs will also be >=
    2367                 :              * xmax).
    2368                 :              */
    2369 GIC      468356 :             if (!NormalTransactionIdPrecedes(xid, xmax))
    2370           93952 :                 continue;
    2371                 : 
    2372 ECB             :             /*
    2373                 :              * Skip over backends doing logical decoding which manages xmin
    2374 EUB             :              * separately (check below) and ones running LAZY VACUUM.
    2375                 :              */
    2376 CBC      374404 :             statusFlags = allStatusFlags[pgxactoff];
    2377          374404 :             if (statusFlags & (PROC_IN_LOGICAL_DECODING | PROC_IN_VACUUM))
    2378 UIC           0 :                 continue;
    2379                 : 
    2380 CBC      374404 :             if (NormalTransactionIdPrecedes(xid, xmin))
    2381 GIC      216005 :                 xmin = xid;
    2382                 : 
    2383                 :             /* Add XID to snapshot. */
    2384          374404 :             xip[count++] = xid;
    2385                 : 
    2386                 :             /*
    2387                 :              * Save subtransaction XIDs if possible (if we've already
    2388                 :              * overflowed, there's no point).  Note that the subxact XIDs must
    2389                 :              * be later than their parent, so no need to check them against
    2390                 :              * xmin.  We could filter against xmax, but it seems better not to
    2391                 :              * do that much work while holding the ProcArrayLock.
    2392                 :              *
    2393                 :              * The other backend can add more subxids concurrently, but cannot
    2394                 :              * remove any.  Hence it's important to fetch nxids just once.
    2395                 :              * Should be safe to use memcpy, though.  (We needn't worry about
    2396                 :              * missing any xids added concurrently, because they must postdate
    2397 ECB             :              * xmax.)
    2398                 :              *
    2399                 :              * Again, our own XIDs are not included in the snapshot.
    2400                 :              */
    2401 CBC      374404 :             if (!suboverflowed)
    2402                 :             {
    2403                 : 
    2404          374404 :                 if (subxidStates[pgxactoff].overflowed)
    2405 GIC          28 :                     suboverflowed = true;
    2406 ECB             :                 else
    2407                 :                 {
    2408 CBC      374376 :                     int         nsubxids = subxidStates[pgxactoff].count;
    2409 ECB             : 
    2410 GIC      374376 :                     if (nsubxids > 0)
    2411 ECB             :                     {
    2412 GIC        4762 :                         int         pgprocno = pgprocnos[pgxactoff];
    2413 CBC        4762 :                         PGPROC     *proc = &allProcs[pgprocno];
    2414 ECB             : 
    2415 GIC        4762 :                         pg_read_barrier();  /* pairs with GetNewTransactionId */
    2416 ECB             : 
    2417 GIC        4762 :                         memcpy(snapshot->subxip + subcount,
    2418 GNC        4762 :                                proc->subxids.xids,
    2419                 :                                nsubxids * sizeof(TransactionId));
    2420 GIC        4762 :                         subcount += nsubxids;
    2421                 :                     }
    2422                 :                 }
    2423                 :             }
    2424                 :         }
    2425                 :     }
    2426                 :     else
    2427                 :     {
    2428                 :         /*
    2429                 :          * We're in hot standby, so get XIDs from KnownAssignedXids.
    2430                 :          *
    2431                 :          * We store all xids directly into subxip[]. Here's why:
    2432                 :          *
    2433                 :          * In recovery we don't know which xids are top-level and which are
    2434                 :          * subxacts, a design choice that greatly simplifies xid processing.
    2435                 :          *
    2436                 :          * It seems like we would want to try to put xids into xip[] only, but
    2437                 :          * that is fairly small. We would either need to make that bigger or
    2438                 :          * to increase the rate at which we WAL-log xid assignment; neither is
    2439                 :          * an appealing choice.
    2440                 :          *
    2441                 :          * We could try to store xids into xip[] first and then into subxip[]
    2442                 :          * if there are too many xids. That only works if the snapshot doesn't
    2443                 :          * overflow because we do not search subxip[] in that case. A simpler
    2444                 :          * way is to just store all xids in the subxip array because this is
    2445                 :          * by far the bigger array. We just leave the xip array empty.
    2446                 :          *
    2447                 :          * Either way we need to change the way XidInMVCCSnapshot() works
    2448                 :          * depending upon when the snapshot was taken, or change normal
    2449                 :          * snapshot processing so it matches.
    2450                 :          *
    2451                 :          * Note: It is possible for recovery to end before we finish taking
    2452                 :          * the snapshot, and for newly assigned transaction ids to be added to
    2453 ECB             :          * the ProcArray.  xmax cannot change while we hold ProcArrayLock, so
    2454                 :          * those newly added transaction ids would be filtered away, so we
    2455                 :          * need not be concerned about them.
    2456                 :          */
    2457 CBC         826 :         subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin,
    2458                 :                                                   xmax);
    2459                 : 
    2460 GIC         826 :         if (TransactionIdPrecedesOrEquals(xmin, procArray->lastOverflowedXid))
    2461               4 :             suboverflowed = true;
    2462                 :     }
    2463                 : 
    2464                 : 
    2465                 :     /*
    2466 ECB             :      * Fetch into local variable while ProcArrayLock is held - the
    2467                 :      * LWLockRelease below is a barrier, ensuring this happens inside the
    2468                 :      * lock.
    2469                 :      */
    2470 CBC      696287 :     replication_slot_xmin = procArray->replication_slot_xmin;
    2471 GIC      696287 :     replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
    2472 ECB             : 
    2473 GIC      696287 :     if (!TransactionIdIsValid(MyProc->xmin))
    2474          349709 :         MyProc->xmin = TransactionXmin = xmin;
    2475                 : 
    2476          696287 :     LWLockRelease(ProcArrayLock);
    2477                 : 
    2478                 :     /* maintain state for GlobalVis* */
    2479                 :     {
    2480                 :         TransactionId def_vis_xid;
    2481                 :         TransactionId def_vis_xid_data;
    2482                 :         FullTransactionId def_vis_fxid;
    2483                 :         FullTransactionId def_vis_fxid_data;
    2484                 :         FullTransactionId oldestfxid;
    2485                 : 
    2486                 :         /*
    2487 ECB             :          * Converting oldestXid is only safe when xid horizon cannot advance,
    2488                 :          * i.e. holding locks. While we don't hold the lock anymore, all the
    2489                 :          * necessary data has been gathered with lock held.
    2490                 :          */
    2491 CBC      696287 :         oldestfxid = FullXidRelativeTo(latest_completed, oldestxid);
    2492                 : 
    2493                 :         /* apply vacuum_defer_cleanup_age */
    2494 GIC      696287 :         def_vis_xid_data = xmin;
    2495          696287 :         TransactionIdRetreatSafely(&def_vis_xid_data,
    2496 ECB             :                                    vacuum_defer_cleanup_age,
    2497                 :                                    oldestfxid);
    2498                 : 
    2499                 :         /* Check whether there's a replication slot requiring an older xmin. */
    2500 GIC      696287 :         def_vis_xid_data =
    2501          696287 :             TransactionIdOlder(def_vis_xid_data, replication_slot_xmin);
    2502                 : 
    2503 ECB             :         /*
    2504                 :          * Rows in non-shared, non-catalog tables possibly could be vacuumed
    2505                 :          * if older than this xid.
    2506                 :          */
    2507 GIC      696287 :         def_vis_xid = def_vis_xid_data;
    2508                 : 
    2509                 :         /*
    2510 ECB             :          * Check whether there's a replication slot requiring an older catalog
    2511                 :          * xmin.
    2512                 :          */
    2513                 :         def_vis_xid =
    2514 GIC      696287 :             TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid);
    2515                 : 
    2516          696287 :         def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid);
    2517          696287 :         def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data);
    2518                 : 
    2519                 :         /*
    2520                 :          * Check if we can increase upper bound. As a previous
    2521 ECB             :          * GlobalVisUpdate() might have computed more aggressive values, don't
    2522                 :          * overwrite them if so.
    2523                 :          */
    2524                 :         GlobalVisSharedRels.definitely_needed =
    2525 GIC      696287 :             FullTransactionIdNewer(def_vis_fxid,
    2526                 :                                    GlobalVisSharedRels.definitely_needed);
    2527 ECB             :         GlobalVisCatalogRels.definitely_needed =
    2528 GIC      696287 :             FullTransactionIdNewer(def_vis_fxid,
    2529                 :                                    GlobalVisCatalogRels.definitely_needed);
    2530 ECB             :         GlobalVisDataRels.definitely_needed =
    2531 GIC      696287 :             FullTransactionIdNewer(def_vis_fxid_data,
    2532 ECB             :                                    GlobalVisDataRels.definitely_needed);
    2533                 :         /* See temp_oldest_nonremovable computation in ComputeXidHorizons() */
    2534 GIC      696287 :         if (TransactionIdIsNormal(myxid))
    2535 ECB             :             GlobalVisTempRels.definitely_needed =
    2536 CBC       70289 :                 FullXidRelativeTo(latest_completed, myxid);
    2537                 :         else
    2538                 :         {
    2539 GIC      625998 :             GlobalVisTempRels.definitely_needed = latest_completed;
    2540          625998 :             FullTransactionIdAdvance(&GlobalVisTempRels.definitely_needed);
    2541                 :         }
    2542                 : 
    2543                 :         /*
    2544                 :          * Check if we know that we can initialize or increase the lower
    2545                 :          * bound. Currently the only cheap way to do so is to use
    2546                 :          * ShmemVariableCache->oldestXid as input.
    2547                 :          *
    2548 ECB             :          * We should definitely be able to do better. We could e.g. put a
    2549                 :          * global lower bound value into ShmemVariableCache.
    2550                 :          */
    2551                 :         GlobalVisSharedRels.maybe_needed =
    2552 GIC      696287 :             FullTransactionIdNewer(GlobalVisSharedRels.maybe_needed,
    2553                 :                                    oldestfxid);
    2554 ECB             :         GlobalVisCatalogRels.maybe_needed =
    2555 GIC      696287 :             FullTransactionIdNewer(GlobalVisCatalogRels.maybe_needed,
    2556                 :                                    oldestfxid);
    2557 ECB             :         GlobalVisDataRels.maybe_needed =
    2558 GIC      696287 :             FullTransactionIdNewer(GlobalVisDataRels.maybe_needed,
    2559                 :                                    oldestfxid);
    2560 ECB             :         /* accurate value known */
    2561 CBC      696287 :         GlobalVisTempRels.maybe_needed = GlobalVisTempRels.definitely_needed;
    2562                 :     }
    2563 ECB             : 
    2564 CBC      696287 :     RecentXmin = xmin;
    2565          696287 :     Assert(TransactionIdPrecedesOrEquals(TransactionXmin, RecentXmin));
    2566 ECB             : 
    2567 CBC      696287 :     snapshot->xmin = xmin;
    2568          696287 :     snapshot->xmax = xmax;
    2569 GIC      696287 :     snapshot->xcnt = count;
    2570 CBC      696287 :     snapshot->subxcnt = subcount;
    2571 GIC      696287 :     snapshot->suboverflowed = suboverflowed;
    2572          696287 :     snapshot->snapXactCompletionCount = curXactCompletionCount;
    2573                 : 
    2574          696287 :     snapshot->curcid = GetCurrentCommandId(false);
    2575                 : 
    2576 ECB             :     /*
    2577                 :      * This is a new snapshot, so set both refcounts to zero, and mark it as
    2578                 :      * not copied in persistent memory.
    2579                 :      */
    2580 CBC      696287 :     snapshot->active_count = 0;
    2581 GIC      696287 :     snapshot->regd_count = 0;
    2582 CBC      696287 :     snapshot->copied = false;
    2583                 : 
    2584 GIC      696287 :     GetSnapshotDataInitOldSnapshot(snapshot);
    2585                 : 
    2586          696287 :     return snapshot;
    2587                 : }
    2588                 : 
    2589                 : /*
    2590                 :  * ProcArrayInstallImportedXmin -- install imported xmin into MyProc->xmin
    2591                 :  *
    2592                 :  * This is called when installing a snapshot imported from another
    2593                 :  * transaction.  To ensure that OldestXmin doesn't go backwards, we must
    2594                 :  * check that the source transaction is still running, and we'd better do
    2595                 :  * that atomically with installing the new xmin.
    2596 ECB             :  *
    2597                 :  * Returns true if successful, false if source xact is no longer running.
    2598                 :  */
    2599                 : bool
    2600 CBC          18 : ProcArrayInstallImportedXmin(TransactionId xmin,
    2601                 :                              VirtualTransactionId *sourcevxid)
    2602                 : {
    2603              18 :     bool        result = false;
    2604              18 :     ProcArrayStruct *arrayP = procArray;
    2605 EUB             :     int         index;
    2606                 : 
    2607 GIC          18 :     Assert(TransactionIdIsNormal(xmin));
    2608 CBC          18 :     if (!sourcevxid)
    2609 UIC           0 :         return false;
    2610 ECB             : 
    2611                 :     /* Get lock so source xact can't end while we're doing this */
    2612 CBC          18 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    2613 ECB             : 
    2614 CBC          53 :     for (index = 0; index < arrayP->numProcs; index++)
    2615                 :     {
    2616 GIC          53 :         int         pgprocno = arrayP->pgprocnos[index];
    2617              53 :         PGPROC     *proc = &allProcs[pgprocno];
    2618 CBC          53 :         int         statusFlags = ProcGlobal->statusFlags[index];
    2619 EUB             :         TransactionId xid;
    2620                 : 
    2621                 :         /* Ignore procs running LAZY VACUUM */
    2622 CBC          53 :         if (statusFlags & PROC_IN_VACUUM)
    2623 LBC           0 :             continue;
    2624 ECB             : 
    2625 EUB             :         /* We are only interested in the specific virtual transaction. */
    2626 GIC          53 :         if (proc->backendId != sourcevxid->backendId)
    2627              35 :             continue;
    2628              18 :         if (proc->lxid != sourcevxid->localTransactionId)
    2629 UIC           0 :             continue;
    2630                 : 
    2631                 :         /*
    2632                 :          * We check the transaction's database ID for paranoia's sake: if it's
    2633 ECB             :          * in another DB then its xmin does not cover us.  Caller should have
    2634 EUB             :          * detected this already, so we just treat any funny cases as
    2635                 :          * "transaction not found".
    2636                 :          */
    2637 GIC          18 :         if (proc->databaseId != MyDatabaseId)
    2638 UIC           0 :             continue;
    2639 ECB             : 
    2640                 :         /*
    2641                 :          * Likewise, let's just make real sure its xmin does cover us.
    2642 EUB             :          */
    2643 GIC          18 :         xid = UINT32_ACCESS_ONCE(proc->xmin);
    2644              18 :         if (!TransactionIdIsNormal(xid) ||
    2645              18 :             !TransactionIdPrecedesOrEquals(xid, xmin))
    2646 UIC           0 :             continue;
    2647                 : 
    2648                 :         /*
    2649                 :          * We're good.  Install the new xmin.  As in GetSnapshotData, set
    2650 ECB             :          * TransactionXmin too.  (Note that because snapmgr.c called
    2651                 :          * GetSnapshotData first, we'll be overwriting a valid xmin here, so
    2652                 :          * we don't check that.)
    2653                 :          */
    2654 GIC          18 :         MyProc->xmin = TransactionXmin = xmin;
    2655                 : 
    2656 CBC          18 :         result = true;
    2657 GIC          18 :         break;
    2658 ECB             :     }
    2659                 : 
    2660 GIC          18 :     LWLockRelease(ProcArrayLock);
    2661                 : 
    2662              18 :     return result;
    2663                 : }
    2664                 : 
    2665                 : /*
    2666                 :  * ProcArrayInstallRestoredXmin -- install restored xmin into MyProc->xmin
    2667                 :  *
    2668                 :  * This is like ProcArrayInstallImportedXmin, but we have a pointer to the
    2669                 :  * PGPROC of the transaction from which we imported the snapshot, rather than
    2670                 :  * a virtual transaction ID.
    2671                 :  *
    2672                 :  * Note that this function also copies statusFlags from the source `proc` in
    2673                 :  * order to avoid the case where MyProc's xmin needs to be skipped for
    2674                 :  * computing xid horizon.
    2675 ECB             :  *
    2676                 :  * Returns true if successful, false if source xact is no longer running.
    2677                 :  */
    2678                 : bool
    2679 GIC        1453 : ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
    2680 ECB             : {
    2681 CBC        1453 :     bool        result = false;
    2682                 :     TransactionId xid;
    2683                 : 
    2684 GIC        1453 :     Assert(TransactionIdIsNormal(xmin));
    2685            1453 :     Assert(proc != NULL);
    2686 ECB             : 
    2687                 :     /*
    2688                 :      * Get an exclusive lock so that we can copy statusFlags from source proc.
    2689                 :      */
    2690 GIC        1453 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    2691                 : 
    2692                 :     /*
    2693                 :      * Be certain that the referenced PGPROC has an advertised xmin which is
    2694 ECB             :      * no later than the one we're installing, so that the system-wide xmin
    2695                 :      * can't go backwards.  Also, make sure it's running in the same database,
    2696                 :      * so that the per-database xmin cannot go backwards.
    2697                 :      */
    2698 GIC        1453 :     xid = UINT32_ACCESS_ONCE(proc->xmin);
    2699            1453 :     if (proc->databaseId == MyDatabaseId &&
    2700            1453 :         TransactionIdIsNormal(xid) &&
    2701            1453 :         TransactionIdPrecedesOrEquals(xid, xmin))
    2702                 :     {
    2703 ECB             :         /*
    2704                 :          * Install xmin and propagate the statusFlags that affect how the
    2705                 :          * value is interpreted by vacuum.
    2706                 :          */
    2707 GIC        1453 :         MyProc->xmin = TransactionXmin = xmin;
    2708 CBC        1453 :         MyProc->statusFlags = (MyProc->statusFlags & ~PROC_XMIN_FLAGS) |
    2709 GIC        1453 :             (proc->statusFlags & PROC_XMIN_FLAGS);
    2710            1453 :         ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
    2711 ECB             : 
    2712 GIC        1453 :         result = true;
    2713 ECB             :     }
    2714                 : 
    2715 GIC        1453 :     LWLockRelease(ProcArrayLock);
    2716                 : 
    2717            1453 :     return result;
    2718                 : }
    2719                 : 
    2720                 : /*
    2721                 :  * GetRunningTransactionData -- returns information about running transactions.
    2722                 :  *
    2723                 :  * Similar to GetSnapshotData but returns more information. We include
    2724                 :  * all PGPROCs with an assigned TransactionId, even VACUUM processes and
    2725                 :  * prepared transactions.
    2726                 :  *
    2727                 :  * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
    2728                 :  * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc
    2729                 :  * array until the caller has WAL-logged this snapshot, and releases the
    2730                 :  * lock. Acquiring ProcArrayLock ensures that no transactions commit until the
    2731                 :  * lock is released.
    2732                 :  *
    2733                 :  * The returned data structure is statically allocated; caller should not
    2734                 :  * modify it, and must not assume it is valid past the next call.
    2735                 :  *
    2736                 :  * This is never executed during recovery so there is no need to look at
    2737                 :  * KnownAssignedXids.
    2738                 :  *
    2739                 :  * Dummy PGPROCs from prepared transaction are included, meaning that this
    2740                 :  * may return entries with duplicated TransactionId values coming from
    2741                 :  * transactions finishing to prepare.  Nothing is done about duplicated
    2742                 :  * entries here, so as not to hold ProcArrayLock longer than necessary.
    2743                 :  *
    2744                 :  * We don't worry about updating other counters, we want to keep this as
    2745                 :  * simple as possible and leave GetSnapshotData() as the primary code for
    2746                 :  * that bookkeeping.
    2747                 :  *
    2748 ECB             :  * Note that if any transaction has overflowed its cached subtransactions
    2749                 :  * then there is no real need to include any subtransactions.
    2750                 :  */
    2751                 : RunningTransactions
    2752 GIC        1749 : GetRunningTransactionData(void)
    2753 ECB             : {
    2754                 :     /* result workspace */
    2755                 :     static RunningTransactionsData CurrentRunningXactsData;
    2756                 : 
    2757 GIC        1749 :     ProcArrayStruct *arrayP = procArray;
    2758            1749 :     TransactionId *other_xids = ProcGlobal->xids;
    2759            1749 :     RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
    2760                 :     TransactionId latestCompletedXid;
    2761                 :     TransactionId oldestRunningXid;
    2762                 :     TransactionId *xids;
    2763                 :     int         index;
    2764 ECB             :     int         count;
    2765                 :     int         subcount;
    2766                 :     bool        suboverflowed;
    2767                 : 
    2768 GIC        1749 :     Assert(!RecoveryInProgress());
    2769                 : 
    2770                 :     /*
    2771                 :      * Allocating space for maxProcs xids is usually overkill; numProcs would
    2772                 :      * be sufficient.  But it seems better to do the malloc while not holding
    2773                 :      * the lock, so we can't look at numProcs.  Likewise, we allocate much
    2774                 :      * more subxip storage than is probably needed.
    2775 ECB             :      *
    2776                 :      * Should only be allocated in bgwriter, since only ever executed during
    2777                 :      * checkpoints.
    2778                 :      */
    2779 GIC        1749 :     if (CurrentRunningXacts->xids == NULL)
    2780 ECB             :     {
    2781                 :         /*
    2782                 :          * First call
    2783 EUB             :          */
    2784 GIC         667 :         CurrentRunningXacts->xids = (TransactionId *)
    2785             667 :             malloc(TOTAL_MAX_CACHED_SUBXIDS * sizeof(TransactionId));
    2786             667 :         if (CurrentRunningXacts->xids == NULL)
    2787 UIC           0 :             ereport(ERROR,
    2788 ECB             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
    2789                 :                      errmsg("out of memory")));
    2790                 :     }
    2791                 : 
    2792 GIC        1749 :     xids = CurrentRunningXacts->xids;
    2793                 : 
    2794            1749 :     count = subcount = 0;
    2795            1749 :     suboverflowed = false;
    2796                 : 
    2797 ECB             :     /*
    2798                 :      * Ensure that no xids enter or leave the procarray while we obtain
    2799                 :      * snapshot.
    2800                 :      */
    2801 CBC        1749 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    2802            1749 :     LWLockAcquire(XidGenLock, LW_SHARED);
    2803 ECB             : 
    2804 GIC        1749 :     latestCompletedXid =
    2805            1749 :         XidFromFullTransactionId(ShmemVariableCache->latestCompletedXid);
    2806            1749 :     oldestRunningXid =
    2807            1749 :         XidFromFullTransactionId(ShmemVariableCache->nextXid);
    2808 ECB             : 
    2809                 :     /*
    2810                 :      * Spin over procArray collecting all xids
    2811                 :      */
    2812 GIC        5129 :     for (index = 0; index < arrayP->numProcs; index++)
    2813 ECB             :     {
    2814                 :         TransactionId xid;
    2815                 : 
    2816                 :         /* Fetch xid just once - see GetNewTransactionId */
    2817 GIC        3380 :         xid = UINT32_ACCESS_ONCE(other_xids[index]);
    2818                 : 
    2819 ECB             :         /*
    2820                 :          * We don't need to store transactions that don't have a TransactionId
    2821                 :          * yet because they will not show as running on a standby server.
    2822                 :          */
    2823 GIC        3380 :         if (!TransactionIdIsValid(xid))
    2824            2075 :             continue;
    2825                 : 
    2826                 :         /*
    2827 ECB             :          * Be careful not to exclude any xids before calculating the values of
    2828                 :          * oldestRunningXid and suboverflowed, since these are used to clean
    2829                 :          * up transaction information held on standbys.
    2830                 :          */
    2831 CBC        1305 :         if (TransactionIdPrecedes(xid, oldestRunningXid))
    2832 GIC        1295 :             oldestRunningXid = xid;
    2833                 : 
    2834            1305 :         if (ProcGlobal->subxidStates[index].overflowed)
    2835               1 :             suboverflowed = true;
    2836                 : 
    2837                 :         /*
    2838                 :          * If we wished to exclude xids this would be the right place for it.
    2839                 :          * Procs with the PROC_IN_VACUUM flag set don't usually assign xids,
    2840                 :          * but they do during truncation at the end when they get the lock and
    2841 ECB             :          * truncate, so it is not much of a problem to include them if they
    2842                 :          * are seen and it is cleaner to include them.
    2843                 :          */
    2844                 : 
    2845 GIC        1305 :         xids[count++] = xid;
    2846                 :     }
    2847                 : 
    2848 ECB             :     /*
    2849                 :      * Spin over procArray collecting all subxids, but only if there hasn't
    2850                 :      * been a suboverflow.
    2851                 :      */
    2852 CBC        1749 :     if (!suboverflowed)
    2853                 :     {
    2854            1748 :         XidCacheStatus *other_subxidstates = ProcGlobal->subxidStates;
    2855 ECB             : 
    2856 GIC        5126 :         for (index = 0; index < arrayP->numProcs; index++)
    2857                 :         {
    2858            3378 :             int         pgprocno = arrayP->pgprocnos[index];
    2859            3378 :             PGPROC     *proc = &allProcs[pgprocno];
    2860                 :             int         nsubxids;
    2861                 : 
    2862 ECB             :             /*
    2863                 :              * Save subtransaction XIDs. Other backends can't add or remove
    2864                 :              * entries while we're holding XidGenLock.
    2865                 :              */
    2866 CBC        3378 :             nsubxids = other_subxidstates[index].count;
    2867 GIC        3378 :             if (nsubxids > 0)
    2868 ECB             :             {
    2869                 :                 /* barrier not really required, as XidGenLock is held, but ... */
    2870 CBC           7 :                 pg_read_barrier();  /* pairs with GetNewTransactionId */
    2871 ECB             : 
    2872 GNC           7 :                 memcpy(&xids[count], proc->subxids.xids,
    2873                 :                        nsubxids * sizeof(TransactionId));
    2874 GIC           7 :                 count += nsubxids;
    2875               7 :                 subcount += nsubxids;
    2876                 : 
    2877                 :                 /*
    2878                 :                  * Top-level XID of a transaction is always less than any of
    2879                 :                  * its subxids, so we don't need to check if any of the
    2880                 :                  * subxids are smaller than oldestRunningXid
    2881                 :                  */
    2882                 :             }
    2883                 :         }
    2884                 :     }
    2885                 : 
    2886                 :     /*
    2887                 :      * It's important *not* to include the limits set by slots here because
    2888                 :      * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
    2889                 :      * were to be included here the initial value could never increase because
    2890                 :      * of a circular dependency where slots only increase their limits when
    2891 ECB             :      * running xacts increases oldestRunningXid and running xacts only
    2892                 :      * increases if slots do.
    2893                 :      */
    2894                 : 
    2895 CBC        1749 :     CurrentRunningXacts->xcnt = count - subcount;
    2896            1749 :     CurrentRunningXacts->subxcnt = subcount;
    2897 GIC        1749 :     CurrentRunningXacts->subxid_overflow = suboverflowed;
    2898 CBC        1749 :     CurrentRunningXacts->nextXid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
    2899            1749 :     CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
    2900            1749 :     CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
    2901                 : 
    2902 GIC        1749 :     Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
    2903            1749 :     Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
    2904 CBC        1749 :     Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
    2905                 : 
    2906                 :     /* We don't release the locks here, the caller is responsible for that */
    2907                 : 
    2908 GIC        1749 :     return CurrentRunningXacts;
    2909                 : }
    2910                 : 
    2911                 : /*
    2912                 :  * GetOldestActiveTransactionId()
    2913                 :  *
    2914                 :  * Similar to GetSnapshotData but returns just oldestActiveXid. We include
    2915                 :  * all PGPROCs with an assigned TransactionId, even VACUUM processes.
    2916                 :  * We look at all databases, though there is no need to include WALSender
    2917                 :  * since this has no effect on hot standby conflicts.
    2918                 :  *
    2919                 :  * This is never executed during recovery so there is no need to look at
    2920                 :  * KnownAssignedXids.
    2921                 :  *
    2922                 :  * We don't worry about updating other counters, we want to keep this as
    2923 ECB             :  * simple as possible and leave GetSnapshotData() as the primary code for
    2924                 :  * that bookkeeping.
    2925                 :  */
    2926                 : TransactionId
    2927 GIC        1353 : GetOldestActiveTransactionId(void)
    2928                 : {
    2929            1353 :     ProcArrayStruct *arrayP = procArray;
    2930 CBC        1353 :     TransactionId *other_xids = ProcGlobal->xids;
    2931                 :     TransactionId oldestRunningXid;
    2932                 :     int         index;
    2933                 : 
    2934 GIC        1353 :     Assert(!RecoveryInProgress());
    2935                 : 
    2936                 :     /*
    2937                 :      * Read nextXid, as the upper bound of what's still active.
    2938                 :      *
    2939 ECB             :      * Reading a TransactionId is atomic, but we must grab the lock to make
    2940                 :      * sure that all XIDs < nextXid are already present in the proc array (or
    2941                 :      * have already completed), when we spin over it.
    2942                 :      */
    2943 GIC        1353 :     LWLockAcquire(XidGenLock, LW_SHARED);
    2944            1353 :     oldestRunningXid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
    2945            1353 :     LWLockRelease(XidGenLock);
    2946 ECB             : 
    2947                 :     /*
    2948                 :      * Spin over procArray collecting all xids and subxids.
    2949                 :      */
    2950 GIC        1353 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    2951            3088 :     for (index = 0; index < arrayP->numProcs; index++)
    2952 ECB             :     {
    2953                 :         TransactionId xid;
    2954                 : 
    2955                 :         /* Fetch xid just once - see GetNewTransactionId */
    2956 GIC        1735 :         xid = UINT32_ACCESS_ONCE(other_xids[index]);
    2957 ECB             : 
    2958 CBC        1735 :         if (!TransactionIdIsNormal(xid))
    2959 GIC         460 :             continue;
    2960                 : 
    2961            1275 :         if (TransactionIdPrecedes(xid, oldestRunningXid))
    2962            1257 :             oldestRunningXid = xid;
    2963                 : 
    2964                 :         /*
    2965                 :          * Top-level XID of a transaction is always less than any of its
    2966 ECB             :          * subxids, so we don't need to check if any of the subxids are
    2967                 :          * smaller than oldestRunningXid
    2968                 :          */
    2969                 :     }
    2970 GIC        1353 :     LWLockRelease(ProcArrayLock);
    2971                 : 
    2972            1353 :     return oldestRunningXid;
    2973                 : }
    2974                 : 
    2975                 : /*
    2976                 :  * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
    2977                 :  *
    2978                 :  * Returns the oldest xid that we can guarantee not to have been affected by
    2979                 :  * vacuum, i.e. no rows >= that xid have been vacuumed away unless the
    2980                 :  * transaction aborted. Note that the value can (and most of the time will) be
    2981                 :  * much more conservative than what really has been affected by vacuum, but we
    2982                 :  * currently don't have better data available.
    2983                 :  *
    2984                 :  * This is useful to initialize the cutoff xid after which a new changeset
    2985                 :  * extraction replication slot can start decoding changes.
    2986                 :  *
    2987                 :  * Must be called with ProcArrayLock held either shared or exclusively,
    2988 ECB             :  * although most callers will want to use exclusive mode since it is expected
    2989                 :  * that the caller will immediately use the xid to peg the xmin horizon.
    2990                 :  */
    2991                 : TransactionId
    2992 GIC         497 : GetOldestSafeDecodingTransactionId(bool catalogOnly)
    2993 ECB             : {
    2994 GIC         497 :     ProcArrayStruct *arrayP = procArray;
    2995 ECB             :     TransactionId oldestSafeXid;
    2996                 :     int         index;
    2997 GIC         497 :     bool        recovery_in_progress = RecoveryInProgress();
    2998                 : 
    2999             497 :     Assert(LWLockHeldByMe(ProcArrayLock));
    3000                 : 
    3001                 :     /*
    3002                 :      * Acquire XidGenLock, so no transactions can acquire an xid while we're
    3003                 :      * running. If no transaction with an xid were running concurrently, a new
    3004                 :      * xid could influence RecentXmin et al.
    3005 ECB             :      *
    3006                 :      * We initialize the computation to nextXid since that's guaranteed to be
    3007                 :      * a safe, albeit pessimal, value.
    3008                 :      */
    3009 GIC         497 :     LWLockAcquire(XidGenLock, LW_SHARED);
    3010             497 :     oldestSafeXid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
    3011                 : 
    3012                 :     /*
    3013                 :      * If there's already a slot pegging the xmin horizon, we can start with
    3014                 :      * that value, it's guaranteed to be safe since it's computed by this
    3015 ECB             :      * routine initially and has been enforced since.  We can always use the
    3016                 :      * slot's general xmin horizon, but the catalog horizon is only usable
    3017                 :      * when only catalog data is going to be looked at.
    3018                 :      */
    3019 GIC         656 :     if (TransactionIdIsValid(procArray->replication_slot_xmin) &&
    3020 CBC         159 :         TransactionIdPrecedes(procArray->replication_slot_xmin,
    3021 ECB             :                               oldestSafeXid))
    3022 CBC           2 :         oldestSafeXid = procArray->replication_slot_xmin;
    3023                 : 
    3024             497 :     if (catalogOnly &&
    3025 GIC         233 :         TransactionIdIsValid(procArray->replication_slot_catalog_xmin) &&
    3026              46 :         TransactionIdPrecedes(procArray->replication_slot_catalog_xmin,
    3027                 :                               oldestSafeXid))
    3028              11 :         oldestSafeXid = procArray->replication_slot_catalog_xmin;
    3029                 : 
    3030                 :     /*
    3031                 :      * If we're not in recovery, we walk over the procarray and collect the
    3032                 :      * lowest xid. Since we're called with ProcArrayLock held and have
    3033                 :      * acquired XidGenLock, no entries can vanish concurrently, since
    3034                 :      * ProcGlobal->xids[i] is only set with XidGenLock held and only cleared
    3035                 :      * with ProcArrayLock held.
    3036                 :      *
    3037                 :      * In recovery we can't lower the safe value below what we've computed
    3038 ECB             :      * above, so we'll have to wait a bit longer there. We unfortunately can
    3039                 :      * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids
    3040                 :      * machinery can miss values and return an older value than is safe.
    3041                 :      */
    3042 GIC         497 :     if (!recovery_in_progress)
    3043                 :     {
    3044             476 :         TransactionId *other_xids = ProcGlobal->xids;
    3045 ECB             : 
    3046                 :         /*
    3047                 :          * Spin over procArray collecting min(ProcGlobal->xids[i])
    3048                 :          */
    3049 GIC        2478 :         for (index = 0; index < arrayP->numProcs; index++)
    3050 ECB             :         {
    3051                 :             TransactionId xid;
    3052                 : 
    3053                 :             /* Fetch xid just once - see GetNewTransactionId */
    3054 GIC        2002 :             xid = UINT32_ACCESS_ONCE(other_xids[index]);
    3055 ECB             : 
    3056 CBC        2002 :             if (!TransactionIdIsNormal(xid))
    3057 GIC        1994 :                 continue;
    3058                 : 
    3059               8 :             if (TransactionIdPrecedes(xid, oldestSafeXid))
    3060 CBC           8 :                 oldestSafeXid = xid;
    3061                 :         }
    3062 ECB             :     }
    3063                 : 
    3064 GIC         497 :     LWLockRelease(XidGenLock);
    3065                 : 
    3066             497 :     return oldestSafeXid;
    3067                 : }
    3068                 : 
    3069                 : /*
    3070                 :  * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
    3071                 :  * delaying checkpoint because they have critical actions in progress.
    3072                 :  *
    3073                 :  * Constructs an array of VXIDs of transactions that are currently in commit
    3074                 :  * critical sections, as shown by having specified delayChkptFlags bits set
    3075                 :  * in their PGPROC.
    3076                 :  *
    3077                 :  * Returns a palloc'd array that should be freed by the caller.
    3078                 :  * *nvxids is the number of valid entries.
    3079                 :  *
    3080                 :  * Note that because backends set or clear delayChkptFlags without holding any
    3081                 :  * lock, the result is somewhat indeterminate, but we don't really care.  Even
    3082                 :  * in a multiprocessor with delayed writes to shared memory, it should be
    3083                 :  * certain that setting of delayChkptFlags will propagate to shared memory
    3084                 :  * when the backend takes a lock, so we cannot fail to see a virtual xact as
    3085                 :  * having delayChkptFlags set if it's already inserted its commit record.
    3086 ECB             :  * Whether it takes a little while for clearing of delayChkptFlags to
    3087                 :  * propagate is unimportant for correctness.
    3088                 :  */
    3089                 : VirtualTransactionId *
    3090 CBC        4670 : GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
    3091                 : {
    3092                 :     VirtualTransactionId *vxids;
    3093            4670 :     ProcArrayStruct *arrayP = procArray;
    3094 GIC        4670 :     int         count = 0;
    3095                 :     int         index;
    3096                 : 
    3097 CBC        4670 :     Assert(type != 0);
    3098                 : 
    3099 ECB             :     /* allocate what's certainly enough result space */
    3100                 :     vxids = (VirtualTransactionId *)
    3101 CBC        4670 :         palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
    3102                 : 
    3103            4670 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    3104 ECB             : 
    3105 GIC        9695 :     for (index = 0; index < arrayP->numProcs; index++)
    3106 ECB             :     {
    3107 GIC        5025 :         int         pgprocno = arrayP->pgprocnos[index];
    3108            5025 :         PGPROC     *proc = &allProcs[pgprocno];
    3109                 : 
    3110 CBC        5025 :         if ((proc->delayChkptFlags & type) != 0)
    3111 ECB             :         {
    3112                 :             VirtualTransactionId vxid;
    3113                 : 
    3114 GIC           8 :             GET_VXID_FROM_PGPROC(vxid, *proc);
    3115               8 :             if (VirtualTransactionIdIsValid(vxid))
    3116 CBC           8 :                 vxids[count++] = vxid;
    3117                 :         }
    3118 ECB             :     }
    3119                 : 
    3120 GIC        4670 :     LWLockRelease(ProcArrayLock);
    3121                 : 
    3122            4670 :     *nvxids = count;
    3123            4670 :     return vxids;
    3124                 : }
    3125                 : 
    3126                 : /*
    3127                 :  * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
    3128                 :  *
    3129                 :  * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any
    3130                 :  * of the specified VXIDs are still in critical sections of code.
    3131                 :  *
    3132 ECB             :  * Note: this is O(N^2) in the number of vxacts that are/were delaying, but
    3133                 :  * those numbers should be small enough for it not to be a problem.
    3134                 :  */
    3135                 : bool
    3136 GIC           9 : HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
    3137                 : {
    3138 CBC           9 :     bool        result = false;
    3139 GIC           9 :     ProcArrayStruct *arrayP = procArray;
    3140 ECB             :     int         index;
    3141                 : 
    3142 CBC           9 :     Assert(type != 0);
    3143                 : 
    3144               9 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    3145 ECB             : 
    3146 GIC         105 :     for (index = 0; index < arrayP->numProcs; index++)
    3147                 :     {
    3148 CBC          98 :         int         pgprocno = arrayP->pgprocnos[index];
    3149 GIC          98 :         PGPROC     *proc = &allProcs[pgprocno];
    3150 ECB             :         VirtualTransactionId vxid;
    3151                 : 
    3152 GIC          98 :         GET_VXID_FROM_PGPROC(vxid, *proc);
    3153                 : 
    3154              98 :         if ((proc->delayChkptFlags & type) != 0 &&
    3155 CBC           5 :             VirtualTransactionIdIsValid(vxid))
    3156                 :         {
    3157 ECB             :             int         i;
    3158                 : 
    3159 CBC          10 :             for (i = 0; i < nvxids; i++)
    3160 ECB             :             {
    3161 GIC           7 :                 if (VirtualTransactionIdEquals(vxid, vxids[i]))
    3162                 :                 {
    3163 CBC           2 :                     result = true;
    3164               2 :                     break;
    3165                 :                 }
    3166                 :             }
    3167 GIC           5 :             if (result)
    3168 CBC           2 :                 break;
    3169                 :         }
    3170 ECB             :     }
    3171                 : 
    3172 GIC           9 :     LWLockRelease(ProcArrayLock);
    3173                 : 
    3174               9 :     return result;
    3175                 : }
    3176                 : 
    3177                 : /*
    3178                 :  * BackendPidGetProc -- get a backend's PGPROC given its PID
    3179                 :  *
    3180                 :  * Returns NULL if not found.  Note that it is up to the caller to be
    3181 ECB             :  * sure that the question remains meaningful for long enough for the
    3182                 :  * answer to be used ...
    3183                 :  */
    3184                 : PGPROC *
    3185 CBC        4163 : BackendPidGetProc(int pid)
    3186 EUB             : {
    3187                 :     PGPROC     *result;
    3188 ECB             : 
    3189 GIC        4163 :     if (pid == 0)               /* never match dummy PGPROCs */
    3190 LBC           0 :         return NULL;
    3191                 : 
    3192 CBC        4163 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    3193                 : 
    3194            4163 :     result = BackendPidGetProcWithLock(pid);
    3195                 : 
    3196 GIC        4163 :     LWLockRelease(ProcArrayLock);
    3197                 : 
    3198            4163 :     return result;
    3199                 : }
    3200                 : 
    3201                 : /*
    3202                 :  * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID
    3203                 :  *
    3204 ECB             :  * Same as above, except caller must be holding ProcArrayLock.  The found
    3205                 :  * entry, if any, can be assumed to be valid as long as the lock remains held.
    3206                 :  */
    3207                 : PGPROC *
    3208 GIC        7355 : BackendPidGetProcWithLock(int pid)
    3209                 : {
    3210 CBC        7355 :     PGPROC     *result = NULL;
    3211 GBC        7355 :     ProcArrayStruct *arrayP = procArray;
    3212                 :     int         index;
    3213 ECB             : 
    3214 GIC        7355 :     if (pid == 0)               /* never match dummy PGPROCs */
    3215 LBC           0 :         return NULL;
    3216                 : 
    3217 CBC       22070 :     for (index = 0; index < arrayP->numProcs; index++)
    3218                 :     {
    3219           20370 :         PGPROC     *proc = &allProcs[arrayP->pgprocnos[index]];
    3220 ECB             : 
    3221 GIC       20370 :         if (proc->pid == pid)
    3222                 :         {
    3223            5655 :             result = proc;
    3224 CBC        5655 :             break;
    3225                 :         }
    3226                 :     }
    3227                 : 
    3228 GIC        7355 :     return result;
    3229                 : }
    3230                 : 
    3231                 : /*
    3232                 :  * BackendXidGetPid -- get a backend's pid given its XID
    3233                 :  *
    3234                 :  * Returns 0 if not found or it's a prepared transaction.  Note that
    3235                 :  * it is up to the caller to be sure that the question remains
    3236                 :  * meaningful for long enough for the answer to be used ...
    3237                 :  *
    3238                 :  * Only main transaction Ids are considered.  This function is mainly
    3239                 :  * useful for determining what backend owns a lock.
    3240                 :  *
    3241 ECB             :  * Beware that not every xact has an XID assigned.  However, as long as you
    3242                 :  * only call this using an XID found on disk, you're safe.
    3243                 :  */
    3244                 : int
    3245 CBC          30 : BackendXidGetPid(TransactionId xid)
    3246                 : {
    3247 GIC          30 :     int         result = 0;
    3248 CBC          30 :     ProcArrayStruct *arrayP = procArray;
    3249 GBC          30 :     TransactionId *other_xids = ProcGlobal->xids;
    3250                 :     int         index;
    3251 ECB             : 
    3252 GIC          30 :     if (xid == InvalidTransactionId)    /* never match invalid xid */
    3253 LBC           0 :         return 0;
    3254                 : 
    3255 CBC          30 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    3256 ECB             : 
    3257 GIC          92 :     for (index = 0; index < arrayP->numProcs; index++)
    3258 ECB             :     {
    3259 GIC          84 :         int         pgprocno = arrayP->pgprocnos[index];
    3260 CBC          84 :         PGPROC     *proc = &allProcs[pgprocno];
    3261 ECB             : 
    3262 GIC          84 :         if (other_xids[index] == xid)
    3263                 :         {
    3264              22 :             result = proc->pid;
    3265 CBC          22 :             break;
    3266                 :         }
    3267 ECB             :     }
    3268                 : 
    3269 GIC          30 :     LWLockRelease(ProcArrayLock);
    3270                 : 
    3271              30 :     return result;
    3272                 : }
    3273                 : 
    3274                 : /*
    3275                 :  * IsBackendPid -- is a given pid a running backend
    3276 ECB             :  *
    3277                 :  * This is not called by the backend, but is called by external modules.
    3278                 :  */
    3279                 : bool
    3280 GIC           2 : IsBackendPid(int pid)
    3281                 : {
    3282               2 :     return (BackendPidGetProc(pid) != NULL);
    3283                 : }
    3284                 : 
    3285                 : 
    3286                 : /*
    3287                 :  * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
    3288                 :  *
    3289                 :  * The array is palloc'd. The number of valid entries is returned into *nvxids.
    3290                 :  *
    3291                 :  * The arguments allow filtering the set of VXIDs returned.  Our own process
    3292                 :  * is always skipped.  In addition:
    3293                 :  *  If limitXmin is not InvalidTransactionId, skip processes with
    3294                 :  *      xmin > limitXmin.
    3295                 :  *  If excludeXmin0 is true, skip processes with xmin = 0.
    3296                 :  *  If allDbs is false, skip processes attached to other databases.
    3297                 :  *  If excludeVacuum isn't zero, skip processes for which
    3298                 :  *      (statusFlags & excludeVacuum) is not zero.
    3299                 :  *
    3300                 :  * Note: the purpose of the limitXmin and excludeXmin0 parameters is to
    3301                 :  * allow skipping backends whose oldest live snapshot is no older than
    3302                 :  * some snapshot we have.  Since we examine the procarray with only shared
    3303                 :  * lock, there are race conditions: a backend could set its xmin just after
    3304                 :  * we look.  Indeed, on multiprocessors with weak memory ordering, the
    3305                 :  * other backend could have set its xmin *before* we look.  We know however
    3306                 :  * that such a backend must have held shared ProcArrayLock overlapping our
    3307                 :  * own hold of ProcArrayLock, else we would see its xmin update.  Therefore,
    3308                 :  * any snapshot the other backend is taking concurrently with our scan cannot
    3309 ECB             :  * consider any transactions as still running that we think are committed
    3310                 :  * (since backends must hold ProcArrayLock exclusive to commit).
    3311                 :  */
    3312                 : VirtualTransactionId *
    3313 GIC         321 : GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
    3314 ECB             :                       bool allDbs, int excludeVacuum,
    3315                 :                       int *nvxids)
    3316                 : {
    3317                 :     VirtualTransactionId *vxids;
    3318 GIC         321 :     ProcArrayStruct *arrayP = procArray;
    3319             321 :     int         count = 0;
    3320 ECB             :     int         index;
    3321                 : 
    3322                 :     /* allocate what's certainly enough result space */
    3323                 :     vxids = (VirtualTransactionId *)
    3324 CBC         321 :         palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
    3325                 : 
    3326             321 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    3327 ECB             : 
    3328 CBC        2041 :     for (index = 0; index < arrayP->numProcs; index++)
    3329                 :     {
    3330            1720 :         int         pgprocno = arrayP->pgprocnos[index];
    3331            1720 :         PGPROC     *proc = &allProcs[pgprocno];
    3332 GIC        1720 :         uint8       statusFlags = ProcGlobal->statusFlags[index];
    3333 ECB             : 
    3334 CBC        1720 :         if (proc == MyProc)
    3335 GIC         321 :             continue;
    3336 ECB             : 
    3337 GIC        1399 :         if (excludeVacuum & statusFlags)
    3338              12 :             continue;
    3339 ECB             : 
    3340 GIC        1387 :         if (allDbs || proc->databaseId == MyDatabaseId)
    3341 ECB             :         {
    3342                 :             /* Fetch xmin just once - might change on us */
    3343 GIC         685 :             TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
    3344                 : 
    3345             685 :             if (excludeXmin0 && !TransactionIdIsValid(pxmin))
    3346             434 :                 continue;
    3347                 : 
    3348 ECB             :             /*
    3349                 :              * InvalidTransactionId precedes all other XIDs, so a proc that
    3350                 :              * hasn't set xmin yet will not be rejected by this test.
    3351                 :              */
    3352 GIC         502 :             if (!TransactionIdIsValid(limitXmin) ||
    3353 CBC         251 :                 TransactionIdPrecedesOrEquals(pxmin, limitXmin))
    3354 ECB             :             {
    3355                 :                 VirtualTransactionId vxid;
    3356                 : 
    3357 GIC         239 :                 GET_VXID_FROM_PGPROC(vxid, *proc);
    3358             239 :                 if (VirtualTransactionIdIsValid(vxid))
    3359             239 :                     vxids[count++] = vxid;
    3360 ECB             :             }
    3361                 :         }
    3362                 :     }
    3363                 : 
    3364 GIC         321 :     LWLockRelease(ProcArrayLock);
    3365                 : 
    3366             321 :     *nvxids = count;
    3367             321 :     return vxids;
    3368                 : }
    3369                 : 
    3370                 : /*
    3371                 :  * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs.
    3372                 :  *
    3373                 :  * Usage is limited to conflict resolution during recovery on standby servers.
    3374                 :  * limitXmin is supplied as either a cutoff with snapshotConflictHorizon
    3375                 :  * semantics, or InvalidTransactionId in cases where caller cannot accurately
    3376                 :  * determine a safe snapshotConflictHorizon value.
    3377                 :  *
    3378                 :  * If limitXmin is InvalidTransactionId then we want to kill everybody,
    3379                 :  * so we're not worried if they have a snapshot or not, nor does it really
    3380                 :  * matter what type of lock we hold.  Caller must avoid calling here with
    3381                 :  * snapshotConflictHorizon style cutoffs that were set to InvalidTransactionId
    3382                 :  * during original execution, since that actually indicates that there is
    3383                 :  * definitely no need for a recovery conflict (the snapshotConflictHorizon
    3384                 :  * convention for InvalidTransactionId values is the opposite of our own!).
    3385                 :  *
    3386                 :  * All callers that are checking xmins always now supply a valid and useful
    3387                 :  * value for limitXmin. The limitXmin is always lower than the lowest
    3388                 :  * numbered KnownAssignedXid that is not already a FATAL error. This is
    3389                 :  * because we only care about cleanup records that are cleaning up tuple
    3390                 :  * versions from committed transactions. In that case they will only occur
    3391                 :  * at the point where the record is less than the lowest running xid. That
    3392                 :  * allows us to say that if any backend takes a snapshot concurrently with
    3393                 :  * us then the conflict assessment made here would never include the snapshot
    3394                 :  * that is being derived. So we take LW_SHARED on the ProcArray and allow
    3395                 :  * concurrent snapshots when limitXmin is valid. We might think about adding
    3396                 :  *   Assert(limitXmin < lowest(KnownAssignedXids))
    3397                 :  * but that would not be true in the case of FATAL errors lagging in array,
    3398                 :  * but we already know those are bogus anyway, so we skip that test.
    3399                 :  *
    3400                 :  * If dbOid is valid we skip backends attached to other databases.
    3401                 :  *
    3402                 :  * Be careful to *not* pfree the result from this function. We reuse
    3403                 :  * this array sufficiently often that we use malloc for the result.
    3404                 :  */
    3405                 : VirtualTransactionId *
    3406            8625 : GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
    3407 ECB             : {
    3408                 :     static VirtualTransactionId *vxids;
    3409 GIC        8625 :     ProcArrayStruct *arrayP = procArray;
    3410 CBC        8625 :     int         count = 0;
    3411 ECB             :     int         index;
    3412                 : 
    3413                 :     /*
    3414                 :      * If first time through, get workspace to remember the VXIDs in. We
    3415                 :      * malloc it permanently to avoid repeated palloc/pfree overhead. Allocate
    3416                 :      * enough result space, including room for a terminator.
    3417                 :      */
    3418 GIC        8625 :     if (vxids == NULL)
    3419 ECB             :     {
    3420 GIC          20 :         vxids = (VirtualTransactionId *)
    3421 CBC          20 :             malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
    3422              20 :         if (vxids == NULL)
    3423 LBC           0 :             ereport(ERROR,
    3424 EUB             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
    3425                 :                      errmsg("out of memory")));
    3426                 :     }
    3427                 : 
    3428 GIC        8625 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    3429 ECB             : 
    3430 GIC        8778 :     for (index = 0; index < arrayP->numProcs; index++)
    3431 ECB             :     {
    3432 GIC         153 :         int         pgprocno = arrayP->pgprocnos[index];
    3433 CBC         153 :         PGPROC     *proc = &allProcs[pgprocno];
    3434 ECB             : 
    3435                 :         /* Exclude prepared transactions */
    3436 GIC         153 :         if (proc->pid == 0)
    3437 LBC           0 :             continue;
    3438 EUB             : 
    3439 GIC         153 :         if (!OidIsValid(dbOid) ||
    3440 CBC         146 :             proc->databaseId == dbOid)
    3441 ECB             :         {
    3442                 :             /* Fetch xmin just once - can't change on us, but good coding */
    3443 GIC          15 :             TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
    3444 ECB             : 
    3445                 :             /*
    3446                 :              * We ignore an invalid pxmin because this means that backend has
    3447                 :              * no snapshot currently. We hold a Share lock to avoid contention
    3448                 :              * with users taking snapshots.  That is not a problem because the
    3449                 :              * current xmin is always at least one higher than the latest
    3450                 :              * removed xid, so any new snapshot would never conflict with the
    3451                 :              * test here.
    3452                 :              */
    3453 GIC          15 :             if (!TransactionIdIsValid(limitXmin) ||
    3454 CBC           1 :                 (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin)))
    3455 ECB             :             {
    3456                 :                 VirtualTransactionId vxid;
    3457                 : 
    3458 GIC           2 :                 GET_VXID_FROM_PGPROC(vxid, *proc);
    3459 CBC           2 :                 if (VirtualTransactionIdIsValid(vxid))
    3460               2 :                     vxids[count++] = vxid;
    3461 ECB             :             }
    3462                 :         }
    3463                 :     }
    3464                 : 
    3465 GIC        8625 :     LWLockRelease(ProcArrayLock);
    3466 ECB             : 
    3467                 :     /* add the terminator */
    3468 GIC        8625 :     vxids[count].backendId = InvalidBackendId;
    3469 CBC        8625 :     vxids[count].localTransactionId = InvalidLocalTransactionId;
    3470 ECB             : 
    3471 GIC        8625 :     return vxids;
    3472 ECB             : }
    3473                 : 
    3474                 : /*
    3475                 :  * CancelVirtualTransaction - used in recovery conflict processing
    3476                 :  *
    3477                 :  * Returns pid of the process signaled, or 0 if not found.
    3478                 :  */
    3479                 : pid_t
    3480 GIC           3 : CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
    3481 ECB             : {
    3482 GIC           3 :     return SignalVirtualTransaction(vxid, sigmode, true);
    3483 ECB             : }
    3484                 : 
    3485                 : pid_t
    3486 GIC           5 : SignalVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode,
    3487 ECB             :                          bool conflictPending)
    3488                 : {
    3489 GIC           5 :     ProcArrayStruct *arrayP = procArray;
    3490 ECB             :     int         index;
    3491 GIC           5 :     pid_t       pid = 0;
    3492 ECB             : 
    3493 GIC           5 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    3494 ECB             : 
    3495 GIC           5 :     for (index = 0; index < arrayP->numProcs; index++)
    3496 ECB             :     {
    3497 GIC           5 :         int         pgprocno = arrayP->pgprocnos[index];
    3498 CBC           5 :         PGPROC     *proc = &allProcs[pgprocno];
    3499 ECB             :         VirtualTransactionId procvxid;
    3500                 : 
    3501 GIC           5 :         GET_VXID_FROM_PGPROC(procvxid, *proc);
    3502 ECB             : 
    3503 GIC           5 :         if (procvxid.backendId == vxid.backendId &&
    3504 CBC           5 :             procvxid.localTransactionId == vxid.localTransactionId)
    3505 ECB             :         {
    3506 GIC           5 :             proc->recoveryConflictPending = conflictPending;
    3507 CBC           5 :             pid = proc->pid;
    3508               5 :             if (pid != 0)
    3509 ECB             :             {
    3510                 :                 /*
    3511                 :                  * Kill the pid if it's still here. If not, that's what we
    3512                 :                  * wanted so ignore any errors.
    3513                 :                  */
    3514 GIC           5 :                 (void) SendProcSignal(pid, sigmode, vxid.backendId);
    3515 ECB             :             }
    3516 GIC           5 :             break;
    3517 ECB             :         }
    3518                 :     }
    3519                 : 
    3520 GIC           5 :     LWLockRelease(ProcArrayLock);
    3521 ECB             : 
    3522 GIC           5 :     return pid;
    3523 ECB             : }
    3524                 : 
    3525                 : /*
    3526                 :  * MinimumActiveBackends --- count backends (other than myself) that are
    3527                 :  *      in active transactions.  Return true if the count is at least the
    3528                 :  *      minimum threshold passed.  This is used as a heuristic to decide if
    3529                 :  *      a pre-XLOG-flush delay is worthwhile during commit.
    3530                 :  *
    3531                 :  * Do not count backends that are blocked waiting for locks, since they are
    3532                 :  * not going to get to run until someone else commits.
    3533                 :  */
    3534                 : bool
    3535 UIC           0 : MinimumActiveBackends(int min)
    3536 EUB             : {
    3537 UIC           0 :     ProcArrayStruct *arrayP = procArray;
    3538 UBC           0 :     int         count = 0;
    3539 EUB             :     int         index;
    3540                 : 
    3541                 :     /* Quick short-circuit if no minimum is specified */
    3542 UIC           0 :     if (min == 0)
    3543 UBC           0 :         return true;
    3544 EUB             : 
    3545                 :     /*
    3546                 :      * Note: for speed, we don't acquire ProcArrayLock.  This is a little bit
    3547                 :      * bogus, but since we are only testing fields for zero or nonzero, it
    3548                 :      * should be OK.  The result is only used for heuristic purposes anyway...
    3549                 :      */
    3550 UIC           0 :     for (index = 0; index < arrayP->numProcs; index++)
    3551 EUB             :     {
    3552 UIC           0 :         int         pgprocno = arrayP->pgprocnos[index];
    3553 UBC           0 :         PGPROC     *proc = &allProcs[pgprocno];
    3554 EUB             : 
    3555                 :         /*
    3556                 :          * Since we're not holding a lock, need to be prepared to deal with
    3557                 :          * garbage, as someone could have incremented numProcs but not yet
    3558                 :          * filled the structure.
    3559                 :          *
    3560                 :          * If someone just decremented numProcs, 'proc' could also point to a
    3561                 :          * PGPROC entry that's no longer in the array. It still points to a
    3562                 :          * PGPROC struct, though, because freed PGPROC entries just go to the
    3563                 :          * free list and are recycled. Its contents are nonsense in that case,
    3564                 :          * but that's acceptable for this function.
    3565                 :          */
    3566 UIC           0 :         if (pgprocno == -1)
    3567 UBC           0 :             continue;           /* do not count deleted entries */
    3568               0 :         if (proc == MyProc)
    3569               0 :             continue;           /* do not count myself */
    3570               0 :         if (proc->xid == InvalidTransactionId)
    3571               0 :             continue;           /* do not count if no XID assigned */
    3572               0 :         if (proc->pid == 0)
    3573               0 :             continue;           /* do not count prepared xacts */
    3574               0 :         if (proc->waitLock != NULL)
    3575               0 :             continue;           /* do not count if blocked on a lock */
    3576               0 :         count++;
    3577               0 :         if (count >= min)
    3578               0 :             break;
    3579 EUB             :     }
    3580                 : 
    3581 UIC           0 :     return count >= min;
    3582 EUB             : }
    3583                 : 
    3584                 : /*
    3585                 :  * CountDBBackends --- count backends that are using specified database
    3586                 :  */
    3587                 : int
    3588 GIC          11 : CountDBBackends(Oid databaseid)
    3589 ECB             : {
    3590 GIC          11 :     ProcArrayStruct *arrayP = procArray;
    3591 CBC          11 :     int         count = 0;
    3592 ECB             :     int         index;
    3593                 : 
    3594 GIC          11 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    3595 ECB             : 
    3596 GIC          20 :     for (index = 0; index < arrayP->numProcs; index++)
    3597 ECB             :     {
    3598 GIC           9 :         int         pgprocno = arrayP->pgprocnos[index];
    3599 CBC           9 :         PGPROC     *proc = &allProcs[pgprocno];
    3600 ECB             : 
    3601 GIC           9 :         if (proc->pid == 0)
    3602 LBC           0 :             continue;           /* do not count prepared xacts */
    3603 GBC           9 :         if (!OidIsValid(databaseid) ||
    3604 CBC           9 :             proc->databaseId == databaseid)
    3605               2 :             count++;
    3606 ECB             :     }
    3607                 : 
    3608 GIC          11 :     LWLockRelease(ProcArrayLock);
    3609 ECB             : 
    3610 GIC          11 :     return count;
    3611 ECB             : }
    3612                 : 
    3613                 : /*
    3614                 :  * CountDBConnections --- counts database backends ignoring any background
    3615                 :  *      worker processes
    3616                 :  */
    3617                 : int
    3618 UIC           0 : CountDBConnections(Oid databaseid)
    3619 EUB             : {
    3620 UIC           0 :     ProcArrayStruct *arrayP = procArray;
    3621 UBC           0 :     int         count = 0;
    3622 EUB             :     int         index;
    3623                 : 
    3624 UIC           0 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    3625 EUB             : 
    3626 UIC           0 :     for (index = 0; index < arrayP->numProcs; index++)
    3627 EUB             :     {
    3628 UIC           0 :         int         pgprocno = arrayP->pgprocnos[index];
    3629 UBC           0 :         PGPROC     *proc = &allProcs[pgprocno];
    3630 EUB             : 
    3631 UIC           0 :         if (proc->pid == 0)
    3632 UBC           0 :             continue;           /* do not count prepared xacts */
    3633               0 :         if (proc->isBackgroundWorker)
    3634               0 :             continue;           /* do not count background workers */
    3635               0 :         if (!OidIsValid(databaseid) ||
    3636               0 :             proc->databaseId == databaseid)
    3637               0 :             count++;
    3638 EUB             :     }
    3639                 : 
    3640 UIC           0 :     LWLockRelease(ProcArrayLock);
    3641 EUB             : 
    3642 UIC           0 :     return count;
    3643 EUB             : }
    3644                 : 
    3645                 : /*
    3646                 :  * CancelDBBackends --- cancel backends that are using specified database
    3647                 :  */
    3648                 : void
    3649 GIC          10 : CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
    3650 ECB             : {
    3651 GIC          10 :     ProcArrayStruct *arrayP = procArray;
    3652 ECB             :     int         index;
    3653                 : 
    3654                 :     /* tell all backends to die */
    3655 GIC          10 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    3656 ECB             : 
    3657 GIC          21 :     for (index = 0; index < arrayP->numProcs; index++)
    3658 ECB             :     {
    3659 GIC          11 :         int         pgprocno = arrayP->pgprocnos[index];
    3660 CBC          11 :         PGPROC     *proc = &allProcs[pgprocno];
    3661 ECB             : 
    3662 GIC          11 :         if (databaseid == InvalidOid || proc->databaseId == databaseid)
    3663 ECB             :         {
    3664                 :             VirtualTransactionId procvxid;
    3665                 :             pid_t       pid;
    3666                 : 
    3667 GIC          10 :             GET_VXID_FROM_PGPROC(procvxid, *proc);
    3668 ECB             : 
    3669 GIC          10 :             proc->recoveryConflictPending = conflictPending;
    3670 CBC          10 :             pid = proc->pid;
    3671              10 :             if (pid != 0)
    3672 ECB             :             {
    3673                 :                 /*
    3674                 :                  * Kill the pid if it's still here. If not, that's what we
    3675                 :                  * wanted so ignore any errors.
    3676                 :                  */
    3677 GIC          10 :                 (void) SendProcSignal(pid, sigmode, procvxid.backendId);
    3678 ECB             :             }
    3679                 :         }
    3680                 :     }
    3681                 : 
    3682 GIC          10 :     LWLockRelease(ProcArrayLock);
    3683 CBC          10 : }
    3684 ECB             : 
    3685                 : /*
    3686                 :  * CountUserBackends --- count backends that are used by specified user
    3687                 :  */
    3688                 : int
    3689 UIC           0 : CountUserBackends(Oid roleid)
    3690 EUB             : {
    3691 UIC           0 :     ProcArrayStruct *arrayP = procArray;
    3692 UBC           0 :     int         count = 0;
    3693 EUB             :     int         index;
    3694                 : 
    3695 UIC           0 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    3696 EUB             : 
    3697 UIC           0 :     for (index = 0; index < arrayP->numProcs; index++)
    3698 EUB             :     {
    3699 UIC           0 :         int         pgprocno = arrayP->pgprocnos[index];
    3700 UBC           0 :         PGPROC     *proc = &allProcs[pgprocno];
    3701 EUB             : 
    3702 UIC           0 :         if (proc->pid == 0)
    3703 UBC           0 :             continue;           /* do not count prepared xacts */
    3704               0 :         if (proc->isBackgroundWorker)
    3705               0 :             continue;           /* do not count background workers */
    3706               0 :         if (proc->roleId == roleid)
    3707               0 :             count++;
    3708 EUB             :     }
    3709                 : 
    3710 UIC           0 :     LWLockRelease(ProcArrayLock);
    3711 EUB             : 
    3712 UIC           0 :     return count;
    3713 EUB             : }
    3714                 : 
    3715                 : /*
    3716                 :  * CountOtherDBBackends -- check for other backends running in the given DB
    3717                 :  *
    3718                 :  * If there are other backends in the DB, we will wait a maximum of 5 seconds
    3719                 :  * for them to exit.  Autovacuum backends are encouraged to exit early by
    3720                 :  * sending them SIGTERM, but normal user backends are just waited for.
    3721                 :  *
    3722                 :  * The current backend is always ignored; it is caller's responsibility to
    3723                 :  * check whether the current backend uses the given DB, if it's important.
    3724                 :  *
    3725                 :  * Returns true if there are (still) other backends in the DB, false if not.
    3726                 :  * Also, *nbackends and *nprepared are set to the number of other backends
    3727                 :  * and prepared transactions in the DB, respectively.
    3728                 :  *
    3729                 :  * This function is used to interlock DROP DATABASE and related commands
    3730                 :  * against there being any active backends in the target DB --- dropping the
    3731                 :  * DB while active backends remain would be a Bad Thing.  Note that we cannot
    3732                 :  * detect here the possibility of a newly-started backend that is trying to
    3733                 :  * connect to the doomed database, so additional interlocking is needed during
    3734                 :  * backend startup.  The caller should normally hold an exclusive lock on the
    3735                 :  * target DB before calling this, which is one reason we mustn't wait
    3736                 :  * indefinitely.
    3737                 :  */
    3738                 : bool
    3739 GIC         822 : CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
    3740 ECB             : {
    3741 GIC         822 :     ProcArrayStruct *arrayP = procArray;
    3742 ECB             : 
    3743                 : #define MAXAUTOVACPIDS  10      /* max autovacs to SIGTERM per iteration */
    3744                 :     int         autovac_pids[MAXAUTOVACPIDS];
    3745                 :     int         tries;
    3746                 : 
    3747                 :     /* 50 tries with 100ms sleep between tries makes 5 sec total wait */
    3748 GIC         822 :     for (tries = 0; tries < 50; tries++)
    3749 ECB             :     {
    3750 GIC         822 :         int         nautovacs = 0;
    3751 CBC         822 :         bool        found = false;
    3752 ECB             :         int         index;
    3753                 : 
    3754 GIC         822 :         CHECK_FOR_INTERRUPTS();
    3755 ECB             : 
    3756 GIC         822 :         *nbackends = *nprepared = 0;
    3757 ECB             : 
    3758 GIC         822 :         LWLockAcquire(ProcArrayLock, LW_SHARED);
    3759 ECB             : 
    3760 GIC        2107 :         for (index = 0; index < arrayP->numProcs; index++)
    3761 ECB             :         {
    3762 GIC        1285 :             int         pgprocno = arrayP->pgprocnos[index];
    3763 CBC        1285 :             PGPROC     *proc = &allProcs[pgprocno];
    3764            1285 :             uint8       statusFlags = ProcGlobal->statusFlags[index];
    3765 ECB             : 
    3766 GIC        1285 :             if (proc->databaseId != databaseId)
    3767 CBC         679 :                 continue;
    3768             606 :             if (proc == MyProc)
    3769             606 :                 continue;
    3770 ECB             : 
    3771 UIC           0 :             found = true;
    3772 EUB             : 
    3773 UIC           0 :             if (proc->pid == 0)
    3774 UBC           0 :                 (*nprepared)++;
    3775 EUB             :             else
    3776                 :             {
    3777 UIC           0 :                 (*nbackends)++;
    3778 UBC           0 :                 if ((statusFlags & PROC_IS_AUTOVACUUM) &&
    3779 EUB             :                     nautovacs < MAXAUTOVACPIDS)
    3780 UIC           0 :                     autovac_pids[nautovacs++] = proc->pid;
    3781 EUB             :             }
    3782                 :         }
    3783                 : 
    3784 GIC         822 :         LWLockRelease(ProcArrayLock);
    3785 ECB             : 
    3786 GIC         822 :         if (!found)
    3787 CBC         822 :             return false;       /* no conflicting backends, so done */
    3788 ECB             : 
    3789                 :         /*
    3790                 :          * Send SIGTERM to any conflicting autovacuums before sleeping. We
    3791                 :          * postpone this step until after the loop because we don't want to
    3792                 :          * hold ProcArrayLock while issuing kill(). We have no idea what might
    3793                 :          * block kill() inside the kernel...
    3794                 :          */
    3795 UIC           0 :         for (index = 0; index < nautovacs; index++)
    3796 UBC           0 :             (void) kill(autovac_pids[index], SIGTERM);  /* ignore any error */
    3797 EUB             : 
    3798                 :         /* sleep, then try again */
    3799 UIC           0 :         pg_usleep(100 * 1000L); /* 100ms */
    3800 EUB             :     }
    3801                 : 
    3802 UIC           0 :     return true;                /* timed out, still conflicts */
    3803 EUB             : }
    3804                 : 
    3805                 : /*
    3806                 :  * Terminate existing connections to the specified database. This routine
    3807                 :  * is used by the DROP DATABASE command when user has asked to forcefully
    3808                 :  * drop the database.
    3809                 :  *
    3810                 :  * The current backend is always ignored; it is caller's responsibility to
    3811                 :  * check whether the current backend uses the given DB, if it's important.
    3812                 :  *
    3813                 :  * No connections are terminated if there is even one prepared transaction
    3814                 :  * in the target database; an error is raised instead.
    3815                 :  */
    3816                 : void
    3817 GIC           1 : TerminateOtherDBBackends(Oid databaseId)
    3818 ECB             : {
    3819 GIC           1 :     ProcArrayStruct *arrayP = procArray;
    3820 CBC           1 :     List       *pids = NIL;
    3821               1 :     int         nprepared = 0;
    3822 ECB             :     int         i;
    3823                 : 
    3824 GIC           1 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    3825 ECB             : 
    3826 GIC           4 :     for (i = 0; i < procArray->numProcs; i++)
    3827 ECB             :     {
    3828 GIC           3 :         int         pgprocno = arrayP->pgprocnos[i];
    3829 CBC           3 :         PGPROC     *proc = &allProcs[pgprocno];
    3830 ECB             : 
    3831 GIC           3 :         if (proc->databaseId != databaseId)
    3832 CBC           3 :             continue;
    3833 LBC           0 :         if (proc == MyProc)
    3834 UBC           0 :             continue;
    3835 EUB             : 
    3836 UIC           0 :         if (proc->pid != 0)
    3837 UBC           0 :             pids = lappend_int(pids, proc->pid);
    3838 EUB             :         else
    3839 UIC           0 :             nprepared++;
    3840 EUB             :     }
    3841                 : 
    3842 GIC           1 :     LWLockRelease(ProcArrayLock);
    3843 ECB             : 
    3844 GIC           1 :     if (nprepared > 0)
    3845 LBC           0 :         ereport(ERROR,
    3846 EUB             :                 (errcode(ERRCODE_OBJECT_IN_USE),
    3847                 :                  errmsg("database \"%s\" is being used by prepared transactions",
    3848                 :                         get_database_name(databaseId)),
    3849                 :                  errdetail_plural("There is %d prepared transaction using the database.",
    3850                 :                                   "There are %d prepared transactions using the database.",
    3851                 :                                   nprepared,
    3852                 :                                   nprepared)));
    3853                 : 
    3854 GIC           1 :     if (pids)
    3855 ECB             :     {
    3856                 :         ListCell   *lc;
    3857                 : 
    3858                 :         /*
    3859                 :          * Check whether we have the necessary rights to terminate other
    3860                 :          * sessions.  We don't terminate any session until we ensure that we
    3861                 :          * have rights on all the sessions to be terminated.  These checks are
    3862                 :          * the same as we do in pg_terminate_backend.
    3863                 :          *
    3864                 :          * In this case we don't raise warnings such as "PID %d is not a
    3865                 :          * PostgreSQL server process", because a session that has already
    3866                 :          * finished is not a problem for us.
    3867                 :          */
    3868 UIC           0 :         foreach(lc, pids)
    3869 EUB             :         {
    3870 UIC           0 :             int         pid = lfirst_int(lc);
    3871 UBC           0 :             PGPROC     *proc = BackendPidGetProc(pid);
    3872 EUB             : 
    3873 UIC           0 :             if (proc != NULL)
    3874 EUB             :             {
    3875                 :                 /* Only allow superusers to signal superuser-owned backends. */
    3876 UIC           0 :                 if (superuser_arg(proc->roleId) && !superuser())
    3877 UBC           0 :                     ereport(ERROR,
    3878 EUB             :                             (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
    3879                 :                              errmsg("must be a superuser to terminate superuser process")));
    3880                 : 
    3881                 :                 /* Users can signal backends they have role membership in. */
    3882 UIC           0 :                 if (!has_privs_of_role(GetUserId(), proc->roleId) &&
    3883 UBC           0 :                     !has_privs_of_role(GetUserId(), ROLE_PG_SIGNAL_BACKEND))
    3884               0 :                     ereport(ERROR,
    3885 EUB             :                             (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
    3886                 :                              errmsg("permission denied to terminate process"),
    3887                 :                              errdetail("Only roles with privileges of the role whose process is being terminated or with privileges of the \"%s\" role may terminate this process.",
    3888                 :                                        "pg_signal_backend")));
    3889                 :             }
    3890                 :         }
    3891                 : 
    3892                 :         /*
    3893                 :          * There's a race condition here: once we release the ProcArrayLock,
    3894                 :          * it's possible for the session to exit before we issue kill.  That
    3895                 :          * race condition possibility seems too unlikely to worry about.  See
    3896                 :          * pg_signal_backend.
    3897                 :          */
    3898 UIC           0 :         foreach(lc, pids)
    3899                 :         {
    3900               0 :             int         pid = lfirst_int(lc);
    3901 UBC           0 :             PGPROC     *proc = BackendPidGetProc(pid);
    3902                 : 
    3903               0 :             if (proc != NULL)
    3904 EUB             :             {
    3905                 :                 /*
    3906                 :                  * If we have setsid(), signal the backend's whole process
    3907                 :                  * group
    3908                 :                  */
    3909                 : #ifdef HAVE_SETSID
    3910 UIC           0 :                 (void) kill(-pid, SIGTERM);
    3911                 : #else
    3912                 :                 (void) kill(pid, SIGTERM);
    3913 EUB             : #endif
    3914                 :             }
    3915                 :         }
    3916                 :     }
    3917 GIC           1 : }
    3918                 : 
    3919                 : /*
    3920 ECB             :  * ProcArraySetReplicationSlotXmin
    3921                 :  *
    3922                 :  * Install limits to future computations of the xmin horizon to prevent vacuum
    3923                 :  * and HOT pruning from removing affected rows still needed by clients with
    3924                 :  * replication slots.
    3925                 :  */
    3926                 : void
    3927 GIC        2189 : ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin,
    3928                 :                                 bool already_locked)
    3929                 : {
    3930 CBC        2189 :     Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
    3931                 : 
    3932 GIC        2189 :     if (!already_locked)
    3933 CBC        1847 :         LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    3934                 : 
    3935            2189 :     procArray->replication_slot_xmin = xmin;
    3936            2189 :     procArray->replication_slot_catalog_xmin = catalog_xmin;
    3937                 : 
    3938            2189 :     if (!already_locked)
    3939            1847 :         LWLockRelease(ProcArrayLock);
    3940                 : 
    3941            2189 :     elog(DEBUG1, "xmin required by slots: data %u, catalog %u",
    3942 ECB             :          xmin, catalog_xmin);
    3943 GIC        2189 : }
    3944 ECB             : 
    3945                 : /*
    3946                 :  * ProcArrayGetReplicationSlotXmin
    3947                 :  *
    3948                 :  * Return the current slot xmin limits. That's useful to be able to remove
    3949                 :  * data that's older than those limits.
    3950                 :  */
    3951                 : void
    3952 GIC          21 : ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
    3953                 :                                 TransactionId *catalog_xmin)
    3954                 : {
    3955 CBC          21 :     LWLockAcquire(ProcArrayLock, LW_SHARED);
    3956                 : 
    3957 GIC          21 :     if (xmin != NULL)
    3958 LBC           0 :         *xmin = procArray->replication_slot_xmin;
    3959                 : 
    3960 CBC          21 :     if (catalog_xmin != NULL)
    3961 GBC          21 :         *catalog_xmin = procArray->replication_slot_catalog_xmin;
    3962                 : 
    3963 CBC          21 :     LWLockRelease(ProcArrayLock);
    3964              21 : }
    3965                 : 
    3966 ECB             : /*
    3967                 :  * XidCacheRemoveRunningXids
    3968                 :  *
    3969                 :  * Remove a bunch of TransactionIds from the list of known-running
    3970                 :  * subtransactions for my backend.  Both the specified xid and those in
    3971                 :  * the xids[] array (of length nxids) are removed from the subxids cache.
    3972                 :  * latestXid must be the latest XID among the group.
    3973                 :  */
    3974                 : void
    3975 GIC         610 : XidCacheRemoveRunningXids(TransactionId xid,
    3976                 :                           int nxids, const TransactionId *xids,
    3977                 :                           TransactionId latestXid)
    3978 ECB             : {
    3979                 :     int         i,
    3980                 :                 j;
    3981                 :     XidCacheStatus *mysubxidstat;
    3982                 : 
    3983 GIC         610 :     Assert(TransactionIdIsValid(xid));
    3984                 : 
    3985                 :     /*
    3986 ECB             :      * We must hold ProcArrayLock exclusively in order to remove transactions
    3987                 :      * from the PGPROC array.  (See src/backend/access/transam/README.)  It's
    3988                 :      * possible this could be relaxed since we know this routine is only used
    3989                 :      * to abort subtransactions, but pending closer analysis we'd best be
    3990                 :      * conservative.
    3991                 :      *
    3992                 :      * Note that we do not have to be careful about memory ordering of our own
    3993                 :      * reads wrt. GetNewTransactionId() here - only this process can modify
    3994                 :      * relevant fields of MyProc/ProcGlobal->xids[].  But we do have to be
    3995                 :      * careful about our own writes being well ordered.
    3996                 :      */
    3997 GIC         610 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    3998                 : 
    3999             610 :     mysubxidstat = &ProcGlobal->subxidStates[MyProc->pgxactoff];
    4000 ECB             : 
    4001                 :     /*
    4002                 :      * Under normal circumstances xid and xids[] will be in increasing order,
    4003                 :      * as will be the entries in subxids.  Scan backwards to avoid O(N^2)
    4004                 :      * behavior when removing a lot of xids.
    4005                 :      */
    4006 GIC         640 :     for (i = nxids - 1; i >= 0; i--)
    4007                 :     {
    4008              30 :         TransactionId anxid = xids[i];
    4009 ECB             : 
    4010 GIC          30 :         for (j = MyProc->subxidStatus.count - 1; j >= 0; j--)
    4011 ECB             :         {
    4012 GIC          30 :             if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
    4013 ECB             :             {
    4014 GIC          30 :                 MyProc->subxids.xids[j] = MyProc->subxids.xids[MyProc->subxidStatus.count - 1];
    4015 CBC          30 :                 pg_write_barrier();
    4016 GIC          30 :                 mysubxidstat->count--;
    4017 CBC          30 :                 MyProc->subxidStatus.count--;
    4018              30 :                 break;
    4019 ECB             :             }
    4020                 :         }
    4021                 : 
    4022                 :         /*
    4023                 :          * Ordinarily we should have found it, unless the cache has
    4024                 :          * overflowed. However it's also possible for this routine to be
    4025                 :          * invoked multiple times for the same subtransaction, in case of an
    4026                 :          * error during AbortSubTransaction.  So instead of Assert, emit a
    4027                 :          * debug warning.
    4028                 :          */
    4029 GIC          30 :         if (j < 0 && !MyProc->subxidStatus.overflowed)
    4030 UIC           0 :             elog(WARNING, "did not find subXID %u in MyProc", anxid);
    4031                 :     }
    4032 ECB             : 
    4033 GBC         610 :     for (j = MyProc->subxidStatus.count - 1; j >= 0; j--)
    4034                 :     {
    4035 GIC         610 :         if (TransactionIdEquals(MyProc->subxids.xids[j], xid))
    4036 ECB             :         {
    4037 GIC         610 :             MyProc->subxids.xids[j] = MyProc->subxids.xids[MyProc->subxidStatus.count - 1];
    4038 CBC         610 :             pg_write_barrier();
    4039 GIC         610 :             mysubxidstat->count--;
    4040 CBC         610 :             MyProc->subxidStatus.count--;
    4041             610 :             break;
    4042 ECB             :         }
    4043                 :     }
    4044                 :     /* Ordinarily we should have found it, unless the cache has overflowed */
    4045 GIC         610 :     if (j < 0 && !MyProc->subxidStatus.overflowed)
    4046 UIC           0 :         elog(WARNING, "did not find subXID %u in MyProc", xid);
    4047                 : 
    4048 ECB             :     /* Also advance global latestCompletedXid while holding the lock */
    4049 GBC         610 :     MaintainLatestCompletedXid(latestXid);
    4050                 : 
    4051                 :     /* ... and xactCompletionCount */
    4052 CBC         610 :     ShmemVariableCache->xactCompletionCount++;
    4053                 : 
    4054 GIC         610 :     LWLockRelease(ProcArrayLock);
    4055 CBC         610 : }
    4056                 : 
    4057 ECB             : #ifdef XIDCACHE_DEBUG
    4058                 : 
    4059                 : /*
    4060                 :  * Print stats about effectiveness of XID cache
    4061                 :  */
    4062                 : static void
    4063                 : DisplayXidCache(void)
    4064                 : {
    4065                 :     fprintf(stderr,
    4066                 :             "XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n",
    4067                 :             xc_by_recent_xmin,
    4068                 :             xc_by_known_xact,
    4069                 :             xc_by_my_xact,
    4070                 :             xc_by_latest_xid,
    4071                 :             xc_by_main_xid,
    4072                 :             xc_by_child_xid,
    4073                 :             xc_by_known_assigned,
    4074                 :             xc_no_overflow,
    4075                 :             xc_slow_answer);
    4076                 : }
    4077                 : #endif                          /* XIDCACHE_DEBUG */
    4078                 : 
    4079                 : /*
    4080                 :  * If rel != NULL, return test state appropriate for relation, otherwise
    4081                 :  * return state usable for all relations.  The latter may consider XIDs as
    4082                 :  * not-yet-visible-to-everyone that a state for a specific relation would
    4083                 :  * already consider visible-to-everyone.
    4084                 :  *
    4085                 :  * This needs to be called while a snapshot is active or registered, otherwise
    4086                 :  * there are wraparound and other dangers.
    4087                 :  *
    4088                 :  * See comment for GlobalVisState for details.
    4089                 :  */
    4090                 : GlobalVisState *
    4091 GIC    14036985 : GlobalVisTestFor(Relation rel)
    4092                 : {
    4093        14036985 :     GlobalVisState *state = NULL;
    4094 ECB             : 
    4095                 :     /* XXX: we should assert that a snapshot is pushed or registered */
    4096 CBC    14036985 :     Assert(RecentXmin);
    4097                 : 
    4098 GIC    14036985 :     switch (GlobalVisHorizonKindForRel(rel))
    4099 ECB             :     {
    4100 GIC       55912 :         case VISHORIZON_SHARED:
    4101 CBC       55912 :             state = &GlobalVisSharedRels;
    4102 GIC       55912 :             break;
    4103 CBC     2646677 :         case VISHORIZON_CATALOG:
    4104         2646677 :             state = &GlobalVisCatalogRels;
    4105         2646677 :             break;
    4106        11292629 :         case VISHORIZON_DATA:
    4107        11292629 :             state = &GlobalVisDataRels;
    4108        11292629 :             break;
    4109           41767 :         case VISHORIZON_TEMP:
    4110           41767 :             state = &GlobalVisTempRels;
    4111           41767 :             break;
    4112 ECB             :     }
    4113                 : 
    4114 CBC    14036985 :     Assert(FullTransactionIdIsValid(state->definitely_needed) &&
    4115                 :            FullTransactionIdIsValid(state->maybe_needed));
    4116                 : 
    4117        14036985 :     return state;
    4118                 : }
    4119                 : 
    4120 ECB             : /*
    4121                 :  * Return true if it's worth updating the accurate maybe_needed boundary.
    4122                 :  *
    4123                 :  * As it is somewhat expensive to determine xmin horizons, we don't want to
    4124                 :  * repeatedly do so when there is a low likelihood of it being beneficial.
    4125                 :  *
    4126                 :  * The current heuristic is that we update only if RecentXmin has changed
    4127                 :  * since the last update. If the oldest currently running transaction has not
    4128                 :  * finished, it is unlikely that recomputing the horizon would be useful.
    4129                 :  */
    4130                 : static bool
    4131 GIC      426533 : GlobalVisTestShouldUpdate(GlobalVisState *state)
    4132                 : {
    4133                 :     /* hasn't been updated yet */
    4134 CBC      426533 :     if (!TransactionIdIsValid(ComputeXidHorizonsResultLastXmin))
    4135 GIC        6917 :         return true;
    4136                 : 
    4137 ECB             :     /*
    4138                 :      * If the maybe_needed/definitely_needed boundaries are the same, it's
    4139                 :      * unlikely to be beneficial to refresh boundaries.
    4140                 :      */
    4141 GIC      419616 :     if (FullTransactionIdFollowsOrEquals(state->maybe_needed,
    4142                 :                                          state->definitely_needed))
    4143              15 :         return false;
    4144 ECB             : 
    4145                 :     /* does the last snapshot built have a different xmin? */
    4146 CBC      419601 :     return RecentXmin != ComputeXidHorizonsResultLastXmin;
    4147                 : }
    4148                 : 
    4149 ECB             : static void
    4150 GIC      204530 : GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons)
    4151                 : {
    4152                 :     GlobalVisSharedRels.maybe_needed =
    4153 CBC      204530 :         FullXidRelativeTo(horizons->latest_completed,
    4154                 :                           horizons->shared_oldest_nonremovable);
    4155                 :     GlobalVisCatalogRels.maybe_needed =
    4156          204530 :         FullXidRelativeTo(horizons->latest_completed,
    4157                 :                           horizons->catalog_oldest_nonremovable);
    4158                 :     GlobalVisDataRels.maybe_needed =
    4159          204530 :         FullXidRelativeTo(horizons->latest_completed,
    4160                 :                           horizons->data_oldest_nonremovable);
    4161                 :     GlobalVisTempRels.maybe_needed =
    4162          204530 :         FullXidRelativeTo(horizons->latest_completed,
    4163                 :                           horizons->temp_oldest_nonremovable);
    4164                 : 
    4165 ECB             :     /*
    4166                 :      * In longer running transactions it's possible that transactions we
    4167                 :      * previously needed to treat as running aren't around anymore. So update
    4168                 :      * definitely_needed to not be earlier than maybe_needed.
    4169                 :      */
    4170                 :     GlobalVisSharedRels.definitely_needed =
    4171 GIC      204530 :         FullTransactionIdNewer(GlobalVisSharedRels.maybe_needed,
    4172                 :                                GlobalVisSharedRels.definitely_needed);
    4173                 :     GlobalVisCatalogRels.definitely_needed =
    4174 CBC      204530 :         FullTransactionIdNewer(GlobalVisCatalogRels.maybe_needed,
    4175                 :                                GlobalVisCatalogRels.definitely_needed);
    4176                 :     GlobalVisDataRels.definitely_needed =
    4177          204530 :         FullTransactionIdNewer(GlobalVisDataRels.maybe_needed,
    4178                 :                                GlobalVisDataRels.definitely_needed);
    4179 GIC      204530 :     GlobalVisTempRels.definitely_needed = GlobalVisTempRels.maybe_needed;
    4180 ECB             : 
    4181 GIC      204530 :     ComputeXidHorizonsResultLastXmin = RecentXmin;
    4182 CBC      204530 : }
    4183                 : 
    4184 ECB             : /*
    4185                 :  * Update boundaries in GlobalVis{Shared,Catalog,Data,Temp}Rels
    4186                 :  * using ComputeXidHorizons().
    4187                 :  */
    4188                 : static void
    4189 GIC      121945 : GlobalVisUpdate(void)
    4190                 : {
    4191                 :     ComputeXidHorizonsResult horizons;
    4192 ECB             : 
    4193                 :     /* updates the horizons as a side-effect */
    4194 GIC      121945 :     ComputeXidHorizons(&horizons);
    4195          121945 : }
    4196                 : 
    4197 ECB             : /*
    4198                 :  * Return true if no snapshot still considers fxid to be running.
    4199                 :  *
    4200                 :  * The state passed needs to have been initialized for the relation fxid is
    4201                 :  * from (NULL is also OK), otherwise the result may not be correct.
    4202                 :  *
    4203                 :  * See comment for GlobalVisState for details.
    4204                 :  */
    4205                 : bool
    4206 GIC     9950611 : GlobalVisTestIsRemovableFullXid(GlobalVisState *state,
    4207                 :                                 FullTransactionId fxid)
    4208                 : {
    4209 ECB             :     /*
    4210                 :      * If fxid is older than maybe_needed bound, it definitely is visible to
    4211                 :      * everyone.
    4212                 :      */
    4213 GIC     9950611 :     if (FullTransactionIdPrecedes(fxid, state->maybe_needed))
    4214         2832131 :         return true;
    4215                 : 
    4216 ECB             :     /*
    4217                 :      * If fxid is >= definitely_needed bound, it is very likely to still be
    4218                 :      * considered running.
    4219                 :      */
    4220 GIC     7118480 :     if (FullTransactionIdFollowsOrEquals(fxid, state->definitely_needed))
    4221         6691968 :         return false;
    4222                 : 
    4223 ECB             :     /*
    4224                 :      * fxid is between maybe_needed and definitely_needed, i.e. there might or
    4225                 :      * might not exist a snapshot considering fxid running. If it makes sense,
    4226                 :      * update boundaries and recheck.
    4227                 :      */
    4228 GIC      426512 :     if (GlobalVisTestShouldUpdate(state))
    4229                 :     {
    4230          121939 :         GlobalVisUpdate();
    4231 ECB             : 
    4232 GIC      121939 :         Assert(FullTransactionIdPrecedes(fxid, state->definitely_needed));
    4233 ECB             : 
    4234 GIC      121939 :         return FullTransactionIdPrecedes(fxid, state->maybe_needed);
    4235 ECB             :     }
    4236                 :     else
    4237 CBC      304573 :         return false;
    4238                 : }
    4239                 : 
    4240 ECB             : /*
    4241                 :  * Wrapper around GlobalVisTestIsRemovableFullXid() for 32bit xids.
    4242                 :  *
    4243                 :  * It is crucial that this only gets called for xids from a source that
    4244                 :  * protects against xid wraparounds (e.g. from a table and thus protected by
    4245                 :  * relfrozenxid).
    4246                 :  */
    4247                 : bool
    4248 GIC     9949850 : GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
    4249                 : {
    4250                 :     FullTransactionId fxid;
    4251 ECB             : 
    4252                 :     /*
    4253                 :      * Convert 32 bit argument to FullTransactionId. We can do so safely
    4254                 :      * because we know the xid has to, at the very least, be between
    4255                 :      * [oldestXid, nextXid), i.e. within 2 billion of xid. To avoid taking a
    4256                 :      * lock to determine either, we can just compare with
    4257                 :      * state->definitely_needed, which was based on those values at the time
    4258                 :      * the current snapshot was built.
    4259                 :      */
    4260 GIC     9949850 :     fxid = FullXidRelativeTo(state->definitely_needed, xid);
    4261                 : 
    4262         9949850 :     return GlobalVisTestIsRemovableFullXid(state, fxid);
    4263 ECB             : }
    4264                 : 
    4265                 : /*
    4266                 :  * Return FullTransactionId below which all transactions are not considered
    4267                 :  * running anymore.
    4268                 :  *
    4269                 :  * Note: This is less efficient than testing with
    4270                 :  * GlobalVisTestIsRemovableFullXid as it likely requires building an accurate
    4271                 :  * cutoff, even in the case all the XIDs compared with the cutoff are outside
    4272                 :  * [maybe_needed, definitely_needed).
    4273                 :  */
    4274                 : FullTransactionId
    4275 GIC          21 : GlobalVisTestNonRemovableFullHorizon(GlobalVisState *state)
    4276                 : {
    4277                 :     /* acquire accurate horizon if not already done */
    4278 CBC          21 :     if (GlobalVisTestShouldUpdate(state))
    4279 GIC           6 :         GlobalVisUpdate();
    4280                 : 
    4281 CBC          21 :     return state->maybe_needed;
    4282 ECB             : }
    4283                 : 
    4284                 : /* Convenience wrapper around GlobalVisTestNonRemovableFullHorizon */
    4285                 : TransactionId
    4286 GIC          21 : GlobalVisTestNonRemovableHorizon(GlobalVisState *state)
    4287                 : {
    4288                 :     FullTransactionId cutoff;
    4289 ECB             : 
    4290 GIC          21 :     cutoff = GlobalVisTestNonRemovableFullHorizon(state);
    4291                 : 
    4292              21 :     return XidFromFullTransactionId(cutoff);
    4293 ECB             : }
    4294                 : 
    4295                 : /*
    4296                 :  * Convenience wrapper around GlobalVisTestFor() and
    4297                 :  * GlobalVisTestIsRemovableFullXid(), see their comments.
    4298                 :  */
    4299                 : bool
    4300 GIC         761 : GlobalVisCheckRemovableFullXid(Relation rel, FullTransactionId fxid)
    4301                 : {
    4302                 :     GlobalVisState *state;
    4303 ECB             : 
    4304 GIC         761 :     state = GlobalVisTestFor(rel);
    4305                 : 
    4306             761 :     return GlobalVisTestIsRemovableFullXid(state, fxid);
    4307 ECB             : }
    4308                 : 
    4309                 : /*
    4310                 :  * Convenience wrapper around GlobalVisTestFor() and
    4311                 :  * GlobalVisTestIsRemovableXid(), see their comments.
    4312                 :  */
    4313                 : bool
    4314 GIC           6 : GlobalVisCheckRemovableXid(Relation rel, TransactionId xid)
    4315                 : {
    4316                 :     GlobalVisState *state;
    4317 ECB             : 
    4318 GIC           6 :     state = GlobalVisTestFor(rel);
    4319                 : 
    4320               6 :     return GlobalVisTestIsRemovableXid(state, xid);
    4321 ECB             : }
    4322                 : 
    4323                 : /*
    4324                 :  * Safely retreat *xid by retreat_by, storing the result in *xid.
    4325                 :  *
    4326                 :  * Need to be careful to prevent *xid from retreating below
    4327                 :  * FirstNormalTransactionId during epoch 0. This is important to prevent
    4328                 :  * generating xids that cannot be converted to a FullTransactionId without
    4329                 :  * wrapping around.
    4330                 :  *
    4331                 :  * If retreat_by would lead to a too old xid, FirstNormalTransactionId is
    4332                 :  * stored in *xid instead.
    4333                 :  */
    4334                 : static void
    4335 GIC      696287 : TransactionIdRetreatSafely(TransactionId *xid, int retreat_by, FullTransactionId rel)
    4336                 : {
    4337          696287 :     TransactionId original_xid = *xid;
    4338 ECB             :     FullTransactionId fxid;
    4339                 :     uint64      fxid_i;
    4340                 : 
    4341 GIC      696287 :     Assert(TransactionIdIsNormal(original_xid));
    4342          696287 :     Assert(retreat_by >= 0); /* relevant GUCs are stored as ints */
    4343          696287 :     AssertTransactionIdInAllowableRange(original_xid);
    4344 ECB             : 
    4345 CBC      696287 :     if (retreat_by == 0)
    4346          696287 :         return;
    4347                 : 
    4348 LBC           0 :     fxid = FullXidRelativeTo(rel, original_xid);
    4349               0 :     fxid_i = U64FromFullTransactionId(fxid);
    4350                 : 
    4351 UBC           0 :     if ((fxid_i - FirstNormalTransactionId) <= retreat_by)
    4352               0 :         *xid = FirstNormalTransactionId;
    4353                 :     else
    4354 EUB             :     {
    4355 UBC           0 :         *xid = TransactionIdRetreatedBy(original_xid, retreat_by);
    4356 UIC           0 :         Assert(TransactionIdIsNormal(*xid));
    4357               0 :         Assert(NormalTransactionIdPrecedes(*xid, original_xid));
    4358 EUB             :     }
    4359                 : }
    4360                 : 
    4361                 : /*
    4362                 :  * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it
    4363                 :  * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel).
    4364                 :  *
    4365                 :  * Be very careful about when to use this function. It can only safely be used
    4366                 :  * when there is a guarantee that xid is within MaxTransactionId / 2 xids of
    4367                 :  * rel. That e.g. can be guaranteed if the caller assures a snapshot is
    4368                 :  * held by the backend and xid is from a table (where vacuum/freezing ensures
    4369                 :  * the xid has to be within that range), or if xid is from the procarray and
    4370                 :  * prevents xid wraparound that way.
    4371                 :  */
    4372                 : static inline FullTransactionId
    4373 GIC    13230009 : FullXidRelativeTo(FullTransactionId rel, TransactionId xid)
    4374                 : {
    4375        13230009 :     TransactionId rel_xid = XidFromFullTransactionId(rel);
    4376 ECB             : 
    4377 GIC    13230009 :     Assert(TransactionIdIsValid(xid));
    4378 CBC    13230009 :     Assert(TransactionIdIsValid(rel_xid));
    4379                 : 
    4380 ECB             :     /* not guaranteed to find issues, but likely to catch mistakes */
    4381 CBC    13230009 :     AssertTransactionIdInAllowableRange(xid);
    4382                 : 
    4383 GIC    26460018 :     return FullTransactionIdFromU64(U64FromFullTransactionId(rel)
    4384 CBC    13230009 :                                     + (int32) (xid - rel_xid));
    4385                 : }
    4386 ECB             : 
    4387                 : 
    4388                 : /* ----------------------------------------------
    4389                 :  *      KnownAssignedTransactionIds sub-module
    4390                 :  * ----------------------------------------------
    4391                 :  */
    4392                 : 
    4393                 : /*
    4394                 :  * In Hot Standby mode, we maintain a list of transactions that are (or were)
    4395                 :  * running on the primary at the current point in WAL.  These XIDs must be
    4396                 :  * treated as running by standby transactions, even though they are not in
    4397                 :  * the standby server's PGPROC array.
    4398                 :  *
    4399                 :  * We record all XIDs that we know have been assigned.  That includes all the
    4400                 :  * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have
    4401                 :  * been assigned.  We can deduce the existence of unobserved XIDs because we
    4402                 :  * know XIDs are assigned in sequence, with no gaps.  The KnownAssignedXids
    4403                 :  * list expands as new XIDs are observed or inferred, and contracts when
    4404                 :  * transaction completion records arrive.
    4405                 :  *
    4406                 :  * During hot standby we do not fret too much about the distinction between
    4407                 :  * top-level XIDs and subtransaction XIDs. We store both together in the
    4408                 :  * KnownAssignedXids list.  In backends, this is copied into snapshots in
    4409                 :  * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot()
    4410                 :  * doesn't care about the distinction either.  Subtransaction XIDs are
    4411                 :  * effectively treated as top-level XIDs and in the typical case pg_subtrans
    4412                 :  * links are *not* maintained (which does not affect visibility).
    4413                 :  *
    4414                 :  * We have room in KnownAssignedXids and in snapshots to hold maxProcs *
    4415                 :  * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every primary transaction must
    4416                 :  * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at
    4417                 :  * least every PGPROC_MAX_CACHED_SUBXIDS.  When we receive one of these
    4418                 :  * records, we mark the subXIDs as children of the top XID in pg_subtrans,
    4419                 :  * and then remove them from KnownAssignedXids.  This prevents overflow of
    4420                 :  * KnownAssignedXids and snapshots, at the cost that status checks for these
    4421                 :  * subXIDs will take a slower path through TransactionIdIsInProgress().
    4422                 :  * This means that KnownAssignedXids is not necessarily complete for subXIDs,
    4423                 :  * though it should be complete for top-level XIDs; this is the same situation
    4424                 :  * that holds with respect to the PGPROC entries in normal running.
    4425                 :  *
    4426                 :  * When we throw away subXIDs from KnownAssignedXids, we need to keep track of
    4427                 :  * that, similarly to tracking overflow of a PGPROC's subxids array.  We do
    4428                 :  * that by remembering the lastOverflowedXid, ie the last thrown-away subXID.
    4429                 :  * As long as that is within the range of interesting XIDs, we have to assume
    4430                 :  * that subXIDs are missing from snapshots.  (Note that subXID overflow occurs
    4431                 :  * on primary when 65th subXID arrives, whereas on standby it occurs when 64th
    4432                 :  * subXID arrives - that is not an error.)
    4433                 :  *
    4434                 :  * Should a backend on primary somehow disappear before it can write an abort
    4435                 :  * record, then we just leave those XIDs in KnownAssignedXids. They actually
    4436                 :  * aborted but we think they were running; the distinction is irrelevant
    4437                 :  * because either way any changes done by the transaction are not visible to
    4438                 :  * backends in the standby.  We prune KnownAssignedXids when
    4439                 :  * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the
    4440                 :  * array due to such dead XIDs.
    4441                 :  */
    4442                 : 
    4443                 : /*
    4444                 :  * RecordKnownAssignedTransactionIds
    4445                 :  *      Record the given XID in KnownAssignedXids, as well as any preceding
    4446                 :  *      unobserved XIDs.
    4447                 :  *
    4448                 :  * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
    4449                 :  * associated with a transaction. Must be called for each record after we
    4450                 :  * have executed StartupCLOG() et al, since we must ExtendCLOG() etc.
    4451                 :  *
    4452                 :  * Called during recovery in analogy with and in place of GetNewTransactionId()
    4453                 :  */
    4454                 : void
    4455 GIC     2260738 : RecordKnownAssignedTransactionIds(TransactionId xid)
    4456                 : {
    4457         2260738 :     Assert(standbyState >= STANDBY_INITIALIZED);
    4458 CBC     2260738 :     Assert(TransactionIdIsValid(xid));
    4459 GIC     2260738 :     Assert(TransactionIdIsValid(latestObservedXid));
    4460 ECB             : 
    4461 CBC     2260738 :     elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
    4462 ECB             :          xid, latestObservedXid);
    4463                 : 
    4464                 :     /*
    4465                 :      * When a newly observed xid arrives, it is frequently the case that it is
    4466                 :      * *not* the next xid in sequence. When this occurs, we must treat the
    4467                 :      * intervening xids as running also.
    4468                 :      */
    4469 GIC     2260738 :     if (TransactionIdFollows(xid, latestObservedXid))
    4470                 :     {
    4471                 :         TransactionId next_expected_xid;
    4472 ECB             : 
    4473                 :         /*
    4474                 :          * Extend subtrans like we do in GetNewTransactionId() during normal
    4475                 :          * operation using individual extend steps. Note that we do not need
    4476                 :          * to extend clog since its extensions are WAL logged.
    4477                 :          *
    4478                 :          * This part has to be done regardless of standbyState since we
    4479                 :          * immediately start assigning subtransactions to their toplevel
    4480                 :          * transactions.
    4481                 :          */
    4482 GIC       19416 :         next_expected_xid = latestObservedXid;
    4483           39120 :         while (TransactionIdPrecedes(next_expected_xid, xid))
    4484                 :         {
    4485 CBC       19704 :             TransactionIdAdvance(next_expected_xid);
    4486           19704 :             ExtendSUBTRANS(next_expected_xid);
    4487                 :         }
    4488           19416 :         Assert(next_expected_xid == xid);
    4489 ECB             : 
    4490                 :         /*
    4491                 :          * If the KnownAssignedXids machinery isn't up yet, there's nothing
    4492                 :          * more to do since we don't track assigned xids yet.
    4493                 :          */
    4494 GIC       19416 :         if (standbyState <= STANDBY_INITIALIZED)
    4495                 :         {
    4496               4 :             latestObservedXid = xid;
    4497 CBC           4 :             return;
    4498                 :         }
    4499 ECB             : 
    4500                 :         /*
    4501                 :          * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
    4502                 :          */
    4503 GIC       19412 :         next_expected_xid = latestObservedXid;
    4504           19412 :         TransactionIdAdvance(next_expected_xid);
    4505           19412 :         KnownAssignedXidsAdd(next_expected_xid, xid, false);
    4506 ECB             : 
    4507                 :         /*
    4508                 :          * Now we can advance latestObservedXid
    4509                 :          */
    4510 GIC       19412 :         latestObservedXid = xid;
    4511                 : 
    4512                 :         /* ShmemVariableCache->nextXid must be beyond any observed xid */
    4513 CBC       19412 :         AdvanceNextFullTransactionIdPastXid(latestObservedXid);
    4514                 :     }
    4515                 : }
    4516 ECB             : 
    4517                 : /*
    4518                 :  * ExpireTreeKnownAssignedTransactionIds
    4519                 :  *      Remove the given XIDs from KnownAssignedXids.
    4520                 :  *
    4521                 :  * Called during recovery in analogy with and in place of ProcArrayEndTransaction()
    4522                 :  */
    4523                 : void
    4524 GIC       18179 : ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids,
    4525                 :                                       TransactionId *subxids, TransactionId max_xid)
    4526                 : {
    4527 CBC       18179 :     Assert(standbyState >= STANDBY_INITIALIZED);
    4528                 : 
    4529                 :     /*
    4530 ECB             :      * Uses same locking as transaction commit
    4531                 :      */
    4532 GIC       18179 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    4533                 : 
    4534           18179 :     KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
    4535 ECB             : 
    4536                 :     /* As in ProcArrayEndTransaction, advance latestCompletedXid */
    4537 CBC       18179 :     MaintainLatestCompletedXidRecovery(max_xid);
    4538                 : 
    4539                 :     /* ... and xactCompletionCount */
    4540           18179 :     ShmemVariableCache->xactCompletionCount++;
    4541                 : 
    4542 GIC       18179 :     LWLockRelease(ProcArrayLock);
    4543 CBC       18179 : }
    4544                 : 
    4545 ECB             : /*
    4546                 :  * ExpireAllKnownAssignedTransactionIds
    4547                 :  *      Remove all entries in KnownAssignedXids and reset lastOverflowedXid.
    4548                 :  */
    4549                 : void
    4550 GIC          71 : ExpireAllKnownAssignedTransactionIds(void)
    4551                 : {
    4552              71 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    4553 CBC          71 :     KnownAssignedXidsRemovePreceding(InvalidTransactionId);
    4554                 : 
    4555 ECB             :     /*
    4556                 :      * Reset lastOverflowedXid.  Currently, lastOverflowedXid has no use after
    4557                 :      * the call of this function.  But do this for consistency with what
    4558                 :      * ExpireOldKnownAssignedTransactionIds() does.
    4559                 :      */
    4560 GIC          71 :     procArray->lastOverflowedXid = InvalidTransactionId;
    4561              71 :     LWLockRelease(ProcArrayLock);
    4562              71 : }
    4563 ECB             : 
    4564                 : /*
    4565                 :  * ExpireOldKnownAssignedTransactionIds
    4566                 :  *      Remove KnownAssignedXids entries preceding the given XID and
    4567                 :  *      potentially reset lastOverflowedXid.
    4568                 :  */
    4569                 : void
    4570 GIC         195 : ExpireOldKnownAssignedTransactionIds(TransactionId xid)
    4571                 : {
    4572             195 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    4573 ECB             : 
    4574                 :     /*
    4575                 :      * Reset lastOverflowedXid if we know that all transactions that could
    4576                 :      * possibly have been running are gone.  Not doing so could leave an
    4577                 :      * incorrect lastOverflowedXid value, which causes extra snapshots to be
    4578                 :      * marked as suboverflowed.
    4579                 :      */
    4580 GIC         195 :     if (TransactionIdPrecedes(procArray->lastOverflowedXid, xid))
    4581             192 :         procArray->lastOverflowedXid = InvalidTransactionId;
    4582             195 :     KnownAssignedXidsRemovePreceding(xid);
    4583 CBC         195 :     LWLockRelease(ProcArrayLock);
    4584             195 : }
    4585 ECB             : 
    4586                 : /*
    4587                 :  * KnownAssignedTransactionIdsIdleMaintenance
    4588                 :  *      Opportunistically do maintenance work when the startup process
    4589                 :  *      is about to go idle.
    4590                 :  */
    4591                 : void
    4592 GIC       12995 : KnownAssignedTransactionIdsIdleMaintenance(void)
    4593                 : {
    4594           12995 :     KnownAssignedXidsCompress(KAX_STARTUP_PROCESS_IDLE, false);
    4595 CBC       12995 : }
    4596                 : 
    4597 ECB             : 
    4598                 : /*
    4599                 :  * Private module functions to manipulate KnownAssignedXids
    4600                 :  *
    4601                 :  * There are 5 main uses of the KnownAssignedXids data structure:
    4602                 :  *
    4603                 :  *  * backends taking snapshots - all valid XIDs need to be copied out
    4604                 :  *  * backends seeking to determine presence of a specific XID
    4605                 :  *  * startup process adding new known-assigned XIDs
    4606                 :  *  * startup process removing specific XIDs as transactions end
    4607                 :  *  * startup process pruning array when special WAL records arrive
    4608                 :  *
    4609                 :  * This data structure is known to be a hot spot during Hot Standby, so we
    4610                 :  * go to some lengths to make these operations as efficient and as concurrent
    4611                 :  * as possible.
    4612                 :  *
    4613                 :  * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes
    4614                 :  * order, to be exact --- to allow binary search for specific XIDs.  Note:
    4615                 :  * in general TransactionIdPrecedes would not provide a total order, but
    4616                 :  * we know that the entries present at any instant should not extend across
    4617                 :  * a large enough fraction of XID space to wrap around (the primary would
    4618                 :  * shut down for fear of XID wrap long before that happens).  So it's OK to
    4619                 :  * use TransactionIdPrecedes as a binary-search comparator.
    4620                 :  *
    4621                 :  * It's cheap to maintain the sortedness during insertions, since new known
    4622                 :  * XIDs are always reported in XID order; we just append them at the right.
    4623                 :  *
    4624                 :  * To keep individual deletions cheap, we need to allow gaps in the array.
    4625                 :  * This is implemented by marking array elements as valid or invalid using
    4626                 :  * the parallel boolean array KnownAssignedXidsValid[].  A deletion is done
    4627                 :  * by setting KnownAssignedXidsValid[i] to false, *without* clearing the
    4628                 :  * XID entry itself.  This preserves the property that the XID entries are
    4629                 :  * sorted, so we can do binary searches easily.  Periodically we compress
    4630                 :  * out the unused entries; that's much cheaper than having to compress the
    4631                 :  * array immediately on every deletion.
    4632                 :  *
    4633                 :  * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
    4634                 :  * are those with indexes tail <= i < head; items outside this subscript range
    4635                 :  * have unspecified contents.  When head reaches the end of the array, we
    4636                 :  * force compression of unused entries rather than wrapping around, since
    4637                 :  * allowing wraparound would greatly complicate the search logic.  We maintain
    4638                 :  * an explicit tail pointer so that pruning of old XIDs can be done without
    4639                 :  * immediately moving the array contents.  In most cases only a small fraction
    4640                 :  * of the array contains valid entries at any instant.
    4641                 :  *
    4642                 :  * Although only the startup process can ever change the KnownAssignedXids
    4643                 :  * data structure, we still need interlocking so that standby backends will
    4644                 :  * not observe invalid intermediate states.  The convention is that backends
    4645                 :  * must hold shared ProcArrayLock to examine the array.  To remove XIDs from
    4646                 :  * the array, the startup process must hold ProcArrayLock exclusively, for
    4647                 :  * the usual transactional reasons (compare commit/abort of a transaction
    4648                 :  * during normal running).  Compressing unused entries out of the array
    4649                 :  * likewise requires exclusive lock.  To add XIDs to the array, we just insert
    4650                 :  * them into slots to the right of the head pointer and then advance the head
    4651                 :  * pointer.  This wouldn't require any lock at all, except that on machines
    4652                 :  * with weak memory ordering we need to be careful that other processors
    4653                 :  * see the array element changes before they see the head pointer change.
    4654                 :  * We handle this by using a spinlock to protect reads and writes of the
    4655                 :  * head/tail pointers.  (We could dispense with the spinlock if we were to
    4656                 :  * create suitable memory access barrier primitives and use those instead.)
    4657                 :  * The spinlock must be taken to read or write the head/tail pointers unless
    4658                 :  * the caller holds ProcArrayLock exclusively.
    4659                 :  *
    4660                 :  * Algorithmic analysis:
    4661                 :  *
    4662                 :  * If we have a maximum of M slots, with N XIDs currently spread across
    4663                 :  * S elements then we have N <= S <= M always.
    4664                 :  *
    4665                 :  *  * Adding a new XID is O(1) and needs little locking (unless compression
    4666                 :  *      must happen)
    4667                 :  *  * Compressing the array is O(S) and requires exclusive lock
    4668                 :  *  * Removing an XID is O(logS) and requires exclusive lock
    4669                 :  *  * Taking a snapshot is O(S) and requires shared lock
    4670                 :  *  * Checking for an XID is O(logS) and requires shared lock
    4671                 :  *
    4672                 :  * In comparison, using a hash table for KnownAssignedXids would mean that
    4673                 :  * taking snapshots would be O(M). If we can maintain S << M then the
    4674                 :  * sorted array technique will deliver significantly faster snapshots.
    4675                 :  * If we try to keep S too small then we will spend too much time compressing,
    4676                 :  * so there is an optimal point for any workload mix. We use a heuristic to
    4677                 :  * decide when to compress the array, though trimming also helps reduce
    4678                 :  * frequency of compressing. The heuristic requires us to track the number of
    4679                 :  * currently valid XIDs in the array (N).  Except in special cases, we'll
    4680                 :  * compress when S >= 2N.  Bounding S at 2N in turn bounds the time for
    4681                 :  * taking a snapshot to be O(N), which it would have to be anyway.
    4682                 :  */
    4683                 : 
    4684                 : 
    4685                 : /*
    4686                 :  * Compress KnownAssignedXids by shifting valid data down to the start of the
    4687                 :  * array, removing any gaps.
    4688                 :  *
    4689                 :  * A compression step is forced if "reason" is KAX_NO_SPACE, otherwise
    4690                 :  * we do it only if a heuristic indicates it's a good time to do it.
    4691                 :  *
    4692                 :  * Compression requires holding ProcArrayLock in exclusive mode.
    4693                 :  * Caller must pass haveLock = true if it already holds the lock.
    4694                 :  */
    4695                 : static void
    4696 GIC       31390 : KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock)
    4697                 : {
    4698           31390 :     ProcArrayStruct *pArray = procArray;
    4699 ECB             :     int         head,
    4700                 :                 tail,
    4701                 :                 nelements;
    4702                 :     int         compress_index;
    4703                 :     int         i;
    4704                 : 
    4705                 :     /* Counters for compression heuristics */
    4706                 :     static unsigned int transactionEndsCounter;
    4707                 :     static TimestampTz lastCompressTs;
    4708                 : 
    4709                 :     /* Tuning constants */
    4710                 : #define KAX_COMPRESS_FREQUENCY 128  /* in transactions */
    4711                 : #define KAX_COMPRESS_IDLE_INTERVAL 1000 /* in ms */
    4712                 : 
    4713                 :     /*
    4714                 :      * Since only the startup process modifies the head/tail pointers, we
    4715                 :      * don't need a lock to read them here.
    4716                 :      */
    4717 GIC       31390 :     head = pArray->headKnownAssignedXids;
    4718           31390 :     tail = pArray->tailKnownAssignedXids;
    4719           31390 :     nelements = head - tail;
    4720 ECB             : 
    4721                 :     /*
    4722                 :      * If we can choose whether to compress, use a heuristic to avoid
    4723                 :      * compressing too often or not often enough.  "Compress" here simply
    4724                 :      * means moving the values to the beginning of the array, so it is not as
    4725                 :      * complex or costly as typical data compression algorithms.
    4726                 :      */
    4727 GIC       31390 :     if (nelements == pArray->numKnownAssignedXids)
    4728                 :     {
    4729                 :         /*
    4730 ECB             :          * When there are no gaps between head and tail, don't bother to
    4731                 :          * compress, except in the KAX_NO_SPACE case where we must compress to
    4732                 :          * create some space after the head.
    4733                 :          */
    4734 GIC       15008 :         if (reason != KAX_NO_SPACE)
    4735           15008 :             return;
    4736                 :     }
    4737 CBC       16382 :     else if (reason == KAX_TRANSACTION_END)
    4738 ECB             :     {
    4739                 :         /*
    4740                 :          * Consider compressing only once every so many commits.  Frequency
    4741                 :          * determined by benchmarks.
    4742                 :          */
    4743 GIC       10929 :         if ((transactionEndsCounter++) % KAX_COMPRESS_FREQUENCY != 0)
    4744           10835 :             return;
    4745                 : 
    4746 ECB             :         /*
    4747                 :          * Furthermore, compress only if the used part of the array is less
    4748                 :          * than 50% full (see comments above).
    4749                 :          */
    4750 GIC          94 :         if (nelements < 2 * pArray->numKnownAssignedXids)
    4751               3 :             return;
    4752                 :     }
    4753 CBC        5453 :     else if (reason == KAX_STARTUP_PROCESS_IDLE)
    4754 ECB             :     {
    4755                 :         /*
    4756                 :          * We're about to go idle for lack of new WAL, so we might as well
    4757                 :          * compress.  But not too often, to avoid ProcArray lock contention
    4758                 :          * with readers.
    4759                 :          */
    4760 GIC        5447 :         if (lastCompressTs != 0)
    4761                 :         {
    4762                 :             TimestampTz compress_after;
    4763 ECB             : 
    4764 GIC        5447 :             compress_after = TimestampTzPlusMilliseconds(lastCompressTs,
    4765                 :                                                          KAX_COMPRESS_IDLE_INTERVAL);
    4766            5447 :             if (GetCurrentTimestamp() < compress_after)
    4767 CBC        5430 :                 return;
    4768                 :         }
    4769 ECB             :     }
    4770                 : 
    4771                 :     /* Need to compress, so get the lock if we don't have it. */
    4772 GIC         114 :     if (!haveLock)
    4773              17 :         LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    4774                 : 
    4775 ECB             :     /*
    4776                 :      * We compress the array by reading the valid values from tail to head,
    4777                 :      * re-aligning data to 0th element.
    4778                 :      */
    4779 GIC         114 :     compress_index = 0;
    4780            6592 :     for (i = tail; i < head; i++)
    4781                 :     {
    4782 CBC        6478 :         if (KnownAssignedXidsValid[i])
    4783 ECB             :         {
    4784 GIC         341 :             KnownAssignedXids[compress_index] = KnownAssignedXids[i];
    4785 CBC         341 :             KnownAssignedXidsValid[compress_index] = true;
    4786 GIC         341 :             compress_index++;
    4787 ECB             :         }
    4788                 :     }
    4789 CBC         114 :     Assert(compress_index == pArray->numKnownAssignedXids);
    4790                 : 
    4791 GIC         114 :     pArray->tailKnownAssignedXids = 0;
    4792 CBC         114 :     pArray->headKnownAssignedXids = compress_index;
    4793                 : 
    4794             114 :     if (!haveLock)
    4795              17 :         LWLockRelease(ProcArrayLock);
    4796                 : 
    4797 ECB             :     /* Update timestamp for maintenance.  No need to hold lock for this. */
    4798 CBC         114 :     lastCompressTs = GetCurrentTimestamp();
    4799                 : }
    4800                 : 
    4801 ECB             : /*
    4802                 :  * Add xids into KnownAssignedXids at the head of the array.
    4803                 :  *
    4804                 :  * xids from from_xid to to_xid, inclusive, are added to the array.
    4805                 :  *
    4806                 :  * If exclusive_lock is true then caller already holds ProcArrayLock in
    4807                 :  * exclusive mode, so we need no extra locking here.  Else caller holds no
    4808                 :  * lock, so we need to be sure we maintain sufficient interlocks against
    4809                 :  * concurrent readers.  (Only the startup process ever calls this, so no need
    4810                 :  * to worry about concurrent writers.)
    4811                 :  */
    4812                 : static void
    4813 GIC       19414 : KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
    4814                 :                      bool exclusive_lock)
    4815                 : {
    4816 CBC       19414 :     ProcArrayStruct *pArray = procArray;
    4817                 :     TransactionId next_xid;
    4818                 :     int         head,
    4819 ECB             :                 tail;
    4820                 :     int         nxids;
    4821                 :     int         i;
    4822                 : 
    4823 GIC       19414 :     Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));
    4824                 : 
    4825                 :     /*
    4826 ECB             :      * Calculate how many array slots we'll need.  Normally this is cheap; in
    4827                 :      * the unusual case where the XIDs cross the wrap point, we do it the hard
    4828                 :      * way.
    4829                 :      */
    4830 GIC       19414 :     if (to_xid >= from_xid)
    4831           19414 :         nxids = to_xid - from_xid + 1;
    4832                 :     else
    4833 ECB             :     {
    4834 LBC           0 :         nxids = 1;
    4835 UIC           0 :         next_xid = from_xid;
    4836               0 :         while (TransactionIdPrecedes(next_xid, to_xid))
    4837 EUB             :         {
    4838 UBC           0 :             nxids++;
    4839               0 :             TransactionIdAdvance(next_xid);
    4840                 :         }
    4841 EUB             :     }
    4842                 : 
    4843                 :     /*
    4844                 :      * Since only the startup process modifies the head/tail pointers, we
    4845                 :      * don't need a lock to read them here.
    4846                 :      */
    4847 GIC       19414 :     head = pArray->headKnownAssignedXids;
    4848           19414 :     tail = pArray->tailKnownAssignedXids;
    4849                 : 
    4850 CBC       19414 :     Assert(head >= 0 && head <= pArray->maxKnownAssignedXids);
    4851           19414 :     Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids);
    4852                 : 
    4853 ECB             :     /*
    4854                 :      * Verify that insertions occur in TransactionId sequence.  Note that even
    4855                 :      * if the last existing element is marked invalid, it must still have a
    4856                 :      * correctly sequenced XID value.
    4857                 :      */
    4858 GIC       32320 :     if (head > tail &&
    4859           12906 :         TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid))
    4860                 :     {
    4861 LBC           0 :         KnownAssignedXidsDisplay(LOG);
    4862               0 :         elog(ERROR, "out-of-order XID insertion in KnownAssignedXids");
    4863                 :     }
    4864 EUB             : 
    4865                 :     /*
    4866                 :      * If our xids won't fit in the remaining space, compress out free space
    4867                 :      */
    4868 GIC       19414 :     if (head + nxids > pArray->maxKnownAssignedXids)
    4869                 :     {
    4870 UIC           0 :         KnownAssignedXidsCompress(KAX_NO_SPACE, exclusive_lock);
    4871 ECB             : 
    4872 UIC           0 :         head = pArray->headKnownAssignedXids;
    4873 EUB             :         /* note: we no longer care about the tail pointer */
    4874                 : 
    4875                 :         /*
    4876                 :          * If it still won't fit then we're out of memory
    4877                 :          */
    4878 UIC           0 :         if (head + nxids > pArray->maxKnownAssignedXids)
    4879               0 :             elog(ERROR, "too many KnownAssignedXids");
    4880                 :     }
    4881 EUB             : 
    4882                 :     /* Now we can insert the xids into the space starting at head */
    4883 GIC       19414 :     next_xid = from_xid;
    4884           39116 :     for (i = 0; i < nxids; i++)
    4885                 :     {
    4886 CBC       19702 :         KnownAssignedXids[head] = next_xid;
    4887           19702 :         KnownAssignedXidsValid[head] = true;
    4888 GIC       19702 :         TransactionIdAdvance(next_xid);
    4889 CBC       19702 :         head++;
    4890 ECB             :     }
    4891                 : 
    4892                 :     /* Adjust count of number of valid entries */
    4893 GIC       19414 :     pArray->numKnownAssignedXids += nxids;
    4894                 : 
    4895                 :     /*
    4896 ECB             :      * Now update the head pointer.  We use a spinlock to protect this
    4897                 :      * pointer, not because the update is likely to be non-atomic, but to
    4898                 :      * ensure that other processors see the above array updates before they
    4899                 :      * see the head pointer change.
    4900                 :      *
    4901                 :      * If we're holding ProcArrayLock exclusively, there's no need to take the
    4902                 :      * spinlock.
    4903                 :      */
    4904 GIC       19414 :     if (exclusive_lock)
    4905               2 :         pArray->headKnownAssignedXids = head;
    4906                 :     else
    4907 ECB             :     {
    4908 CBC       19412 :         SpinLockAcquire(&pArray->known_assigned_xids_lck);
    4909 GIC       19412 :         pArray->headKnownAssignedXids = head;
    4910           19412 :         SpinLockRelease(&pArray->known_assigned_xids_lck);
    4911 ECB             :     }
    4912 CBC       19414 : }
    4913 ECB             : 
    4914                 : /*
    4915                 :  * KnownAssignedXidsSearch
    4916                 :  *
    4917                 :  * Searches KnownAssignedXids for a specific xid and optionally removes it.
    4918                 :  * Returns true if it was found, false if not.
    4919                 :  *
    4920                 :  * Caller must hold ProcArrayLock in shared or exclusive mode.
    4921                 :  * Exclusive lock must be held for remove = true.
    4922                 :  */
    4923                 : static bool
    4924 GIC       20841 : KnownAssignedXidsSearch(TransactionId xid, bool remove)
    4925                 : {
    4926           20841 :     ProcArrayStruct *pArray = procArray;
    4927 ECB             :     int         first,
    4928                 :                 last;
    4929                 :     int         head;
    4930                 :     int         tail;
    4931 GIC       20841 :     int         result_index = -1;
    4932                 : 
    4933           20841 :     if (remove)
    4934 ECB             :     {
    4935                 :         /* we hold ProcArrayLock exclusively, so no need for spinlock */
    4936 CBC       20841 :         tail = pArray->tailKnownAssignedXids;
    4937 GIC       20841 :         head = pArray->headKnownAssignedXids;
    4938                 :     }
    4939 ECB             :     else
    4940                 :     {
    4941                 :         /* take spinlock to ensure we see up-to-date array contents */
    4942 UIC           0 :         SpinLockAcquire(&pArray->known_assigned_xids_lck);
    4943               0 :         tail = pArray->tailKnownAssignedXids;
    4944               0 :         head = pArray->headKnownAssignedXids;
    4945 UBC           0 :         SpinLockRelease(&pArray->known_assigned_xids_lck);
    4946 EUB             :     }
    4947                 : 
    4948                 :     /*
    4949                 :      * Standard binary search.  Note we can ignore the KnownAssignedXidsValid
    4950                 :      * array here, since even invalid entries will contain sorted XIDs.
    4951                 :      */
    4952 GIC       20841 :     first = tail;
    4953           20841 :     last = head - 1;
    4954           75440 :     while (first <= last)
    4955 ECB             :     {
    4956                 :         int         mid_index;
    4957                 :         TransactionId mid_xid;
    4958                 : 
    4959 GIC       74249 :         mid_index = (first + last) / 2;
    4960           74249 :         mid_xid = KnownAssignedXids[mid_index];
    4961                 : 
    4962 CBC       74249 :         if (xid == mid_xid)
    4963 ECB             :         {
    4964 GIC       19650 :             result_index = mid_index;
    4965 CBC       19650 :             break;
    4966                 :         }
    4967           54599 :         else if (TransactionIdPrecedes(xid, mid_xid))
    4968           11139 :             last = mid_index - 1;
    4969                 :         else
    4970           43460 :             first = mid_index + 1;
    4971 ECB             :     }
    4972                 : 
    4973 CBC       20841 :     if (result_index < 0)
    4974 GIC        1191 :         return false;           /* not in array */
    4975                 : 
    4976 CBC       19650 :     if (!KnownAssignedXidsValid[result_index])
    4977              30 :         return false;           /* in array, but invalid */
    4978                 : 
    4979           19620 :     if (remove)
    4980 ECB             :     {
    4981 GIC       19620 :         KnownAssignedXidsValid[result_index] = false;
    4982 ECB             : 
    4983 GIC       19620 :         pArray->numKnownAssignedXids--;
    4984 CBC       19620 :         Assert(pArray->numKnownAssignedXids >= 0);
    4985                 : 
    4986 ECB             :         /*
    4987                 :          * If we're removing the tail element then advance tail pointer over
    4988                 :          * any invalid elements.  This will speed future searches.
    4989                 :          */
    4990 GIC       19620 :         if (result_index == tail)
    4991                 :         {
    4992            7694 :             tail++;
    4993 CBC       13483 :             while (tail < head && !KnownAssignedXidsValid[tail])
    4994 GIC        5789 :                 tail++;
    4995 CBC        7694 :             if (tail >= head)
    4996 ECB             :             {
    4997                 :                 /* Array is empty, so we can reset both pointers */
    4998 CBC        6501 :                 pArray->headKnownAssignedXids = 0;
    4999 GIC        6501 :                 pArray->tailKnownAssignedXids = 0;
    5000                 :             }
    5001 ECB             :             else
    5002                 :             {
    5003 GIC        1193 :                 pArray->tailKnownAssignedXids = tail;
    5004                 :             }
    5005                 :         }
    5006 ECB             :     }
    5007                 : 
    5008 GIC       19620 :     return true;
    5009                 : }
    5010                 : 
    5011 ECB             : /*
    5012                 :  * Is the specified XID present in KnownAssignedXids[]?
    5013                 :  *
    5014                 :  * Caller must hold ProcArrayLock in shared or exclusive mode.
    5015                 :  */
    5016                 : static bool
    5017 UIC           0 : KnownAssignedXidExists(TransactionId xid)
    5018                 : {
    5019               0 :     Assert(TransactionIdIsValid(xid));
    5020 EUB             : 
    5021 UIC           0 :     return KnownAssignedXidsSearch(xid, false);
    5022 EUB             : }
    5023                 : 
    5024                 : /*
    5025                 :  * Remove the specified XID from KnownAssignedXids[].
    5026                 :  *
    5027                 :  * Caller must hold ProcArrayLock in exclusive mode.
    5028                 :  */
    5029                 : static void
    5030 GIC       20841 : KnownAssignedXidsRemove(TransactionId xid)
    5031                 : {
    5032           20841 :     Assert(TransactionIdIsValid(xid));
    5033 ECB             : 
    5034 GIC       20841 :     elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
    5035 ECB             : 
    5036                 :     /*
    5037                 :      * Note: we cannot consider it an error to remove an XID that's not
    5038                 :      * present.  We intentionally remove subxact IDs while processing
    5039                 :      * XLOG_XACT_ASSIGNMENT, to avoid array overflow.  Then those XIDs will be
    5040                 :      * removed again when the top-level xact commits or aborts.
    5041                 :      *
    5042                 :      * It might be possible to track such XIDs to distinguish this case from
    5043                 :      * actual errors, but it would be complicated and probably not worth it.
    5044                 :      * So, just ignore the search result.
    5045                 :      */
    5046 GIC       20841 :     (void) KnownAssignedXidsSearch(xid, true);
    5047           20841 : }
    5048                 : 
    5049 ECB             : /*
    5050                 :  * KnownAssignedXidsRemoveTree
    5051                 :  *      Remove xid (if it's not InvalidTransactionId) and all the subxids.
    5052                 :  *
    5053                 :  * Caller must hold ProcArrayLock in exclusive mode.
    5054                 :  */
    5055                 : static void
    5056 GIC       18200 : KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
    5057                 :                             TransactionId *subxids)
    5058                 : {
    5059 ECB             :     int         i;
    5060                 : 
    5061 GIC       18200 :     if (TransactionIdIsValid(xid))
    5062           18179 :         KnownAssignedXidsRemove(xid);
    5063                 : 
    5064 CBC       20862 :     for (i = 0; i < nsubxids; i++)
    5065            2662 :         KnownAssignedXidsRemove(subxids[i]);
    5066                 : 
    5067 ECB             :     /* Opportunistically compress the array */
    5068 CBC       18200 :     KnownAssignedXidsCompress(KAX_TRANSACTION_END, true);
    5069 GIC       18200 : }
    5070                 : 
    5071 ECB             : /*
    5072                 :  * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid
    5073                 :  * then clear the whole table.
    5074                 :  *
    5075                 :  * Caller must hold ProcArrayLock in exclusive mode.
    5076                 :  */
    5077                 : static void
    5078 GIC         266 : KnownAssignedXidsRemovePreceding(TransactionId removeXid)
    5079                 : {
    5080             266 :     ProcArrayStruct *pArray = procArray;
    5081 CBC         266 :     int         count = 0;
    5082                 :     int         head,
    5083 ECB             :                 tail,
    5084                 :                 i;
    5085                 : 
    5086 GIC         266 :     if (!TransactionIdIsValid(removeXid))
    5087                 :     {
    5088              71 :         elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
    5089 CBC          71 :         pArray->numKnownAssignedXids = 0;
    5090 GIC          71 :         pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0;
    5091 CBC          71 :         return;
    5092 ECB             :     }
    5093                 : 
    5094 CBC         195 :     elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid);
    5095                 : 
    5096                 :     /*
    5097 ECB             :      * Mark entries invalid starting at the tail.  Since array is sorted, we
    5098                 :      * can stop as soon as we reach an entry >= removeXid.
    5099                 :      */
    5100 GIC         195 :     tail = pArray->tailKnownAssignedXids;
    5101             195 :     head = pArray->headKnownAssignedXids;
    5102                 : 
    5103 CBC         195 :     for (i = tail; i < head; i++)
    5104 ECB             :     {
    5105 GIC          35 :         if (KnownAssignedXidsValid[i])
    5106 ECB             :         {
    5107 GIC          35 :             TransactionId knownXid = KnownAssignedXids[i];
    5108 ECB             : 
    5109 GIC          35 :             if (TransactionIdFollowsOrEquals(knownXid, removeXid))
    5110 CBC          35 :                 break;
    5111                 : 
    5112 LBC           0 :             if (!StandbyTransactionIdIsPrepared(knownXid))
    5113 ECB             :             {
    5114 UIC           0 :                 KnownAssignedXidsValid[i] = false;
    5115 UBC           0 :                 count++;
    5116                 :             }
    5117 EUB             :         }
    5118                 :     }
    5119                 : 
    5120 GIC         195 :     pArray->numKnownAssignedXids -= count;
    5121             195 :     Assert(pArray->numKnownAssignedXids >= 0);
    5122                 : 
    5123 ECB             :     /*
    5124                 :      * Advance the tail pointer if we've marked the tail item invalid.
    5125                 :      */
    5126 GIC         195 :     for (i = tail; i < head; i++)
    5127                 :     {
    5128              35 :         if (KnownAssignedXidsValid[i])
    5129 CBC          35 :             break;
    5130                 :     }
    5131             195 :     if (i >= head)
    5132 ECB             :     {
    5133                 :         /* Array is empty, so we can reset both pointers */
    5134 CBC         160 :         pArray->headKnownAssignedXids = 0;
    5135 GIC         160 :         pArray->tailKnownAssignedXids = 0;
    5136                 :     }
    5137 ECB             :     else
    5138                 :     {
    5139 GIC          35 :         pArray->tailKnownAssignedXids = i;
    5140                 :     }
    5141                 : 
    5142 ECB             :     /* Opportunistically compress the array */
    5143 GIC         195 :     KnownAssignedXidsCompress(KAX_PRUNE, true);
    5144                 : }
    5145                 : 
    5146 ECB             : /*
    5147                 :  * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
    5148                 :  * We filter out anything >= xmax.
    5149                 :  *
    5150                 :  * Returns the number of XIDs stored into xarray[].  Caller is responsible
    5151                 :  * that array is large enough.
    5152                 :  *
    5153                 :  * Caller must hold ProcArrayLock in (at least) shared mode.
    5154                 :  */
    5155                 : static int
    5156 UIC           0 : KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax)
    5157                 : {
    5158               0 :     TransactionId xtmp = InvalidTransactionId;
    5159 EUB             : 
    5160 UIC           0 :     return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax);
    5161 EUB             : }
    5162                 : 
    5163                 : /*
    5164                 :  * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus
    5165                 :  * we reduce *xmin to the lowest xid value seen if not already lower.
    5166                 :  *
    5167                 :  * Caller must hold ProcArrayLock in (at least) shared mode.
    5168                 :  */
    5169                 : static int
    5170 GIC         826 : KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
    5171                 :                                TransactionId xmax)
    5172                 : {
    5173 CBC         826 :     int         count = 0;
    5174                 :     int         head,
    5175                 :                 tail;
    5176 ECB             :     int         i;
    5177                 : 
    5178                 :     /*
    5179                 :      * Fetch head just once, since it may change while we loop. We can stop
    5180                 :      * once we reach the initially seen head, since we are certain that an xid
    5181                 :      * cannot enter and then leave the array while we hold ProcArrayLock.  We
    5182                 :      * might miss newly-added xids, but they should be >= xmax so irrelevant
    5183                 :      * anyway.
    5184                 :      *
    5185                 :      * Must take spinlock to ensure we see up-to-date array contents.
    5186                 :      */
    5187 GIC         826 :     SpinLockAcquire(&procArray->known_assigned_xids_lck);
    5188             826 :     tail = procArray->tailKnownAssignedXids;
    5189             826 :     head = procArray->headKnownAssignedXids;
    5190 CBC         826 :     SpinLockRelease(&procArray->known_assigned_xids_lck);
    5191 ECB             : 
    5192 CBC         845 :     for (i = tail; i < head; i++)
    5193 ECB             :     {
    5194                 :         /* Skip any gaps in the array */
    5195 CBC          58 :         if (KnownAssignedXidsValid[i])
    5196                 :         {
    5197 GIC          52 :             TransactionId knownXid = KnownAssignedXids[i];
    5198 ECB             : 
    5199                 :             /*
    5200                 :              * Update xmin if required.  Only the first XID need be checked,
    5201                 :              * since the array is sorted.
    5202                 :              */
    5203 GIC         104 :             if (count == 0 &&
    5204              52 :                 TransactionIdPrecedes(knownXid, *xmin))
    5205              13 :                 *xmin = knownXid;
    5206 ECB             : 
    5207                 :             /*
    5208                 :              * Filter out anything >= xmax, again relying on sorted property
    5209                 :              * of array.
    5210                 :              */
    5211 GIC         104 :             if (TransactionIdIsValid(xmax) &&
    5212              52 :                 TransactionIdFollowsOrEquals(knownXid, xmax))
    5213              39 :                 break;
    5214 ECB             : 
    5215                 :             /* Add knownXid into output array */
    5216 CBC          13 :             xarray[count++] = knownXid;
    5217                 :         }
    5218                 :     }
    5219 ECB             : 
    5220 GIC         826 :     return count;
    5221                 : }
    5222                 : 
    5223 ECB             : /*
    5224                 :  * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId
    5225                 :  * if nothing there.
    5226                 :  */
    5227                 : static TransactionId
    5228 GIC         116 : KnownAssignedXidsGetOldestXmin(void)
    5229                 : {
    5230                 :     int         head,
    5231 ECB             :                 tail;
    5232                 :     int         i;
    5233                 : 
    5234                 :     /*
    5235                 :      * Fetch head just once, since it may change while we loop.
    5236                 :      */
    5237 GIC         116 :     SpinLockAcquire(&procArray->known_assigned_xids_lck);
    5238             116 :     tail = procArray->tailKnownAssignedXids;
    5239             116 :     head = procArray->headKnownAssignedXids;
    5240 CBC         116 :     SpinLockRelease(&procArray->known_assigned_xids_lck);
    5241 ECB             : 
    5242 CBC         116 :     for (i = tail; i < head; i++)
    5243 ECB             :     {
    5244                 :         /* Skip any gaps in the array */
    5245 CBC           5 :         if (KnownAssignedXidsValid[i])
    5246 GIC           5 :             return KnownAssignedXids[i];
    5247                 :     }
    5248 ECB             : 
    5249 CBC         111 :     return InvalidTransactionId;
    5250                 : }
    5251                 : 
    5252 ECB             : /*
    5253                 :  * Display KnownAssignedXids to provide debug trail
    5254                 :  *
    5255                 :  * Currently this is only called within startup process, so we need no
    5256                 :  * special locking.
    5257                 :  *
    5258                 :  * Note this is pretty expensive, and much of the expense will be incurred
    5259                 :  * even if the elog message will get discarded.  It's not currently called
    5260                 :  * in any performance-critical places, however, so no need to be tenser.
    5261                 :  */
    5262                 : static void
    5263 GIC          73 : KnownAssignedXidsDisplay(int trace_level)
    5264                 : {
    5265              73 :     ProcArrayStruct *pArray = procArray;
    5266 ECB             :     StringInfoData buf;
    5267                 :     int         head,
    5268                 :                 tail,
    5269                 :                 i;
    5270 GIC          73 :     int         nxids = 0;
    5271                 : 
    5272              73 :     tail = pArray->tailKnownAssignedXids;
    5273 CBC          73 :     head = pArray->headKnownAssignedXids;
    5274                 : 
    5275              73 :     initStringInfo(&buf);
    5276 ECB             : 
    5277 GIC          77 :     for (i = tail; i < head; i++)
    5278 ECB             :     {
    5279 GIC           4 :         if (KnownAssignedXidsValid[i])
    5280 ECB             :         {
    5281 GIC           4 :             nxids++;
    5282 CBC           4 :             appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
    5283                 :         }
    5284 ECB             :     }
    5285                 : 
    5286 GIC          73 :     elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",
    5287                 :          nxids,
    5288                 :          pArray->numKnownAssignedXids,
    5289 ECB             :          pArray->tailKnownAssignedXids,
    5290                 :          pArray->headKnownAssignedXids,
    5291                 :          buf.data);
    5292                 : 
    5293 GIC          73 :     pfree(buf.data);
    5294              73 : }
    5295                 : 
    5296 ECB             : /*
    5297                 :  * KnownAssignedXidsReset
    5298                 :  *      Resets KnownAssignedXids to be empty
    5299                 :  */
    5300                 : static void
    5301 UIC           0 : KnownAssignedXidsReset(void)
    5302                 : {
    5303               0 :     ProcArrayStruct *pArray = procArray;
    5304 EUB             : 
    5305 UIC           0 :     LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    5306 EUB             : 
    5307 UIC           0 :     pArray->numKnownAssignedXids = 0;
    5308 UBC           0 :     pArray->tailKnownAssignedXids = 0;
    5309 UIC           0 :     pArray->headKnownAssignedXids = 0;
    5310 EUB             : 
    5311 UBC           0 :     LWLockRelease(ProcArrayLock);
    5312               0 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a