LCOV - differential code coverage report
Current view: top level - src/backend/access/transam - clog.c (source / functions) Coverage Total Hit UNC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 69.4 % 242 168 1 40 33 2 98 11 57 39 107 4
Current Date: 2023-04-08 17:13:01 Functions: 81.8 % 22 18 4 18 4 18
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 (180,240] days: 91.7 % 12 11 1 11
Legend: Lines: hit not hit (240..) days: 68.3 % 230 157 40 33 2 98 57 39 107
Function coverage date bins:
(240..) days: 40.9 % 44 18 4 18 4 18

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * clog.c
                                  4                 :  *      PostgreSQL transaction-commit-log manager
                                  5                 :  *
                                  6                 :  * This module replaces the old "pg_log" access code, which treated pg_log
                                  7                 :  * essentially like a relation, in that it went through the regular buffer
                                  8                 :  * manager.  The problem with that was that there wasn't any good way to
                                  9                 :  * recycle storage space for transactions so old that they'll never be
                                 10                 :  * looked up again.  Now we use specialized access code so that the commit
                                 11                 :  * log can be broken into relatively small, independent segments.
                                 12                 :  *
                                 13                 :  * XLOG interactions: this module generates an XLOG record whenever a new
                                 14                 :  * CLOG page is initialized to zeroes.  Other writes of CLOG come from
                                 15                 :  * recording of transaction commit or abort in xact.c, which generates its
                                 16                 :  * own XLOG records for these events and will re-perform the status update
                                 17                 :  * on redo; so we need make no additional XLOG entry here.  For synchronous
                                 18                 :  * transaction commits, the XLOG is guaranteed flushed through the XLOG commit
                                 19                 :  * record before we are called to log a commit, so the WAL rule "write xlog
                                 20                 :  * before data" is satisfied automatically.  However, for async commits we
                                 21                 :  * must track the latest LSN affecting each CLOG page, so that we can flush
                                 22                 :  * XLOG that far and satisfy the WAL rule.  We don't have to worry about this
                                 23                 :  * for aborts (whether sync or async), since the post-crash assumption would
                                 24                 :  * be that such transactions failed anyway.
                                 25                 :  *
                                 26                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                 27                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                 28                 :  *
                                 29                 :  * src/backend/access/transam/clog.c
                                 30                 :  *
                                 31                 :  *-------------------------------------------------------------------------
                                 32                 :  */
                                 33                 : #include "postgres.h"
                                 34                 : 
                                 35                 : #include "access/clog.h"
                                 36                 : #include "access/slru.h"
                                 37                 : #include "access/transam.h"
                                 38                 : #include "access/xlog.h"
                                 39                 : #include "access/xloginsert.h"
                                 40                 : #include "access/xlogutils.h"
                                 41                 : #include "miscadmin.h"
                                 42                 : #include "pg_trace.h"
                                 43                 : #include "pgstat.h"
                                 44                 : #include "storage/proc.h"
                                 45                 : #include "storage/sync.h"
                                 46                 : 
                                 47                 : /*
                                 48                 :  * Defines for CLOG page sizes.  A page is the same BLCKSZ as is used
                                 49                 :  * everywhere else in Postgres.
                                 50                 :  *
                                 51                 :  * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
                                 52                 :  * CLOG page numbering also wraps around at 0xFFFFFFFF/CLOG_XACTS_PER_PAGE,
                                 53                 :  * and CLOG segment numbering at
                                 54                 :  * 0xFFFFFFFF/CLOG_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT.  We need take no
                                 55                 :  * explicit notice of that fact in this module, except when comparing segment
                                 56                 :  * and page numbers in TruncateCLOG (see CLOGPagePrecedes).
                                 57                 :  */
                                 58                 : 
                                 59                 : /* We need two bits per xact, so four xacts fit in a byte */
                                 60                 : #define CLOG_BITS_PER_XACT  2
                                 61                 : #define CLOG_XACTS_PER_BYTE 4
                                 62                 : #define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
                                 63                 : #define CLOG_XACT_BITMASK   ((1 << CLOG_BITS_PER_XACT) - 1)
                                 64                 : 
                                 65                 : #define TransactionIdToPage(xid)    ((xid) / (TransactionId) CLOG_XACTS_PER_PAGE)
                                 66                 : #define TransactionIdToPgIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE)
                                 67                 : #define TransactionIdToByte(xid)    (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
                                 68                 : #define TransactionIdToBIndex(xid)  ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
                                 69                 : 
                                 70                 : /* We store the latest async LSN for each group of transactions */
                                 71                 : #define CLOG_XACTS_PER_LSN_GROUP    32  /* keep this a power of 2 */
                                 72                 : #define CLOG_LSNS_PER_PAGE  (CLOG_XACTS_PER_PAGE / CLOG_XACTS_PER_LSN_GROUP)
                                 73                 : 
                                 74                 : #define GetLSNIndex(slotno, xid)    ((slotno) * CLOG_LSNS_PER_PAGE + \
                                 75                 :     ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_LSN_GROUP)
                                 76                 : 
                                 77                 : /*
                                 78                 :  * The largest number of subtransactions for which we consider applying the
                                 79                 :  * clog group update optimization.  Testing reveals that a number higher than
                                 80                 :  * this can hurt performance.
                                 81                 :  */
                                 82                 : #define THRESHOLD_SUBTRANS_CLOG_OPT 5
                                 83                 : 
                                 84                 : /*
                                 85                 :  * Link to shared-memory data structures for CLOG control
                                 86                 :  */
                                 87                 : static SlruCtlData XactCtlData;
                                 88                 : 
                                 89                 : #define XactCtl (&XactCtlData)
                                 90                 : 
                                 91                 : 
                                 92                 : static int  ZeroCLOGPage(int pageno, bool writeXlog);
                                 93                 : static bool CLOGPagePrecedes(int page1, int page2);
                                 94                 : static void WriteZeroPageXlogRec(int pageno);
                                 95                 : static void WriteTruncateXlogRec(int pageno, TransactionId oldestXact,
                                 96                 :                                  Oid oldestXactDb);
                                 97                 : static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
                                 98                 :                                        TransactionId *subxids, XidStatus status,
                                 99                 :                                        XLogRecPtr lsn, int pageno,
                                100                 :                                        bool all_xact_same_page);
                                101                 : static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status,
                                102                 :                                       XLogRecPtr lsn, int slotno);
                                103                 : static void set_status_by_pages(int nsubxids, TransactionId *subxids,
                                104                 :                                 XidStatus status, XLogRecPtr lsn);
                                105                 : static bool TransactionGroupUpdateXidStatus(TransactionId xid,
                                106                 :                                             XidStatus status, XLogRecPtr lsn, int pageno);
                                107                 : static void TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
                                108                 :                                                TransactionId *subxids, XidStatus status,
                                109                 :                                                XLogRecPtr lsn, int pageno);
                                110                 : 
                                111                 : 
                                112                 : /*
                                113                 :  * TransactionIdSetTreeStatus
                                114                 :  *
                                115                 :  * Record the final state of transaction entries in the commit log for
                                116                 :  * a transaction and its subtransaction tree. Take care to ensure this is
                                117                 :  * efficient, and as atomic as possible.
                                118                 :  *
                                119                 :  * xid is a single xid to set status for. This will typically be
                                120                 :  * the top level transactionid for a top level commit or abort. It can
                                121                 :  * also be a subtransaction when we record transaction aborts.
                                122                 :  *
                                123                 :  * subxids is an array of xids of length nsubxids, representing subtransactions
                                124                 :  * in the tree of xid. In various cases nsubxids may be zero.
                                125                 :  *
                                126                 :  * lsn must be the WAL location of the commit record when recording an async
                                127                 :  * commit.  For a synchronous commit it can be InvalidXLogRecPtr, since the
                                128                 :  * caller guarantees the commit record is already flushed in that case.  It
                                129                 :  * should be InvalidXLogRecPtr for abort cases, too.
                                130                 :  *
                                131                 :  * In the commit case, atomicity is limited by whether all the subxids are in
                                132                 :  * the same CLOG page as xid.  If they all are, then the lock will be grabbed
                                133                 :  * only once, and the status will be set to committed directly.  Otherwise
                                134                 :  * we must
                                135                 :  *   1. set sub-committed all subxids that are not on the same page as the
                                136                 :  *      main xid
                                137                 :  *   2. atomically set committed the main xid and the subxids on the same page
                                138                 :  *   3. go over the first bunch again and set them committed
                                139                 :  * Note that as far as concurrent checkers are concerned, main transaction
                                140                 :  * commit as a whole is still atomic.
                                141                 :  *
                                142                 :  * Example:
                                143                 :  *      TransactionId t commits and has subxids t1, t2, t3, t4
                                144                 :  *      t is on page p1, t1 is also on p1, t2 and t3 are on p2, t4 is on p3
                                145                 :  *      1. update pages2-3:
                                146                 :  *                  page2: set t2,t3 as sub-committed
                                147                 :  *                  page3: set t4 as sub-committed
                                148                 :  *      2. update page1:
                                149                 :  *                  page1: set t,t1 as committed
                                150                 :  *      3. update pages2-3:
                                151                 :  *                  page2: set t2,t3 as committed
                                152                 :  *                  page3: set t4 as committed
                                153                 :  *
                                154                 :  * NB: this is a low-level routine and is NOT the preferred entry point
                                155                 :  * for most uses; functions in transam.c are the intended callers.
                                156                 :  *
                                157                 :  * XXX Think about issuing POSIX_FADV_WILLNEED on pages that we will need,
                                158                 :  * but aren't yet in cache, as well as hinting pages not to fall out of
                                159                 :  * cache yet.
 7897 tgl                       160 ECB             :  */
                                161                 : void
 5284 alvherre                  162 GIC      318992 : TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
 2118 tgl                       163 ECB             :                            TransactionId *subxids, XidStatus status, XLogRecPtr lsn)
                                164                 : {
 2118 tgl                       165 GIC      318992 :     int         pageno = TransactionIdToPage(xid);  /* get page of parent */
 5050 bruce                     166 ECB             :     int         i;
                                167                 : 
 5284 alvherre                  168 GIC      318992 :     Assert(status == TRANSACTION_STATUS_COMMITTED ||
                                169                 :            status == TRANSACTION_STATUS_ABORTED);
                                170                 : 
                                171                 :     /*
                                172                 :      * Count how many leading subxids, if any, are on the same page as the
 5050 bruce                     173 ECB             :      * parent.
                                174                 :      */
 5284 alvherre                  175 CBC      323134 :     for (i = 0; i < nsubxids; i++)
 5284 alvherre                  176 EUB             :     {
 5284 alvherre                  177 GIC        4142 :         if (TransactionIdToPage(subxids[i]) != pageno)
 5284 alvherre                  178 UIC           0 :             break;
                                179                 :     }
                                180                 : 
                                181                 :     /*
 5284 alvherre                  182 ECB             :      * Do all items fit on a single page?
                                183                 :      */
 5284 alvherre                  184 GIC      318992 :     if (i == nsubxids)
                                185                 :     {
                                186                 :         /*
 5284 alvherre                  187 ECB             :          * Set the parent and all subtransactions in a single call
                                188                 :          */
 5284 alvherre                  189 GIC      318992 :         TransactionIdSetPageStatus(xid, nsubxids, subxids, status, lsn,
                                190                 :                                    pageno, true);
                                191                 :     }
 5284 alvherre                  192 EUB             :     else
                                193                 :     {
 5050 bruce                     194 UIC           0 :         int         nsubxids_on_first_page = i;
                                195                 : 
                                196                 :         /*
                                197                 :          * If this is a commit then we care about doing this correctly (i.e.
                                198                 :          * using the subcommitted intermediate status).  By here, we know
                                199                 :          * we're updating more than one page of clog, so we must mark entries
                                200                 :          * that are *not* on the first page so that they show as subcommitted
                                201                 :          * before we then return to update the status to fully committed.
                                202                 :          *
                                203                 :          * To avoid touching the first page twice, skip marking subcommitted
 5284 alvherre                  204 EUB             :          * for the subxids on that first page.
                                205                 :          */
 5284 alvherre                  206 UBC           0 :         if (status == TRANSACTION_STATUS_COMMITTED)
 5284 alvherre                  207 UIC           0 :             set_status_by_pages(nsubxids - nsubxids_on_first_page,
                                208               0 :                                 subxids + nsubxids_on_first_page,
                                209                 :                                 TRANSACTION_STATUS_SUB_COMMITTED, lsn);
                                210                 : 
                                211                 :         /*
                                212                 :          * Now set the parent and subtransactions on same page as the parent,
 5284 alvherre                  213 EUB             :          * if any
                                214                 :          */
 5284 alvherre                  215 UIC           0 :         pageno = TransactionIdToPage(xid);
                                216               0 :         TransactionIdSetPageStatus(xid, nsubxids_on_first_page, subxids, status,
                                217                 :                                    lsn, pageno, false);
                                218                 : 
                                219                 :         /*
                                220                 :          * Now work through the rest of the subxids one clog page at a time,
 5284 alvherre                  221 EUB             :          * starting from the second page onwards, like we did above.
                                222                 :          */
 5284 alvherre                  223 UIC           0 :         set_status_by_pages(nsubxids - nsubxids_on_first_page,
                                224               0 :                             subxids + nsubxids_on_first_page,
 5284 alvherre                  225 ECB             :                             status, lsn);
                                226                 :     }
 5284 alvherre                  227 GIC      318992 : }
                                228                 : 
                                229                 : /*
                                230                 :  * Helper for TransactionIdSetTreeStatus: set the status for a bunch of
                                231                 :  * transactions, chunking in the separate CLOG pages involved. We never
                                232                 :  * pass the whole transaction tree to this function, only subtransactions
                                233                 :  * that are on different pages to the top level transaction id.
 5284 alvherre                  234 EUB             :  */
                                235                 : static void
 5284 alvherre                  236 UIC           0 : set_status_by_pages(int nsubxids, TransactionId *subxids,
 5284 alvherre                  237 EUB             :                     XidStatus status, XLogRecPtr lsn)
                                238                 : {
 5050 bruce                     239 UBC           0 :     int         pageno = TransactionIdToPage(subxids[0]);
 5050 bruce                     240 UIC           0 :     int         offset = 0;
 5050 bruce                     241 UBC           0 :     int         i = 0;
                                242                 : 
 2011 tgl                       243               0 :     Assert(nsubxids > 0);        /* else the pageno fetch above is unsafe */
                                244                 : 
 5284 alvherre                  245               0 :     while (i < nsubxids)
                                246                 :     {
 5050 bruce                     247 UIC           0 :         int         num_on_page = 0;
                                248                 :         int         nextpageno;
                                249                 : 
 2011 tgl                       250 EUB             :         do
 5284 alvherre                  251                 :         {
 2011 tgl                       252 UBC           0 :             nextpageno = TransactionIdToPage(subxids[i]);
                                253               0 :             if (nextpageno != pageno)
                                254               0 :                 break;
 5284 alvherre                  255               0 :             num_on_page++;
 5284 alvherre                  256 UIC           0 :             i++;
 2011 tgl                       257 UBC           0 :         } while (i < nsubxids);
 5284 alvherre                  258 EUB             : 
 5284 alvherre                  259 UIC           0 :         TransactionIdSetPageStatus(InvalidTransactionId,
 5284 alvherre                  260 UBC           0 :                                    num_on_page, subxids + offset,
 2046 rhaas                     261 EUB             :                                    status, lsn, pageno, false);
 5284 alvherre                  262 UIC           0 :         offset = i;
 2011 tgl                       263 UBC           0 :         pageno = nextpageno;
                                264                 :     }
 5284 alvherre                  265 UIC           0 : }
                                266                 : 
                                267                 : /*
                                268                 :  * Record the final state of transaction entries in the commit log for all
                                269                 :  * entries on a single page.  Atomic only on this page.
 5284 alvherre                  270 ECB             :  */
                                271                 : static void
 5284 alvherre                  272 GIC      318992 : TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
                                273                 :                            TransactionId *subxids, XidStatus status,
                                274                 :                            XLogRecPtr lsn, int pageno,
                                275                 :                            bool all_xact_same_page)
                                276                 : {
                                277                 :     /* Can't use group update when PGPROC overflows. */
                                278                 :     StaticAssertDecl(THRESHOLD_SUBTRANS_CLOG_OPT <= PGPROC_MAX_CACHED_SUBXIDS,
                                279                 :                      "group clog threshold less than PGPROC cached subxids");
                                280                 : 
                                281                 :     /*
                                282                 :      * When there is contention on XactSLRULock, we try to group multiple
                                283                 :      * updates; a single leader process will perform transaction status
                                284                 :      * updates for multiple backends so that the number of times XactSLRULock
                                285                 :      * needs to be acquired is reduced.
                                286                 :      *
                                287                 :      * For this optimization to be safe, the XID and subxids in MyProc must be
                                288                 :      * the same as the ones for which we're setting the status.  Check that
                                289                 :      * this is the case.
                                290                 :      *
                                291                 :      * For this optimization to be efficient, we shouldn't have too many
                                292                 :      * sub-XIDs and all of the XIDs for which we're adjusting clog should be
 2046 rhaas                     293 ECB             :      * on the same page.  Check those conditions, too.
                                294                 :      */
  968 andres                    295 CBC      318992 :     if (all_xact_same_page && xid == MyProc->xid &&
 2046 rhaas                     296          297658 :         nsubxids <= THRESHOLD_SUBTRANS_CLOG_OPT &&
  968 andres                    297          297658 :         nsubxids == MyProc->subxidStatus.count &&
  402 tgl                       298 GIC         457 :         (nsubxids == 0 ||
                                299             457 :          memcmp(subxids, MyProc->subxids.xids,
                                300                 :                 nsubxids * sizeof(TransactionId)) == 0))
                                301                 :     {
                                302                 :         /*
                                303                 :          * If we can immediately acquire XactSLRULock, we update the status of
                                304                 :          * our own XID and release the lock.  If not, try to use group XID
                                305                 :          * update.  If that doesn't work out, fall back to waiting for the
 2046 rhaas                     306 ECB             :          * lock to perform an update for this transaction only.
                                307                 :          */
 1059 tgl                       308 GIC      297658 :         if (LWLockConditionalAcquire(XactSLRULock, LW_EXCLUSIVE))
 2046 rhaas                     309 ECB             :         {
                                310                 :             /* Got the lock without waiting!  Do the update. */
 2046 rhaas                     311 CBC      297604 :             TransactionIdSetPageStatusInternal(xid, nsubxids, subxids, status,
 2046 rhaas                     312 ECB             :                                                lsn, pageno);
 1059 tgl                       313 GIC      297604 :             LWLockRelease(XactSLRULock);
 2046 rhaas                     314 CBC      297604 :             return;
                                315                 :         }
 2046 rhaas                     316 GIC          54 :         else if (TransactionGroupUpdateXidStatus(xid, status, lsn, pageno))
 2046 rhaas                     317 ECB             :         {
                                318                 :             /* Group update mechanism has done the work. */
 2046 rhaas                     319 GIC          54 :             return;
                                320                 :         }
                                321                 : 
                                322                 :         /* Fall through only if update isn't done yet. */
                                323                 :     }
 2046 rhaas                     324 ECB             : 
                                325                 :     /* Group update not applicable, or couldn't accept this page number. */
 1059 tgl                       326 GIC       21334 :     LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
 2046 rhaas                     327 CBC       21334 :     TransactionIdSetPageStatusInternal(xid, nsubxids, subxids, status,
                                328                 :                                        lsn, pageno);
 1059 tgl                       329 GIC       21334 :     LWLockRelease(XactSLRULock);
                                330                 : }
                                331                 : 
                                332                 : /*
                                333                 :  * Record the final state of transaction entry in the commit log
                                334                 :  *
                                335                 :  * We don't do any locking here; caller must handle that.
 2046 rhaas                     336 ECB             :  */
                                337                 : static void
 2046 rhaas                     338 GIC      318992 : TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
                                339                 :                                    TransactionId *subxids, XidStatus status,
                                340                 :                                    XLogRecPtr lsn, int pageno)
                                341                 : {
                                342                 :     int         slotno;
 5050 bruce                     343 ECB             :     int         i;
                                344                 : 
 7897 tgl                       345 GIC      318992 :     Assert(status == TRANSACTION_STATUS_COMMITTED ||
 6856 tgl                       346 ECB             :            status == TRANSACTION_STATUS_ABORTED ||
                                347                 :            (status == TRANSACTION_STATUS_SUB_COMMITTED && !TransactionIdIsValid(xid)));
 1059 tgl                       348 GIC      318992 :     Assert(LWLockHeldByMeInMode(XactSLRULock, LW_EXCLUSIVE));
                                349                 : 
                                350                 :     /*
                                351                 :      * If we're doing an async commit (ie, lsn is valid), then we must wait
                                352                 :      * for any active write on the page slot to complete.  Otherwise our
                                353                 :      * update could reach disk in that write, which will not do since we
                                354                 :      * mustn't let it reach disk until we've done the appropriate WAL flush.
                                355                 :      * But when lsn is invalid, it's OK to scribble on a page while it is
                                356                 :      * write-busy, since we don't care if the update reaches disk sooner than
 5284 alvherre                  357 ECB             :      * we think.
                                358                 :      */
 1059 tgl                       359 GIC      318992 :     slotno = SimpleLruReadPage(XactCtl, pageno, XLogRecPtrIsInvalid(lsn), xid);
                                360                 : 
                                361                 :     /*
                                362                 :      * Set the main transaction id, if any.
                                363                 :      *
                                364                 :      * If we update more than one xid on this page while it is being written
                                365                 :      * out, we might find that some of the bits go to disk and others don't.
                                366                 :      * If we are updating commits on the page with the top-level xid, that
                                367                 :      * could break atomicity, so we subcommit the subxids first before we mark
 5050 bruce                     368 ECB             :      * the top-level commit.
                                369                 :      */
 5284 alvherre                  370 GIC      318992 :     if (TransactionIdIsValid(xid))
 5284 alvherre                  371 ECB             :     {
                                372                 :         /* Subtransactions first, if needed ... */
 5284 alvherre                  373 CBC      318992 :         if (status == TRANSACTION_STATUS_COMMITTED)
                                374                 :         {
                                375          316569 :             for (i = 0; i < nsubxids; i++)
 5284 alvherre                  376 ECB             :             {
 1059 tgl                       377 GIC        3824 :                 Assert(XactCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
 5284 alvherre                  378            3824 :                 TransactionIdSetStatusBit(subxids[i],
                                379                 :                                           TRANSACTION_STATUS_SUB_COMMITTED,
                                380                 :                                           lsn, slotno);
                                381                 :             }
                                382                 :         }
 5284 alvherre                  383 ECB             : 
                                384                 :         /* ... then the main transaction */
 5284 alvherre                  385 GIC      318992 :         TransactionIdSetStatusBit(xid, status, lsn, slotno);
                                386                 :     }
 5284 alvherre                  387 ECB             : 
                                388                 :     /* Set the subtransactions */
 5284 alvherre                  389 CBC      323134 :     for (i = 0; i < nsubxids; i++)
 5284 alvherre                  390 ECB             :     {
 1059 tgl                       391 GIC        4142 :         Assert(XactCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
 5284 alvherre                  392            4142 :         TransactionIdSetStatusBit(subxids[i], status, lsn, slotno);
 5284 alvherre                  393 ECB             :     }
                                394                 : 
 1059 tgl                       395 GIC      318992 :     XactCtl->shared->page_dirty[slotno] = true;
 2046 rhaas                     396          318992 : }
                                397                 : 
                                398                 : /*
                                399                 :  * When we cannot immediately acquire XactSLRULock in exclusive mode at
                                400                 :  * commit time, add ourselves to a list of processes that need their XID
                                401                 :  * status updated.  The first process to add itself to the list will acquire
                                402                 :  * XactSLRULock in exclusive mode and set transaction status as required
                                403                 :  * on behalf of all group members.  This avoids a great deal of contention
                                404                 :  * around XactSLRULock when many processes are trying to commit at once,
                                405                 :  * since the lock need not be repeatedly handed off from one committing
                                406                 :  * process to the next.
                                407                 :  *
                                408                 :  * Returns true when transaction status has been updated in clog; returns
                                409                 :  * false if we decided against applying the optimization because the page
                                410                 :  * number we need to update differs from those processes already waiting.
 2046 rhaas                     411 ECB             :  */
                                412                 : static bool
 2046 rhaas                     413 GIC          54 : TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
 2046 rhaas                     414 ECB             :                                 XLogRecPtr lsn, int pageno)
                                415                 : {
 2046 rhaas                     416 GIC          54 :     volatile PROC_HDR *procglobal = ProcGlobal;
                                417              54 :     PGPROC     *proc = MyProc;
                                418                 :     uint32      nextidx;
                                419                 :     uint32      wakeidx;
 2046 rhaas                     420 ECB             : 
                                421                 :     /* We should definitely have an XID whose status needs to be updated. */
 2046 rhaas                     422 GIC          54 :     Assert(TransactionIdIsValid(xid));
                                423                 : 
                                424                 :     /*
                                425                 :      * Add ourselves to the list of processes needing a group XID status
 2046 rhaas                     426 ECB             :      * update.
                                427                 :      */
 2046 rhaas                     428 CBC          54 :     proc->clogGroupMember = true;
                                429              54 :     proc->clogGroupMemberXid = xid;
                                430              54 :     proc->clogGroupMemberXidStatus = status;
 2046 rhaas                     431 GIC          54 :     proc->clogGroupMemberPage = pageno;
 2046 rhaas                     432 CBC          54 :     proc->clogGroupMemberLsn = lsn;
                                433                 : 
 2046 rhaas                     434 GIC          54 :     nextidx = pg_atomic_read_u32(&procglobal->clogGroupFirst);
                                435                 : 
                                436                 :     while (true)
                                437                 :     {
                                438                 :         /*
                                439                 :          * Add the proc to list, if the clog page where we need to update the
                                440                 :          * current transaction status is same as group leader's clog page.
                                441                 :          *
                                442                 :          * There is a race condition here, which is that after doing the below
                                443                 :          * check and before adding this proc's clog update to a group, the
                                444                 :          * group leader might have already finished the group update for this
                                445                 :          * page and become group leader of another group. This will lead to a
                                446                 :          * situation where a single group can have different clog page
                                447                 :          * updates.  This isn't likely and will still work, just maybe a bit
 2046 rhaas                     448 ECB             :          * less efficiently.
 2046 rhaas                     449 EUB             :          */
 2046 rhaas                     450 GIC          54 :         if (nextidx != INVALID_PGPROCNO &&
 2046 rhaas                     451 UIC           0 :             ProcGlobal->allProcs[nextidx].clogGroupMemberPage != proc->clogGroupMemberPage)
                                452                 :         {
                                453                 :             /*
                                454                 :              * Ensure that this proc is not a member of any clog group that
  650 akapila                   455 EUB             :              * needs an XID status update.
                                456                 :              */
 2046 rhaas                     457 UBC           0 :             proc->clogGroupMember = false;
  650 akapila                   458 UIC           0 :             pg_atomic_write_u32(&proc->clogGroupNext, INVALID_PGPROCNO);
 2046 rhaas                     459               0 :             return false;
 2046 rhaas                     460 ECB             :         }
                                461                 : 
 2046 rhaas                     462 CBC          54 :         pg_atomic_write_u32(&proc->clogGroupNext, nextidx);
                                463                 : 
                                464              54 :         if (pg_atomic_compare_exchange_u32(&procglobal->clogGroupFirst,
 2046 rhaas                     465 ECB             :                                            &nextidx,
 2046 rhaas                     466 GIC          54 :                                            (uint32) proc->pgprocno))
                                467              54 :             break;
                                468                 :     }
                                469                 : 
                                470                 :     /*
                                471                 :      * If the list was not empty, the leader will update the status of our
                                472                 :      * XID. It is impossible to have followers without a leader because the
                                473                 :      * first process that has added itself to the list will always have
 2046 rhaas                     474 ECB             :      * nextidx as INVALID_PGPROCNO.
                                475                 :      */
 2046 rhaas                     476 GBC          54 :     if (nextidx != INVALID_PGPROCNO)
                                477                 :     {
 2046 rhaas                     478 UIC           0 :         int         extraWaits = 0;
 2046 rhaas                     479 EUB             : 
                                480                 :         /* Sleep until the leader updates our XID status. */
 1058 tgl                       481 UIC           0 :         pgstat_report_wait_start(WAIT_EVENT_XACT_GROUP_UPDATE);
                                482                 :         for (;;)
 2046 rhaas                     483 EUB             :         {
                                484                 :             /* acts as a read barrier */
 2046 rhaas                     485 UBC           0 :             PGSemaphoreLock(proc->sem);
                                486               0 :             if (!proc->clogGroupMember)
 2046 rhaas                     487 UIC           0 :                 break;
 2046 rhaas                     488 UBC           0 :             extraWaits++;
                                489                 :         }
                                490               0 :         pgstat_report_wait_end();
                                491                 : 
 2046 rhaas                     492 UIC           0 :         Assert(pg_atomic_read_u32(&proc->clogGroupNext) == INVALID_PGPROCNO);
 2046 rhaas                     493 EUB             : 
                                494                 :         /* Fix semaphore count for any absorbed wakeups */
 2046 rhaas                     495 UBC           0 :         while (extraWaits-- > 0)
 2046 rhaas                     496 UIC           0 :             PGSemaphoreUnlock(proc->sem);
                                497               0 :         return true;
                                498                 :     }
 2046 rhaas                     499 ECB             : 
                                500                 :     /* We are the leader.  Acquire the lock on behalf of everyone. */
 1059 tgl                       501 GIC          54 :     LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
                                502                 : 
                                503                 :     /*
                                504                 :      * Now that we've got the lock, clear the list of processes waiting for
                                505                 :      * group XID status update, saving a pointer to the head of the list.
 2046 rhaas                     506 ECB             :      * Trying to pop elements one at a time could lead to an ABA problem.
                                507                 :      */
 2046 rhaas                     508 GIC          54 :     nextidx = pg_atomic_exchange_u32(&procglobal->clogGroupFirst,
                                509                 :                                      INVALID_PGPROCNO);
 2046 rhaas                     510 ECB             : 
                                511                 :     /* Remember head of list so we can perform wakeups after dropping lock. */
 2046 rhaas                     512 GIC          54 :     wakeidx = nextidx;
 2046 rhaas                     513 ECB             : 
                                514                 :     /* Walk the list and update the status of all XIDs. */
 2046 rhaas                     515 CBC         108 :     while (nextidx != INVALID_PGPROCNO)
                                516                 :     {
  186 drowley                   517 GNC          54 :         PGPROC     *nextproc = &ProcGlobal->allProcs[nextidx];
                                518                 : 
                                519                 :         /*
                                520                 :          * Transactions with more than THRESHOLD_SUBTRANS_CLOG_OPT sub-XIDs
 1214 akapila                   521 ECB             :          * should not use group XID status update mechanism.
                                522                 :          */
  186 drowley                   523 GNC          54 :         Assert(nextproc->subxidStatus.count <= THRESHOLD_SUBTRANS_CLOG_OPT);
 2046 rhaas                     524 ECB             : 
  186 drowley                   525 GNC          54 :         TransactionIdSetPageStatusInternal(nextproc->clogGroupMemberXid,
                                526              54 :                                            nextproc->subxidStatus.count,
                                527              54 :                                            nextproc->subxids.xids,
                                528                 :                                            nextproc->clogGroupMemberXidStatus,
                                529                 :                                            nextproc->clogGroupMemberLsn,
                                530                 :                                            nextproc->clogGroupMemberPage);
 2046 rhaas                     531 ECB             : 
                                532                 :         /* Move to next proc in list. */
  186 drowley                   533 GNC          54 :         nextidx = pg_atomic_read_u32(&nextproc->clogGroupNext);
                                534                 :     }
 2046 rhaas                     535 ECB             : 
                                536                 :     /* We're done with the lock now. */
 1059 tgl                       537 GIC          54 :     LWLockRelease(XactSLRULock);
                                538                 : 
                                539                 :     /*
                                540                 :      * Now that we've released the lock, go back and wake everybody up.  We
                                541                 :      * don't do this under the lock so as to keep lock hold times to a
 2046 rhaas                     542 ECB             :      * minimum.
                                543                 :      */
 2046 rhaas                     544 CBC         108 :     while (wakeidx != INVALID_PGPROCNO)
                                545                 :     {
  186 drowley                   546 GNC          54 :         PGPROC     *wakeproc = &ProcGlobal->allProcs[wakeidx];
 2046 rhaas                     547 ECB             : 
  186 drowley                   548 GNC          54 :         wakeidx = pg_atomic_read_u32(&wakeproc->clogGroupNext);
                                549              54 :         pg_atomic_write_u32(&wakeproc->clogGroupNext, INVALID_PGPROCNO);
 2046 rhaas                     550 ECB             : 
                                551                 :         /* ensure all previous writes are visible before follower continues. */
 2046 rhaas                     552 CBC          54 :         pg_write_barrier();
                                553                 : 
  186 drowley                   554 GNC          54 :         wakeproc->clogGroupMember = false;
 2046 rhaas                     555 EUB             : 
  186 drowley                   556 GNC          54 :         if (wakeproc != MyProc)
  186 drowley                   557 UNC           0 :             PGSemaphoreUnlock(wakeproc->sem);
 2046 rhaas                     558 ECB             :     }
                                559                 : 
 2046 rhaas                     560 GIC          54 :     return true;
                                561                 : }
                                562                 : 
                                563                 : /*
                                564                 :  * Sets the commit status of a single transaction.
                                565                 :  *
                                566                 :  * Must be called with XactSLRULock held
 5284 alvherre                  567 ECB             :  */
                                568                 : static void
 5284 alvherre                  569 CBC      326958 : TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno)
 5284 alvherre                  570 ECB             : {
 5284 alvherre                  571 GIC      326958 :     int         byteno = TransactionIdToByte(xid);
                                572          326958 :     int         bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
                                573                 :     char       *byteptr;
                                574                 :     char        byteval;
 5270 alvherre                  575 ECB             :     char        curval;
 5284                           576                 : 
 1059 tgl                       577 GIC      326958 :     byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
 5270 alvherre                  578          326958 :     curval = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
                                579                 : 
                                580                 :     /*
                                581                 :      * When replaying transactions during recovery we still need to perform
                                582                 :      * the two phases of subcommit and then commit. However, some transactions
                                583                 :      * are already correctly marked, so we just treat those as a no-op which
 5270 alvherre                  584 ECB             :      * allows us to keep the following Assert as restrictive as possible.
                                585                 :      */
 5270 alvherre                  586 GBC      326958 :     if (InRecovery && status == TRANSACTION_STATUS_SUB_COMMITTED &&
                                587                 :         curval == TRANSACTION_STATUS_COMMITTED)
 5270 alvherre                  588 UIC           0 :         return;
                                589                 : 
                                590                 :     /*
                                591                 :      * Current state change should be from 0 or subcommitted to target state
 5270 alvherre                  592 ECB             :      * or we should already be there when replaying changes during recovery.
                                593                 :      */
 5270 alvherre                  594 GIC      326958 :     Assert(curval == 0 ||
                                595                 :            (curval == TRANSACTION_STATUS_SUB_COMMITTED &&
                                596                 :             status != TRANSACTION_STATUS_IN_PROGRESS) ||
                                597                 :            curval == status);
 7897 tgl                       598 ECB             : 
 6854                           599                 :     /* note this assumes exclusive access to the clog page */
 6854 tgl                       600 CBC      326958 :     byteval = *byteptr;
                                601          326958 :     byteval &= ~(((1 << CLOG_BITS_PER_XACT) - 1) << bshift);
 6854 tgl                       602 GIC      326958 :     byteval |= (status << bshift);
                                603          326958 :     *byteptr = byteval;
                                604                 : 
                                605                 :     /*
                                606                 :      * Update the group LSN if the transaction completion LSN is higher.
                                607                 :      *
                                608                 :      * Note: lsn will be invalid when supplied during InRecovery processing,
                                609                 :      * so we don't need to do anything special to avoid LSN updates during
                                610                 :      * recovery. After recovery completes the next clog change will set the
 5730 tgl                       611 ECB             :      * LSN correctly.
                                612                 :      */
 5730 tgl                       613 CBC      326958 :     if (!XLogRecPtrIsInvalid(lsn))
                                614                 :     {
                                615           26381 :         int         lsnindex = GetLSNIndex(slotno, xid);
 5730 tgl                       616 ECB             : 
 1059 tgl                       617 GIC       26381 :         if (XactCtl->shared->group_lsn[lsnindex] < lsn)
                                618           23769 :             XactCtl->shared->group_lsn[lsnindex] = lsn;
                                619                 :     }
                                620                 : }
                                621                 : 
                                622                 : /*
                                623                 :  * Interrogate the state of a transaction in the commit log.
                                624                 :  *
                                625                 :  * Aside from the actual commit status, this function returns (into *lsn)
                                626                 :  * an LSN that is late enough to be able to guarantee that if we flush up to
                                627                 :  * that LSN then we will have flushed the transaction's commit record to disk.
                                628                 :  * The result is not necessarily the exact LSN of the transaction's commit
                                629                 :  * record!  For example, for long-past transactions (those whose clog pages
                                630                 :  * already migrated to disk), we'll return InvalidXLogRecPtr.  Also, because
                                631                 :  * we group transactions on the same clog page to conserve storage, we might
                                632                 :  * return the LSN of a later transaction that falls into the same group.
                                633                 :  *
                                634                 :  * NB: this is a low-level routine and is NOT the preferred entry point
                                635                 :  * for most uses; TransactionLogFetch() in transam.c is the intended caller.
 7897 tgl                       636 ECB             :  */
                                637                 : XidStatus
 5730 tgl                       638 CBC     1799999 : TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
 7897 tgl                       639 ECB             : {
 7897 tgl                       640 CBC     1799999 :     int         pageno = TransactionIdToPage(xid);
 7897 tgl                       641 GIC     1799999 :     int         byteno = TransactionIdToByte(xid);
                                642         1799999 :     int         bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
                                643                 :     int         slotno;
                                644                 :     int         lsnindex;
                                645                 :     char       *byteptr;
                                646                 :     XidStatus   status;
                                647                 : 
 6333 tgl                       648 ECB             :     /* lock is acquired by SimpleLruReadPage_ReadOnly */
 7897                           649                 : 
 1059 tgl                       650 GIC     1799999 :     slotno = SimpleLruReadPage_ReadOnly(XactCtl, pageno, xid);
 1059 tgl                       651 CBC     1799999 :     byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
                                652                 : 
 7897                           653         1799999 :     status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
 7897 tgl                       654 ECB             : 
 5730 tgl                       655 GIC     1799999 :     lsnindex = GetLSNIndex(slotno, xid);
 1059 tgl                       656 CBC     1799999 :     *lsn = XactCtl->shared->group_lsn[lsnindex];
                                657                 : 
                                658         1799999 :     LWLockRelease(XactSLRULock);
                                659                 : 
 7897 tgl                       660 GIC     1799999 :     return status;
                                661                 : }
                                662                 : 
                                663                 : /*
                                664                 :  * Number of shared CLOG buffers.
                                665                 :  *
                                666                 :  * On larger multi-processor systems, it is possible to have many CLOG page
                                667                 :  * requests in flight at one time which could lead to disk access for CLOG
                                668                 :  * page if the required page is not found in memory.  Testing revealed that we
                                669                 :  * can get the best performance by having 128 CLOG buffers; more than that
                                670                 :  * doesn't improve performance.
                                671                 :  *
                                672                 :  * Unconditionally keeping the number of CLOG buffers to 128 did not seem like
                                673                 :  * a good idea, because it would increase the minimum amount of shared memory
                                674                 :  * required to start, which could be a problem for people running very small
                                675                 :  * configurations.  The following formula seems to represent a reasonable
                                676                 :  * compromise: people with very low values for shared_buffers will get fewer
                                677                 :  * CLOG buffers as well, and everyone else will get 128.
 4111 rhaas                     678 ECB             :  */
                                679                 : Size
 4111 rhaas                     680 CBC        4564 : CLOGShmemBuffers(void)
                                681                 : {
 2557 andres                    682 GIC        4564 :     return Min(128, Max(4, NBuffers / 512));
                                683                 : }
                                684                 : 
                                685                 : /*
                                686                 :  * Initialization of shared memory for CLOG
 7897 tgl                       687 ECB             :  */
                                688                 : Size
 7897 tgl                       689 CBC        2738 : CLOGShmemSize(void)
                                690                 : {
 4111 rhaas                     691 GIC        2738 :     return SimpleLruShmemSize(CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE);
                                692                 : }
 7897 tgl                       693 ECB             : 
                                694                 : void
 7897 tgl                       695 CBC        1826 : CLOGShmemInit(void)
 7897 tgl                       696 ECB             : {
 1059 tgl                       697 CBC        1826 :     XactCtl->PagePrecedes = CLOGPagePrecedes;
 1059 tgl                       698 GIC        1826 :     SimpleLruInit(XactCtl, "Xact", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE,
  926 tmunro                    699 CBC        1826 :                   XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER,
  926 tmunro                    700 ECB             :                   SYNC_HANDLER_CLOG);
  813 noah                      701 GIC        1826 :     SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE);
 7897 tgl                       702            1826 : }
                                703                 : 
                                704                 : /*
                                705                 :  * This func must be called ONCE on system install.  It creates
                                706                 :  * the initial CLOG segment.  (The CLOG directory is assumed to
                                707                 :  * have been created by initdb, and CLOGShmemInit must have been
                                708                 :  * called already.)
 7897 tgl                       709 ECB             :  */
                                710                 : void
 7897 tgl                       711 GIC         305 : BootStrapCLOG(void)
                                712                 : {
 7897 tgl                       713 ECB             :     int         slotno;
                                714                 : 
 1059 tgl                       715 GIC         305 :     LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
 7897 tgl                       716 ECB             : 
                                717                 :     /* Create and zero the first page of the commit log */
 7897 tgl                       718 GIC         305 :     slotno = ZeroCLOGPage(0, false);
 7897 tgl                       719 ECB             : 
                                720                 :     /* Make sure it's written out */
 1059 tgl                       721 GIC         305 :     SimpleLruWritePage(XactCtl, slotno);
 1059 tgl                       722 CBC         305 :     Assert(!XactCtl->shared->page_dirty[slotno]);
 7897 tgl                       723 ECB             : 
 1059 tgl                       724 GIC         305 :     LWLockRelease(XactSLRULock);
 7897                           725             305 : }
                                726                 : 
                                727                 : /*
                                728                 :  * Initialize (or reinitialize) a page of CLOG to zeroes.
                                729                 :  * If writeXlog is true, also emit an XLOG record saying we did this.
                                730                 :  *
                                731                 :  * The page is not actually written, just set up in shared memory.
                                732                 :  * The slot number of the new page is returned.
                                733                 :  *
                                734                 :  * Control lock must be held at entry, and will be held at exit.
 7897 tgl                       735 ECB             :  */
                                736                 : static int
 7897 tgl                       737 GIC         608 : ZeroCLOGPage(int pageno, bool writeXlog)
                                738                 : {
 6803 tgl                       739 ECB             :     int         slotno;
                                740                 : 
 1059 tgl                       741 CBC         608 :     slotno = SimpleLruZeroPage(XactCtl, pageno);
 7897 tgl                       742 ECB             : 
 7897 tgl                       743 GIC         608 :     if (writeXlog)
 7897 tgl                       744 CBC         303 :         WriteZeroPageXlogRec(pageno);
                                745                 : 
 7897 tgl                       746 GIC         608 :     return slotno;
                                747                 : }
                                748                 : 
                                749                 : /*
                                750                 :  * This must be called ONCE during postmaster or standalone-backend startup,
                                751                 :  * after StartupXLOG has initialized ShmemVariableCache->nextXid.
 7897 tgl                       752 ECB             :  */
                                753                 : void
 7897 tgl                       754 CBC        1176 : StartupCLOG(void)
 7897 tgl                       755 ECB             : {
  971 andres                    756 GIC        1176 :     TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
 6796 tgl                       757 CBC        1176 :     int         pageno = TransactionIdToPage(xid);
                                758                 : 
 1059 tgl                       759 GIC        1176 :     LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
                                760                 : 
                                761                 :     /*
 7897 tgl                       762 ECB             :      * Initialize our idea of the latest page number.
                                763                 :      */
 1059 tgl                       764 CBC        1176 :     XactCtl->shared->latest_page_number = pageno;
 6796 tgl                       765 ECB             : 
 1059 tgl                       766 GIC        1176 :     LWLockRelease(XactSLRULock);
 4176 simon                     767            1176 : }
                                768                 : 
                                769                 : /*
                                770                 :  * This must be called ONCE at the end of startup/recovery.
 4176 simon                     771 ECB             :  */
                                772                 : void
 4176 simon                     773 CBC        1142 : TrimCLOG(void)
 4176 simon                     774 ECB             : {
  971 andres                    775 GIC        1142 :     TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
 4176 simon                     776 CBC        1142 :     int         pageno = TransactionIdToPage(xid);
                                777                 : 
 1059 tgl                       778 GIC        1142 :     LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
                                779                 : 
                                780                 :     /*
                                781                 :      * Zero out the remainder of the current clog page.  Under normal
                                782                 :      * circumstances it should be zeroes already, but it seems at least
                                783                 :      * theoretically possible that XLOG replay will have settled on a nextXID
                                784                 :      * value that is less than the last XID actually used and marked by the
                                785                 :      * previous database lifecycle (since subtransaction commit writes clog
                                786                 :      * but makes no WAL entry).  Let's just be safe. (We need not worry about
                                787                 :      * pages beyond the current one, since those will be zeroed when first
                                788                 :      * used.  For the same reason, there is no need to do anything when
                                789                 :      * nextXid is exactly at a page boundary; and it's likely that the
 6385 bruce                     790 ECB             :      * "current" page doesn't exist yet in that case.)
                                791                 :      */
 6682 tgl                       792 CBC        1142 :     if (TransactionIdToPgIndex(xid) != 0)
 6682 tgl                       793 ECB             :     {
 6682 tgl                       794 GIC        1142 :         int         byteno = TransactionIdToByte(xid);
                                795            1142 :         int         bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
                                796                 :         int         slotno;
 6682 tgl                       797 ECB             :         char       *byteptr;
                                798                 : 
 1059 tgl                       799 GIC        1142 :         slotno = SimpleLruReadPage(XactCtl, pageno, false, xid);
                                800            1142 :         byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
 6682 tgl                       801 ECB             : 
                                802                 :         /* Zero so-far-unused positions in the current byte */
 6682 tgl                       803 CBC        1142 :         *byteptr &= (1 << bshift) - 1;
                                804                 :         /* Zero the rest of the page */
                                805            1142 :         MemSet(byteptr + 1, 0, BLCKSZ - byteno - 1);
                                806                 : 
 1059 tgl                       807 GIC        1142 :         XactCtl->shared->page_dirty[slotno] = true;
 6682 tgl                       808 ECB             :     }
 6796                           809                 : 
 1059 tgl                       810 GIC        1142 :     LWLockRelease(XactSLRULock);
 7897                           811            1142 : }
                                812                 : 
                                813                 : /*
                                814                 :  * Perform a checkpoint --- either during shutdown, or on-the-fly
 7897 tgl                       815 ECB             :  */
                                816                 : void
 7897 tgl                       817 GIC        2363 : CheckPointCLOG(void)
                                818                 : {
                                819                 :     /*
                                820                 :      * Write dirty CLOG pages to disk.  This may result in sync requests
                                821                 :      * queued for later handling by ProcessSyncRequests(), as part of the
                                822                 :      * checkpoint.
  926 tmunro                    823 ECB             :      */
                                824                 :     TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true);
  926 tmunro                    825 CBC        2363 :     SimpleLruWriteAll(XactCtl, true);
                                826                 :     TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true);
 7897 tgl                       827 GIC        2363 : }
                                828                 : 
                                829                 : 
                                830                 : /*
                                831                 :  * Make sure that CLOG has room for a newly-allocated XID.
                                832                 :  *
                                833                 :  * NB: this is called while holding XidGenLock.  We want it to be very fast
                                834                 :  * most of the time; even when it's not so fast, no actual I/O need happen
                                835                 :  * unless we're forced to write out a dirty clog or xlog page to make room
                                836                 :  * in shared memory.
 7897 tgl                       837 ECB             :  */
                                838                 : void
 7897 tgl                       839 GIC      301130 : ExtendCLOG(TransactionId newestXact)
                                840                 : {
                                841                 :     int         pageno;
                                842                 : 
                                843                 :     /*
                                844                 :      * No work except at first XID of a page.  But beware: just after
 7896 tgl                       845 ECB             :      * wraparound, the first XID of page zero is FirstNormalTransactionId.
                                846                 :      */
 7896 tgl                       847 CBC      301130 :     if (TransactionIdToPgIndex(newestXact) != 0 &&
                                848                 :         !TransactionIdEquals(newestXact, FirstNormalTransactionId))
 7897                           849          300827 :         return;
                                850                 : 
                                851             303 :     pageno = TransactionIdToPage(newestXact);
                                852                 : 
 1059 tgl                       853 GIC         303 :     LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
 7897 tgl                       854 ECB             : 
                                855                 :     /* Zero the page and make an XLOG entry about it */
 3425 heikki.linnakangas        856 CBC         303 :     ZeroCLOGPage(pageno, true);
                                857                 : 
 1059 tgl                       858 GIC         303 :     LWLockRelease(XactSLRULock);
                                859                 : }
                                860                 : 
                                861                 : 
                                862                 : /*
                                863                 :  * Remove all CLOG segments before the one holding the passed transaction ID
                                864                 :  *
                                865                 :  * Before removing any CLOG data, we must flush XLOG to disk, to ensure
                                866                 :  * that any recently-emitted FREEZE_PAGE records have reached disk; otherwise
                                867                 :  * a crash and restart might leave us with some unfrozen tuples referencing
                                868                 :  * removed CLOG data.  We choose to emit a special TRUNCATE XLOG record too.
                                869                 :  * Replaying the deletion from XLOG is not critical, since the files could
                                870                 :  * just as well be removed later, but doing so prevents a long-running hot
                                871                 :  * standby server from acquiring an unreasonably bloated CLOG directory.
                                872                 :  *
                                873                 :  * Since CLOG segments hold a large number of transactions, the opportunity to
                                874                 :  * actually remove a segment is fairly rare, and so it seems best not to do
                                875                 :  * the XLOG flush unless we have confirmed that there is a removable segment.
 7897 tgl                       876 ECB             :  */
                                877                 : void
 2208 rhaas                     878 GIC         317 : TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
                                879                 : {
                                880                 :     int         cutoffPage;
                                881                 : 
                                882                 :     /*
                                883                 :      * The cutoff point is the start of the segment containing oldestXact. We
 6385 bruce                     884 ECB             :      * pass the *page* containing oldestXact to SimpleLruTruncate.
                                885                 :      */
 7897 tgl                       886 GIC         317 :     cutoffPage = TransactionIdToPage(oldestXact);
 6803 tgl                       887 ECB             : 
                                888                 :     /* Check to see if there are any files that could be removed */
 1059 tgl                       889 GIC         317 :     if (!SlruScanDirectory(XactCtl, SlruScanDirCbReportPresence, &cutoffPage))
 6803                           890             317 :         return;                 /* nothing to remove */
                                891                 : 
                                892                 :     /*
                                893                 :      * Advance oldestClogXid before truncating clog, so concurrent xact status
                                894                 :      * lookups can ensure they don't attempt to access truncated-away clog.
                                895                 :      *
                                896                 :      * It's only necessary to do this if we will actually truncate away clog
 2208 rhaas                     897 EUB             :      * pages.
                                898                 :      */
 2208 rhaas                     899 UIC           0 :     AdvanceOldestClogXid(oldestXact);
                                900                 : 
                                901                 :     /*
                                902                 :      * Write XLOG record and flush XLOG to disk. We record the oldest xid
                                903                 :      * we're keeping information about here so we can ensure that it's always
                                904                 :      * ahead of clog truncation in case we crash, and so a standby finds out
 2153 bruce                     905 EUB             :      * the new valid xid before the next checkpoint.
                                906                 :      */
 2208 rhaas                     907 UIC           0 :     WriteTruncateXlogRec(cutoffPage, oldestXact, oldestxid_datoid);
 6803 tgl                       908 EUB             : 
                                909                 :     /* Now we can remove the old CLOG segment(s) */
 1059 tgl                       910 UIC           0 :     SimpleLruTruncate(XactCtl, cutoffPage);
                                911                 : }
                                912                 : 
                                913                 : 
                                914                 : /*
                                915                 :  * Decide whether a CLOG page number is "older" for truncation purposes.
                                916                 :  *
                                917                 :  * We need to use comparison of TransactionIds here in order to do the right
                                918                 :  * thing with wraparound XID arithmetic.  However, TransactionIdPrecedes()
                                919                 :  * would get weird about permanent xact IDs.  So, offset both such that xid1,
                                920                 :  * xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset
                                921                 :  * is relevant to page 0 and to the page preceding page 0.
                                922                 :  *
                                923                 :  * The page containing oldestXact-2^31 is the important edge case.  The
                                924                 :  * portion of that page equaling or following oldestXact-2^31 is expendable,
                                925                 :  * but the portion preceding oldestXact-2^31 is not.  When oldestXact-2^31 is
                                926                 :  * the first XID of a page and segment, the entire page and segment is
                                927                 :  * expendable, and we could truncate the segment.  Recognizing that case would
                                928                 :  * require making oldestXact, not just the page containing oldestXact,
                                929                 :  * available to this callback.  The benefit would be rare and small, so we
                                930                 :  * don't optimize that edge case.
 7897 tgl                       931 ECB             :  */
                                932                 : static bool
 7897 tgl                       933 GIC       71531 : CLOGPagePrecedes(int page1, int page2)
                                934                 : {
                                935                 :     TransactionId xid1;
 7897 tgl                       936 ECB             :     TransactionId xid2;
                                937                 : 
 7836 tgl                       938 CBC       71531 :     xid1 = ((TransactionId) page1) * CLOG_XACTS_PER_PAGE;
  813 noah                      939           71531 :     xid1 += FirstNormalTransactionId + 1;
 7836 tgl                       940 GIC       71531 :     xid2 = ((TransactionId) page2) * CLOG_XACTS_PER_PAGE;
  813 noah                      941 CBC       71531 :     xid2 += FirstNormalTransactionId + 1;
 7897 tgl                       942 ECB             : 
  813 noah                      943 GIC      119007 :     return (TransactionIdPrecedes(xid1, xid2) &&
                                944           47476 :             TransactionIdPrecedes(xid1, xid2 + CLOG_XACTS_PER_PAGE - 1));
                                945                 : }
                                946                 : 
                                947                 : 
                                948                 : /*
                                949                 :  * Write a ZEROPAGE xlog record
 7897 tgl                       950 ECB             :  */
                                951                 : static void
 7897 tgl                       952 CBC         303 : WriteZeroPageXlogRec(int pageno)
 7897 tgl                       953 ECB             : {
 3062 heikki.linnakangas        954 CBC         303 :     XLogBeginInsert();
                                955             303 :     XLogRegisterData((char *) (&pageno), sizeof(int));
 3062 heikki.linnakangas        956 GIC         303 :     (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
 7897 tgl                       957             303 : }
                                958                 : 
                                959                 : /*
                                960                 :  * Write a TRUNCATE xlog record
                                961                 :  *
                                962                 :  * We must flush the xlog record to disk before returning --- see notes
                                963                 :  * in TruncateCLOG().
 5999 tgl                       964 EUB             :  */
                                965                 : static void
 2208 rhaas                     966 UIC           0 : WriteTruncateXlogRec(int pageno, TransactionId oldestXact, Oid oldestXactDb)
                                967                 : {
                                968                 :     XLogRecPtr  recptr;
 2208 rhaas                     969 EUB             :     xl_clog_truncate xlrec;
                                970                 : 
 2208 rhaas                     971 UBC           0 :     xlrec.pageno = pageno;
 2208 rhaas                     972 UIC           0 :     xlrec.oldestXact = oldestXact;
 2208 rhaas                     973 UBC           0 :     xlrec.oldestXactDb = oldestXactDb;
 5999 tgl                       974 EUB             : 
 3062 heikki.linnakangas        975 UBC           0 :     XLogBeginInsert();
 2208 rhaas                     976               0 :     XLogRegisterData((char *) (&xlrec), sizeof(xl_clog_truncate));
 3062 heikki.linnakangas        977               0 :     recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE);
 5999 tgl                       978 UIC           0 :     XLogFlush(recptr);
                                979               0 : }
                                980                 : 
                                981                 : /*
                                982                 :  * CLOG resource manager's routines
 6803 tgl                       983 EUB             :  */
                                984                 : void
 3062 heikki.linnakangas        985 UBC           0 : clog_redo(XLogReaderState *record)
                                986                 : {
 3062 heikki.linnakangas        987 UIC           0 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
 7897 tgl                       988 EUB             : 
                                989                 :     /* Backup blocks are not used in clog records */
 3062 heikki.linnakangas        990 UBC           0 :     Assert(!XLogRecHasAnyBlockRefs(record));
                                991                 : 
 6803 tgl                       992 UIC           0 :     if (info == CLOG_ZEROPAGE)
                                993                 :     {
                                994                 :         int         pageno;
 6803 tgl                       995 EUB             :         int         slotno;
                                996                 : 
 6803 tgl                       997 UBC           0 :         memcpy(&pageno, XLogRecGetData(record), sizeof(int));
                                998                 : 
 1059                           999               0 :         LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
 6803 tgl                      1000 EUB             : 
 6803 tgl                      1001 UBC           0 :         slotno = ZeroCLOGPage(pageno, false);
 1059 tgl                      1002 UIC           0 :         SimpleLruWritePage(XactCtl, slotno);
 1059 tgl                      1003 UBC           0 :         Assert(!XactCtl->shared->page_dirty[slotno]);
                               1004                 : 
                               1005               0 :         LWLockRelease(XactSLRULock);
                               1006                 :     }
 5999 tgl                      1007 UIC           0 :     else if (info == CLOG_TRUNCATE)
                               1008                 :     {
 2208 rhaas                    1009 EUB             :         xl_clog_truncate xlrec;
                               1010                 : 
 2208 rhaas                    1011 UBC           0 :         memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_clog_truncate));
                               1012                 : 
                               1013               0 :         AdvanceOldestClogXid(xlrec.oldestXact);
                               1014                 : 
 1059 tgl                      1015 UIC           0 :         SimpleLruTruncate(XactCtl, xlrec.pageno);
 5999 tgl                      1016 EUB             :     }
                               1017                 :     else
 5999 tgl                      1018 UIC           0 :         elog(PANIC, "clog_redo: unknown op code %u", info);
 6803                          1019               0 : }
                               1020                 : 
                               1021                 : /*
                               1022                 :  * Entrypoint for sync.c to sync clog files.
  926 tmunro                   1023 EUB             :  */
                               1024                 : int
  926 tmunro                   1025 UBC           0 : clogsyncfiletag(const FileTag *ftag, char *path)
                               1026                 : {
  926 tmunro                   1027 UIC           0 :     return SlruSyncFileTag(XactCtl, ftag, path);
                               1028                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a