Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * clog.c
4 : * PostgreSQL transaction-commit-log manager
5 : *
6 : * This module replaces the old "pg_log" access code, which treated pg_log
7 : * essentially like a relation, in that it went through the regular buffer
8 : * manager. The problem with that was that there wasn't any good way to
9 : * recycle storage space for transactions so old that they'll never be
10 : * looked up again. Now we use specialized access code so that the commit
11 : * log can be broken into relatively small, independent segments.
12 : *
13 : * XLOG interactions: this module generates an XLOG record whenever a new
14 : * CLOG page is initialized to zeroes. Other writes of CLOG come from
15 : * recording of transaction commit or abort in xact.c, which generates its
16 : * own XLOG records for these events and will re-perform the status update
17 : * on redo; so we need make no additional XLOG entry here. For synchronous
18 : * transaction commits, the XLOG is guaranteed flushed through the XLOG commit
19 : * record before we are called to log a commit, so the WAL rule "write xlog
20 : * before data" is satisfied automatically. However, for async commits we
21 : * must track the latest LSN affecting each CLOG page, so that we can flush
22 : * XLOG that far and satisfy the WAL rule. We don't have to worry about this
23 : * for aborts (whether sync or async), since the post-crash assumption would
24 : * be that such transactions failed anyway.
25 : *
26 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
27 : * Portions Copyright (c) 1994, Regents of the University of California
28 : *
29 : * src/backend/access/transam/clog.c
30 : *
31 : *-------------------------------------------------------------------------
32 : */
33 : #include "postgres.h"
34 :
35 : #include "access/clog.h"
36 : #include "access/slru.h"
37 : #include "access/transam.h"
38 : #include "access/xlog.h"
39 : #include "access/xloginsert.h"
40 : #include "access/xlogutils.h"
41 : #include "miscadmin.h"
42 : #include "pg_trace.h"
43 : #include "pgstat.h"
44 : #include "storage/proc.h"
45 : #include "storage/sync.h"
46 :
/*
 * CLOG page geometry.  CLOG pages use the same BLCKSZ as every other page
 * in Postgres.
 *
 * TransactionIds are 32 bits wide and wrap around at 0xFFFFFFFF, so CLOG
 * page numbers wrap at 0xFFFFFFFF/CLOG_XACTS_PER_PAGE and CLOG segment
 * numbers wrap at 0xFFFFFFFF/CLOG_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT.
 * The only place in this module that has to care is the comparison of
 * segment and page numbers done for TruncateCLOG (see CLOGPagePrecedes).
 */

/* Two status bits per transaction, hence four transactions per byte */
#define CLOG_BITS_PER_XACT	2
#define CLOG_XACTS_PER_BYTE 4
#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
#define CLOG_XACT_BITMASK	((1 << CLOG_BITS_PER_XACT) - 1)

/* Locate a transaction's status bits: page, slot in page, byte, slot in byte */
#define TransactionIdToPage(xid)	((xid) / (TransactionId) CLOG_XACTS_PER_PAGE)
#define TransactionIdToPgIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE)
#define TransactionIdToByte(xid)	(TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
#define TransactionIdToBIndex(xid)	((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)

/* One "latest async commit LSN" is remembered per group of transactions */
#define CLOG_XACTS_PER_LSN_GROUP	32	/* keep this a power of 2 */
#define CLOG_LSNS_PER_PAGE	(CLOG_XACTS_PER_PAGE / CLOG_XACTS_PER_LSN_GROUP)

/* Index of xid's LSN group within the LSN array, given its buffer slot */
#define GetLSNIndex(slotno, xid)	((slotno) * CLOG_LSNS_PER_PAGE + \
	TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_LSN_GROUP)

/*
 * Largest number of subtransactions for which the clog group update
 * optimization is considered.  Testing showed that going above this value
 * can hurt performance.
 */
#define THRESHOLD_SUBTRANS_CLOG_OPT 5
83 :
84 : /*
85 : * Link to shared-memory data structures for CLOG control
86 : */
87 : static SlruCtlData XactCtlData;
88 :
89 : #define XactCtl (&XactCtlData)
90 :
91 :
92 : static int ZeroCLOGPage(int pageno, bool writeXlog);
93 : static bool CLOGPagePrecedes(int page1, int page2);
94 : static void WriteZeroPageXlogRec(int pageno);
95 : static void WriteTruncateXlogRec(int pageno, TransactionId oldestXact,
96 : Oid oldestXactDb);
97 : static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
98 : TransactionId *subxids, XidStatus status,
99 : XLogRecPtr lsn, int pageno,
100 : bool all_xact_same_page);
101 : static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status,
102 : XLogRecPtr lsn, int slotno);
103 : static void set_status_by_pages(int nsubxids, TransactionId *subxids,
104 : XidStatus status, XLogRecPtr lsn);
105 : static bool TransactionGroupUpdateXidStatus(TransactionId xid,
106 : XidStatus status, XLogRecPtr lsn, int pageno);
107 : static void TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
108 : TransactionId *subxids, XidStatus status,
109 : XLogRecPtr lsn, int pageno);
110 :
111 :
112 : /*
113 : * TransactionIdSetTreeStatus
114 : *
115 : * Record the final state of transaction entries in the commit log for
116 : * a transaction and its subtransaction tree. Take care to ensure this is
117 : * efficient, and as atomic as possible.
118 : *
119 : * xid is a single xid to set status for. This will typically be
120 : * the top level transactionid for a top level commit or abort. It can
121 : * also be a subtransaction when we record transaction aborts.
122 : *
123 : * subxids is an array of xids of length nsubxids, representing subtransactions
124 : * in the tree of xid. In various cases nsubxids may be zero.
125 : *
126 : * lsn must be the WAL location of the commit record when recording an async
127 : * commit. For a synchronous commit it can be InvalidXLogRecPtr, since the
128 : * caller guarantees the commit record is already flushed in that case. It
129 : * should be InvalidXLogRecPtr for abort cases, too.
130 : *
131 : * In the commit case, atomicity is limited by whether all the subxids are in
132 : * the same CLOG page as xid. If they all are, then the lock will be grabbed
133 : * only once, and the status will be set to committed directly. Otherwise
134 : * we must
135 : * 1. set sub-committed all subxids that are not on the same page as the
136 : * main xid
137 : * 2. atomically set committed the main xid and the subxids on the same page
138 : * 3. go over the first bunch again and set them committed
139 : * Note that as far as concurrent checkers are concerned, main transaction
140 : * commit as a whole is still atomic.
141 : *
142 : * Example:
143 : * TransactionId t commits and has subxids t1, t2, t3, t4
144 : * t is on page p1, t1 is also on p1, t2 and t3 are on p2, t4 is on p3
145 : * 1. update pages2-3:
146 : * page2: set t2,t3 as sub-committed
147 : * page3: set t4 as sub-committed
148 : * 2. update page1:
149 : * page1: set t,t1 as committed
150 : * 3. update pages2-3:
151 : * page2: set t2,t3 as committed
152 : * page3: set t4 as committed
153 : *
154 : * NB: this is a low-level routine and is NOT the preferred entry point
155 : * for most uses; functions in transam.c are the intended callers.
156 : *
157 : * XXX Think about issuing POSIX_FADV_WILLNEED on pages that we will need,
158 : * but aren't yet in cache, as well as hinting pages not to fall out of
159 : * cache yet.
7897 tgl 160 ECB : */
161 : void
5284 alvherre 162 GIC 318992 : TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
2118 tgl 163 ECB : TransactionId *subxids, XidStatus status, XLogRecPtr lsn)
164 : {
2118 tgl 165 GIC 318992 : int pageno = TransactionIdToPage(xid); /* get page of parent */
5050 bruce 166 ECB : int i;
167 :
5284 alvherre 168 GIC 318992 : Assert(status == TRANSACTION_STATUS_COMMITTED ||
169 : status == TRANSACTION_STATUS_ABORTED);
170 :
171 : /*
172 : * See how many subxids, if any, are on the same page as the parent, if
5050 bruce 173 ECB : * any.
174 : */
5284 alvherre 175 CBC 323134 : for (i = 0; i < nsubxids; i++)
5284 alvherre 176 EUB : {
5284 alvherre 177 GIC 4142 : if (TransactionIdToPage(subxids[i]) != pageno)
5284 alvherre 178 UIC 0 : break;
179 : }
180 :
181 : /*
5284 alvherre 182 ECB : * Do all items fit on a single page?
183 : */
5284 alvherre 184 GIC 318992 : if (i == nsubxids)
185 : {
186 : /*
5284 alvherre 187 ECB : * Set the parent and all subtransactions in a single call
188 : */
5284 alvherre 189 GIC 318992 : TransactionIdSetPageStatus(xid, nsubxids, subxids, status, lsn,
190 : pageno, true);
191 : }
5284 alvherre 192 EUB : else
193 : {
5050 bruce 194 UIC 0 : int nsubxids_on_first_page = i;
195 :
196 : /*
197 : * If this is a commit then we care about doing this correctly (i.e.
198 : * using the subcommitted intermediate status). By here, we know
199 : * we're updating more than one page of clog, so we must mark entries
200 : * that are *not* on the first page so that they show as subcommitted
201 : * before we then return to update the status to fully committed.
202 : *
203 : * To avoid touching the first page twice, skip marking subcommitted
5284 alvherre 204 EUB : * for the subxids on that first page.
205 : */
5284 alvherre 206 UBC 0 : if (status == TRANSACTION_STATUS_COMMITTED)
5284 alvherre 207 UIC 0 : set_status_by_pages(nsubxids - nsubxids_on_first_page,
208 0 : subxids + nsubxids_on_first_page,
209 : TRANSACTION_STATUS_SUB_COMMITTED, lsn);
210 :
211 : /*
212 : * Now set the parent and subtransactions on same page as the parent,
5284 alvherre 213 EUB : * if any
214 : */
5284 alvherre 215 UIC 0 : pageno = TransactionIdToPage(xid);
216 0 : TransactionIdSetPageStatus(xid, nsubxids_on_first_page, subxids, status,
217 : lsn, pageno, false);
218 :
219 : /*
220 : * Now work through the rest of the subxids one clog page at a time,
5284 alvherre 221 EUB : * starting from the second page onwards, like we did above.
222 : */
5284 alvherre 223 UIC 0 : set_status_by_pages(nsubxids - nsubxids_on_first_page,
224 0 : subxids + nsubxids_on_first_page,
5284 alvherre 225 ECB : status, lsn);
226 : }
5284 alvherre 227 GIC 318992 : }
228 :
229 : /*
230 : * Helper for TransactionIdSetTreeStatus: set the status for a bunch of
231 : * transactions, chunking in the separate CLOG pages involved. We never
232 : * pass the whole transaction tree to this function, only subtransactions
233 : * that are on different pages to the top level transaction id.
5284 alvherre 234 EUB : */
235 : static void
5284 alvherre 236 UIC 0 : set_status_by_pages(int nsubxids, TransactionId *subxids,
5284 alvherre 237 EUB : XidStatus status, XLogRecPtr lsn)
238 : {
5050 bruce 239 UBC 0 : int pageno = TransactionIdToPage(subxids[0]);
5050 bruce 240 UIC 0 : int offset = 0;
5050 bruce 241 UBC 0 : int i = 0;
242 :
2011 tgl 243 0 : Assert(nsubxids > 0); /* else the pageno fetch above is unsafe */
244 :
5284 alvherre 245 0 : while (i < nsubxids)
246 : {
5050 bruce 247 UIC 0 : int num_on_page = 0;
248 : int nextpageno;
249 :
2011 tgl 250 EUB : do
5284 alvherre 251 : {
2011 tgl 252 UBC 0 : nextpageno = TransactionIdToPage(subxids[i]);
253 0 : if (nextpageno != pageno)
254 0 : break;
5284 alvherre 255 0 : num_on_page++;
5284 alvherre 256 UIC 0 : i++;
2011 tgl 257 UBC 0 : } while (i < nsubxids);
5284 alvherre 258 EUB :
5284 alvherre 259 UIC 0 : TransactionIdSetPageStatus(InvalidTransactionId,
5284 alvherre 260 UBC 0 : num_on_page, subxids + offset,
2046 rhaas 261 EUB : status, lsn, pageno, false);
5284 alvherre 262 UIC 0 : offset = i;
2011 tgl 263 UBC 0 : pageno = nextpageno;
264 : }
5284 alvherre 265 UIC 0 : }
266 :
267 : /*
268 : * Record the final state of transaction entries in the commit log for all
269 : * entries on a single page. Atomic only on this page.
5284 alvherre 270 ECB : */
271 : static void
5284 alvherre 272 GIC 318992 : TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
273 : TransactionId *subxids, XidStatus status,
274 : XLogRecPtr lsn, int pageno,
275 : bool all_xact_same_page)
276 : {
277 : /* Can't use group update when PGPROC overflows. */
278 : StaticAssertDecl(THRESHOLD_SUBTRANS_CLOG_OPT <= PGPROC_MAX_CACHED_SUBXIDS,
279 : "group clog threshold less than PGPROC cached subxids");
280 :
281 : /*
282 : * When there is contention on XactSLRULock, we try to group multiple
283 : * updates; a single leader process will perform transaction status
284 : * updates for multiple backends so that the number of times XactSLRULock
285 : * needs to be acquired is reduced.
286 : *
287 : * For this optimization to be safe, the XID and subxids in MyProc must be
288 : * the same as the ones for which we're setting the status. Check that
289 : * this is the case.
290 : *
291 : * For this optimization to be efficient, we shouldn't have too many
292 : * sub-XIDs and all of the XIDs for which we're adjusting clog should be
2046 rhaas 293 ECB : * on the same page. Check those conditions, too.
294 : */
968 andres 295 CBC 318992 : if (all_xact_same_page && xid == MyProc->xid &&
2046 rhaas 296 297658 : nsubxids <= THRESHOLD_SUBTRANS_CLOG_OPT &&
968 andres 297 297658 : nsubxids == MyProc->subxidStatus.count &&
402 tgl 298 GIC 457 : (nsubxids == 0 ||
299 457 : memcmp(subxids, MyProc->subxids.xids,
300 : nsubxids * sizeof(TransactionId)) == 0))
301 : {
302 : /*
303 : * If we can immediately acquire XactSLRULock, we update the status of
304 : * our own XID and release the lock. If not, try use group XID
305 : * update. If that doesn't work out, fall back to waiting for the
2046 rhaas 306 ECB : * lock to perform an update for this transaction only.
307 : */
1059 tgl 308 GIC 297658 : if (LWLockConditionalAcquire(XactSLRULock, LW_EXCLUSIVE))
2046 rhaas 309 ECB : {
310 : /* Got the lock without waiting! Do the update. */
2046 rhaas 311 CBC 297604 : TransactionIdSetPageStatusInternal(xid, nsubxids, subxids, status,
2046 rhaas 312 ECB : lsn, pageno);
1059 tgl 313 GIC 297604 : LWLockRelease(XactSLRULock);
2046 rhaas 314 CBC 297604 : return;
315 : }
2046 rhaas 316 GIC 54 : else if (TransactionGroupUpdateXidStatus(xid, status, lsn, pageno))
2046 rhaas 317 ECB : {
318 : /* Group update mechanism has done the work. */
2046 rhaas 319 GIC 54 : return;
320 : }
321 :
322 : /* Fall through only if update isn't done yet. */
323 : }
2046 rhaas 324 ECB :
325 : /* Group update not applicable, or couldn't accept this page number. */
1059 tgl 326 GIC 21334 : LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
2046 rhaas 327 CBC 21334 : TransactionIdSetPageStatusInternal(xid, nsubxids, subxids, status,
328 : lsn, pageno);
1059 tgl 329 GIC 21334 : LWLockRelease(XactSLRULock);
330 : }
331 :
332 : /*
333 : * Record the final state of transaction entry in the commit log
334 : *
335 : * We don't do any locking here; caller must handle that.
2046 rhaas 336 ECB : */
337 : static void
2046 rhaas 338 GIC 318992 : TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
339 : TransactionId *subxids, XidStatus status,
340 : XLogRecPtr lsn, int pageno)
341 : {
342 : int slotno;
5050 bruce 343 ECB : int i;
344 :
7897 tgl 345 GIC 318992 : Assert(status == TRANSACTION_STATUS_COMMITTED ||
6856 tgl 346 ECB : status == TRANSACTION_STATUS_ABORTED ||
347 : (status == TRANSACTION_STATUS_SUB_COMMITTED && !TransactionIdIsValid(xid)));
1059 tgl 348 GIC 318992 : Assert(LWLockHeldByMeInMode(XactSLRULock, LW_EXCLUSIVE));
349 :
350 : /*
351 : * If we're doing an async commit (ie, lsn is valid), then we must wait
352 : * for any active write on the page slot to complete. Otherwise our
353 : * update could reach disk in that write, which will not do since we
354 : * mustn't let it reach disk until we've done the appropriate WAL flush.
355 : * But when lsn is invalid, it's OK to scribble on a page while it is
356 : * write-busy, since we don't care if the update reaches disk sooner than
5284 alvherre 357 ECB : * we think.
358 : */
1059 tgl 359 GIC 318992 : slotno = SimpleLruReadPage(XactCtl, pageno, XLogRecPtrIsInvalid(lsn), xid);
360 :
361 : /*
362 : * Set the main transaction id, if any.
363 : *
364 : * If we update more than one xid on this page while it is being written
365 : * out, we might find that some of the bits go to disk and others don't.
366 : * If we are updating commits on the page with the top-level xid that
367 : * could break atomicity, so we subcommit the subxids first before we mark
5050 bruce 368 ECB : * the top-level commit.
369 : */
5284 alvherre 370 GIC 318992 : if (TransactionIdIsValid(xid))
5284 alvherre 371 ECB : {
372 : /* Subtransactions first, if needed ... */
5284 alvherre 373 CBC 318992 : if (status == TRANSACTION_STATUS_COMMITTED)
374 : {
375 316569 : for (i = 0; i < nsubxids; i++)
5284 alvherre 376 ECB : {
1059 tgl 377 GIC 3824 : Assert(XactCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
5284 alvherre 378 3824 : TransactionIdSetStatusBit(subxids[i],
379 : TRANSACTION_STATUS_SUB_COMMITTED,
380 : lsn, slotno);
381 : }
382 : }
5284 alvherre 383 ECB :
384 : /* ... then the main transaction */
5284 alvherre 385 GIC 318992 : TransactionIdSetStatusBit(xid, status, lsn, slotno);
386 : }
5284 alvherre 387 ECB :
388 : /* Set the subtransactions */
5284 alvherre 389 CBC 323134 : for (i = 0; i < nsubxids; i++)
5284 alvherre 390 ECB : {
1059 tgl 391 GIC 4142 : Assert(XactCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
5284 alvherre 392 4142 : TransactionIdSetStatusBit(subxids[i], status, lsn, slotno);
5284 alvherre 393 ECB : }
394 :
1059 tgl 395 GIC 318992 : XactCtl->shared->page_dirty[slotno] = true;
2046 rhaas 396 318992 : }
397 :
398 : /*
399 : * When we cannot immediately acquire XactSLRULock in exclusive mode at
400 : * commit time, add ourselves to a list of processes that need their XIDs
401 : * status update. The first process to add itself to the list will acquire
402 : * XactSLRULock in exclusive mode and set transaction status as required
403 : * on behalf of all group members. This avoids a great deal of contention
404 : * around XactSLRULock when many processes are trying to commit at once,
405 : * since the lock need not be repeatedly handed off from one committing
406 : * process to the next.
407 : *
408 : * Returns true when transaction status has been updated in clog; returns
409 : * false if we decided against applying the optimization because the page
410 : * number we need to update differs from those processes already waiting.
 *
 * xid, status, lsn and pageno are copied into MyProc's clogGroupMember*
 * fields so that whichever backend becomes leader can apply them. The
 * sub-XIDs are not passed in: the leader reads each member's
 * subxidStatus.count and subxids.xids straight from that member's PGPROC.
 *
 * A leader acquires and releases XactSLRULock itself; the caller in this
 * file invokes us only after failing to get the lock conditionally.
2046 rhaas 411 ECB : */
412 : static bool
2046 rhaas 413 GIC 54 : TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
2046 rhaas 414 ECB : XLogRecPtr lsn, int pageno)
415 : {
2046 rhaas 416 GIC 54 : volatile PROC_HDR *procglobal = ProcGlobal;
417 54 : PGPROC *proc = MyProc;
418 : uint32 nextidx;
419 : uint32 wakeidx;
2046 rhaas 420 ECB :
421 : /* We should definitely have an XID whose status needs to be updated. */
2046 rhaas 422 GIC 54 : Assert(TransactionIdIsValid(xid));
423 :
424 : /*
425 : * Add ourselves to the list of processes needing a group XID status
2046 rhaas 426 ECB : * update.
427 : */
2046 rhaas 428 CBC 54 : proc->clogGroupMember = true;
429 54 : proc->clogGroupMemberXid = xid;
430 54 : proc->clogGroupMemberXidStatus = status;
2046 rhaas 431 GIC 54 : proc->clogGroupMemberPage = pageno;
2046 rhaas 432 CBC 54 : proc->clogGroupMemberLsn = lsn;
433 :
	/*
	 * Snapshot the current list head. nextidx is handed to the
	 * compare-exchange below by address and re-tested at the top of each
	 * loop iteration -- presumably the exchange refreshes it with the
	 * current head when it fails (NOTE(review): confirm against the
	 * pg_atomic_compare_exchange_u32 contract).
	 */
2046 rhaas 434 GIC 54 : nextidx = pg_atomic_read_u32(&procglobal->clogGroupFirst);
435 :
436 : while (true)
437 : {
438 : /*
439 : * Add the proc to list, if the clog page where we need to update the
440 : * current transaction status is same as group leader's clog page.
441 : *
442 : * There is a race condition here, which is that after doing the below
443 : * check and before adding this proc's clog update to a group, the
444 : * group leader might have already finished the group update for this
445 : * page and becomes group leader of another group. This will lead to a
446 : * situation where a single group can have different clog page
447 : * updates. This isn't likely and will still work, just maybe a bit
2046 rhaas 448 ECB : * less efficiently.
2046 rhaas 449 EUB : */
2046 rhaas 450 GIC 54 : if (nextidx != INVALID_PGPROCNO &&
2046 rhaas 451 UIC 0 : ProcGlobal->allProcs[nextidx].clogGroupMemberPage != proc->clogGroupMemberPage)
452 : {
453 : /*
454 : * Ensure that this proc is not a member of any clog group that
650 akapila 455 EUB : * needs an XID status update.
456 : */
2046 rhaas 457 UBC 0 : proc->clogGroupMember = false;
650 akapila 458 UIC 0 : pg_atomic_write_u32(&proc->clogGroupNext, INVALID_PGPROCNO);
2046 rhaas 459 0 : return false;
2046 rhaas 460 ECB : }
461 :
		/*
		 * Point our link at the head we observed, then try to install
		 * ourselves as the new head; the loop ends once that succeeds.
		 */
2046 rhaas 462 CBC 54 : pg_atomic_write_u32(&proc->clogGroupNext, nextidx);
463 :
464 54 : if (pg_atomic_compare_exchange_u32(&procglobal->clogGroupFirst,
2046 rhaas 465 ECB : &nextidx,
2046 rhaas 466 GIC 54 : (uint32) proc->pgprocno))
467 54 : break;
468 : }
469 :
470 : /*
471 : * If the list was not empty, the leader will update the status of our
472 : * XID. It is impossible to have followers without a leader because the
473 : * first process that has added itself to the list will always have
2046 rhaas 474 ECB : * nextidx as INVALID_PGPROCNO.
475 : */
2046 rhaas 476 GBC 54 : if (nextidx != INVALID_PGPROCNO)
477 : {
2046 rhaas 478 UIC 0 : int extraWaits = 0;
2046 rhaas 479 EUB :
480 : /* Sleep until the leader updates our XID status. */
1058 tgl 481 UIC 0 : pgstat_report_wait_start(WAIT_EVENT_XACT_GROUP_UPDATE);
482 : for (;;)
2046 rhaas 483 EUB : {
484 : /* acts as a read barrier */
2046 rhaas 485 UBC 0 : PGSemaphoreLock(proc->sem);
486 0 : if (!proc->clogGroupMember)
2046 rhaas 487 UIC 0 : break;
2046 rhaas 488 UBC 0 : extraWaits++;
489 : }
490 0 : pgstat_report_wait_end();
491 :
2046 rhaas 492 UIC 0 : Assert(pg_atomic_read_u32(&proc->clogGroupNext) == INVALID_PGPROCNO);
2046 rhaas 493 EUB :
494 : /* Fix semaphore count for any absorbed wakeups */
2046 rhaas 495 UBC 0 : while (extraWaits-- > 0)
2046 rhaas 496 UIC 0 : PGSemaphoreUnlock(proc->sem);
497 0 : return true;
498 : }
2046 rhaas 499 ECB :
500 : /* We are the leader. Acquire the lock on behalf of everyone. */
1059 tgl 501 GIC 54 : LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
502 :
503 : /*
504 : * Now that we've got the lock, clear the list of processes waiting for
505 : * group XID status update, saving a pointer to the head of the list.
2046 rhaas 506 ECB : * Trying to pop elements one at a time could lead to an ABA problem.
507 : */
2046 rhaas 508 GIC 54 : nextidx = pg_atomic_exchange_u32(&procglobal->clogGroupFirst,
509 : INVALID_PGPROCNO);
2046 rhaas 510 ECB :
511 : /* Remember head of list so we can perform wakeups after dropping lock. */
2046 rhaas 512 GIC 54 : wakeidx = nextidx;
2046 rhaas 513 ECB :
514 : /* Walk the list and update the status of all XIDs. */
2046 rhaas 515 CBC 108 : while (nextidx != INVALID_PGPROCNO)
516 : {
186 drowley 517 GNC 54 : PGPROC *nextproc = &ProcGlobal->allProcs[nextidx];
518 :
519 : /*
520 : * Transactions with more than THRESHOLD_SUBTRANS_CLOG_OPT sub-XIDs
1214 akapila 521 ECB : * should not use group XID status update mechanism.
522 : */
186 drowley 523 GNC 54 : Assert(nextproc->subxidStatus.count <= THRESHOLD_SUBTRANS_CLOG_OPT);
2046 rhaas 524 ECB :
186 drowley 525 GNC 54 : TransactionIdSetPageStatusInternal(nextproc->clogGroupMemberXid,
526 54 : nextproc->subxidStatus.count,
527 54 : nextproc->subxids.xids,
528 : nextproc->clogGroupMemberXidStatus,
529 : nextproc->clogGroupMemberLsn,
530 : nextproc->clogGroupMemberPage);
2046 rhaas 531 ECB :
532 : /* Move to next proc in list. */
186 drowley 533 GNC 54 : nextidx = pg_atomic_read_u32(&nextproc->clogGroupNext);
534 : }
2046 rhaas 535 ECB :
536 : /* We're done with the lock now. */
1059 tgl 537 GIC 54 : LWLockRelease(XactSLRULock);
538 :
539 : /*
540 : * Now that we've released the lock, go back and wake everybody up. We
541 : * don't do this under the lock so as to keep lock hold times to a
2046 rhaas 542 ECB : * minimum.
543 : */
2046 rhaas 544 CBC 108 : while (wakeidx != INVALID_PGPROCNO)
545 : {
186 drowley 546 GNC 54 : PGPROC *wakeproc = &ProcGlobal->allProcs[wakeidx];
2046 rhaas 547 ECB :
		/*
		 * Fetch the successor before resetting this member's link and
		 * flag: a follower leaves its wait loop as soon as it sees
		 * clogGroupMember false, and it asserts that its clogGroupNext is
		 * already INVALID_PGPROCNO at that point.
		 */
186 drowley 548 GNC 54 : wakeidx = pg_atomic_read_u32(&wakeproc->clogGroupNext);
549 54 : pg_atomic_write_u32(&wakeproc->clogGroupNext, INVALID_PGPROCNO);
2046 rhaas 550 ECB :
551 : /* ensure all previous writes are visible before follower continues. */
2046 rhaas 552 CBC 54 : pg_write_barrier();
553 :
186 drowley 554 GNC 54 : wakeproc->clogGroupMember = false;
2046 rhaas 555 EUB :
		/* The leader pushed itself onto the list too; it is not asleep. */
186 drowley 556 GNC 54 : if (wakeproc != MyProc)
186 drowley 557 UNC 0 : PGSemaphoreUnlock(wakeproc->sem);
2046 rhaas 558 ECB : }
559 :
2046 rhaas 560 GIC 54 : return true;
561 : }
562 :
563 : /*
564 : * Sets the commit status of a single transaction.
565 : *
566 : * Must be called with XactSLRULock held
5284 alvherre 567 ECB : */
568 : static void
5284 alvherre 569 CBC 326958 : TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno)
5284 alvherre 570 ECB : {
5284 alvherre 571 GIC 326958 : int byteno = TransactionIdToByte(xid);
572 326958 : int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
573 : char *byteptr;
574 : char byteval;
5270 alvherre 575 ECB : char curval;
5284 576 :
1059 tgl 577 GIC 326958 : byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
5270 alvherre 578 326958 : curval = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
579 :
580 : /*
581 : * When replaying transactions during recovery we still need to perform
582 : * the two phases of subcommit and then commit. However, some transactions
583 : * are already correctly marked, so we just treat those as a no-op which
5270 alvherre 584 ECB : * allows us to keep the following Assert as restrictive as possible.
585 : */
5270 alvherre 586 GBC 326958 : if (InRecovery && status == TRANSACTION_STATUS_SUB_COMMITTED &&
587 : curval == TRANSACTION_STATUS_COMMITTED)
5270 alvherre 588 UIC 0 : return;
589 :
590 : /*
591 : * Current state change should be from 0 or subcommitted to target state
5270 alvherre 592 ECB : * or we should already be there when replaying changes during recovery.
593 : */
5270 alvherre 594 GIC 326958 : Assert(curval == 0 ||
595 : (curval == TRANSACTION_STATUS_SUB_COMMITTED &&
596 : status != TRANSACTION_STATUS_IN_PROGRESS) ||
597 : curval == status);
7897 tgl 598 ECB :
6854 599 : /* note this assumes exclusive access to the clog page */
6854 tgl 600 CBC 326958 : byteval = *byteptr;
601 326958 : byteval &= ~(((1 << CLOG_BITS_PER_XACT) - 1) << bshift);
6854 tgl 602 GIC 326958 : byteval |= (status << bshift);
603 326958 : *byteptr = byteval;
604 :
605 : /*
606 : * Update the group LSN if the transaction completion LSN is higher.
607 : *
608 : * Note: lsn will be invalid when supplied during InRecovery processing,
609 : * so we don't need to do anything special to avoid LSN updates during
610 : * recovery. After recovery completes the next clog change will set the
5730 tgl 611 ECB : * LSN correctly.
612 : */
5730 tgl 613 CBC 326958 : if (!XLogRecPtrIsInvalid(lsn))
614 : {
615 26381 : int lsnindex = GetLSNIndex(slotno, xid);
5730 tgl 616 ECB :
1059 tgl 617 GIC 26381 : if (XactCtl->shared->group_lsn[lsnindex] < lsn)
618 23769 : XactCtl->shared->group_lsn[lsnindex] = lsn;
619 : }
620 : }
621 :
622 : /*
623 : * Interrogate the state of a transaction in the commit log.
624 : *
625 : * Aside from the actual commit status, this function returns (into *lsn)
626 : * an LSN that is late enough to be able to guarantee that if we flush up to
627 : * that LSN then we will have flushed the transaction's commit record to disk.
628 : * The result is not necessarily the exact LSN of the transaction's commit
629 : * record! For example, for long-past transactions (those whose clog pages
630 : * already migrated to disk), we'll return InvalidXLogRecPtr. Also, because
631 : * we group transactions on the same clog page to conserve storage, we might
632 : * return the LSN of a later transaction that falls into the same group.
633 : *
634 : * NB: this is a low-level routine and is NOT the preferred entry point
635 : * for most uses; TransactionLogFetch() in transam.c is the intended caller.
7897 tgl 636 ECB : */
637 : XidStatus
5730 tgl 638 CBC 1799999 : TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
7897 tgl 639 ECB : {
7897 tgl 640 CBC 1799999 : int pageno = TransactionIdToPage(xid);
7897 tgl 641 GIC 1799999 : int byteno = TransactionIdToByte(xid);
642 1799999 : int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
643 : int slotno;
644 : int lsnindex;
645 : char *byteptr;
646 : XidStatus status;
647 :
6333 tgl 648 ECB : /* lock is acquired by SimpleLruReadPage_ReadOnly */
7897 649 :
1059 tgl 650 GIC 1799999 : slotno = SimpleLruReadPage_ReadOnly(XactCtl, pageno, xid);
1059 tgl 651 CBC 1799999 : byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
652 :
7897 653 1799999 : status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
7897 tgl 654 ECB :
5730 tgl 655 GIC 1799999 : lsnindex = GetLSNIndex(slotno, xid);
1059 tgl 656 CBC 1799999 : *lsn = XactCtl->shared->group_lsn[lsnindex];
657 :
658 1799999 : LWLockRelease(XactSLRULock);
659 :
7897 tgl 660 GIC 1799999 : return status;
661 : }
662 :
663 : /*
664 : * Number of shared CLOG buffers.
665 : *
666 : * On larger multi-processor systems, it is possible to have many CLOG page
667 : * requests in flight at one time which could lead to disk access for CLOG
668 : * page if the required page is not found in memory. Testing revealed that we
669 : * can get the best performance by having 128 CLOG buffers, more than that it
670 : * doesn't improve performance.
671 : *
672 : * Unconditionally keeping the number of CLOG buffers to 128 did not seem like
673 : * a good idea, because it would increase the minimum amount of shared memory
674 : * required to start, which could be a problem for people running very small
675 : * configurations. The following formula seems to represent a reasonable
676 : * compromise: people with very low values for shared_buffers will get fewer
677 : * CLOG buffers as well, and everyone else will get 128.
4111 rhaas 678 ECB : */
679 : Size
4111 rhaas 680 CBC 4564 : CLOGShmemBuffers(void)
681 : {
2557 andres 682 GIC 4564 : return Min(128, Max(4, NBuffers / 512));
683 : }
684 :
685 : /*
686 : * Initialization of shared memory for CLOG
7897 tgl 687 ECB : */
688 : Size
7897 tgl 689 CBC 2738 : CLOGShmemSize(void)
690 : {
4111 rhaas 691 GIC 2738 : return SimpleLruShmemSize(CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE);
692 : }
7897 tgl 693 ECB :
694 : void
7897 tgl 695 CBC 1826 : CLOGShmemInit(void)
7897 tgl 696 ECB : {
1059 tgl 697 CBC 1826 : XactCtl->PagePrecedes = CLOGPagePrecedes;
1059 tgl 698 GIC 1826 : SimpleLruInit(XactCtl, "Xact", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE,
926 tmunro 699 CBC 1826 : XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER,
926 tmunro 700 ECB : SYNC_HANDLER_CLOG);
813 noah 701 GIC 1826 : SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE);
7897 tgl 702 1826 : }
703 :
704 : /*
705 : * This func must be called ONCE on system install. It creates
706 : * the initial CLOG segment. (The CLOG directory is assumed to
707 : * have been created by initdb, and CLOGShmemInit must have been
708 : * called already.)
7897 tgl 709 ECB : */
710 : void
7897 tgl 711 GIC 305 : BootStrapCLOG(void)
712 : {
7897 tgl 713 ECB : int slotno;
714 :
1059 tgl 715 GIC 305 : LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
7897 tgl 716 ECB :
717 : /* Create and zero the first page of the commit log */
7897 tgl 718 GIC 305 : slotno = ZeroCLOGPage(0, false);
7897 tgl 719 ECB :
720 : /* Make sure it's written out */
1059 tgl 721 GIC 305 : SimpleLruWritePage(XactCtl, slotno);
1059 tgl 722 CBC 305 : Assert(!XactCtl->shared->page_dirty[slotno]);
7897 tgl 723 ECB :
1059 tgl 724 GIC 305 : LWLockRelease(XactSLRULock);
7897 725 305 : }
726 :
727 : /*
728 : * Initialize (or reinitialize) a page of CLOG to zeroes.
729 : * If writeXlog is true, also emit an XLOG record saying we did this.
730 : *
731 : * The page is not actually written, just set up in shared memory.
732 : * The slot number of the new page is returned.
733 : *
734 : * Control lock must be held at entry, and will be held at exit.
7897 tgl 735 ECB : */
736 : static int
7897 tgl 737 GIC 608 : ZeroCLOGPage(int pageno, bool writeXlog)
738 : {
6803 tgl 739 ECB : int slotno;
740 :
1059 tgl 741 CBC 608 : slotno = SimpleLruZeroPage(XactCtl, pageno);
7897 tgl 742 ECB :
7897 tgl 743 GIC 608 : if (writeXlog)
7897 tgl 744 CBC 303 : WriteZeroPageXlogRec(pageno);
745 :
7897 tgl 746 GIC 608 : return slotno;
747 : }
748 :
749 : /*
750 : * This must be called ONCE during postmaster or standalone-backend startup,
751 : * after StartupXLOG has initialized ShmemVariableCache->nextXid.
7897 tgl 752 ECB : */
753 : void
7897 tgl 754 CBC 1176 : StartupCLOG(void)
7897 tgl 755 ECB : {
971 andres 756 GIC 1176 : TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
6796 tgl 757 CBC 1176 : int pageno = TransactionIdToPage(xid);
758 :
1059 tgl 759 GIC 1176 : LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
760 :
761 : /*
7897 tgl 762 ECB : * Initialize our idea of the latest page number.
763 : */
1059 tgl 764 CBC 1176 : XactCtl->shared->latest_page_number = pageno;
6796 tgl 765 ECB :
1059 tgl 766 GIC 1176 : LWLockRelease(XactSLRULock);
4176 simon 767 1176 : }
768 :
769 : /*
770 : * This must be called ONCE at the end of startup/recovery.
4176 simon 771 ECB : */
772 : void
4176 simon 773 CBC 1142 : TrimCLOG(void)
4176 simon 774 ECB : {
971 andres 775 GIC 1142 : TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
4176 simon 776 CBC 1142 : int pageno = TransactionIdToPage(xid);
777 :
1059 tgl 778 GIC 1142 : LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
779 :
780 : /*
781 : * Zero out the remainder of the current clog page. Under normal
782 : * circumstances it should be zeroes already, but it seems at least
783 : * theoretically possible that XLOG replay will have settled on a nextXID
784 : * value that is less than the last XID actually used and marked by the
785 : * previous database lifecycle (since subtransaction commit writes clog
786 : * but makes no WAL entry). Let's just be safe. (We need not worry about
787 : * pages beyond the current one, since those will be zeroed when first
788 : * used. For the same reason, there is no need to do anything when
789 : * nextXid is exactly at a page boundary; and it's likely that the
6385 bruce 790 ECB : * "current" page doesn't exist yet in that case.)
791 : */
6682 tgl 792 CBC 1142 : if (TransactionIdToPgIndex(xid) != 0)
6682 tgl 793 ECB : {
6682 tgl 794 GIC 1142 : int byteno = TransactionIdToByte(xid);
795 1142 : int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
796 : int slotno;
6682 tgl 797 ECB : char *byteptr;
798 :
1059 tgl 799 GIC 1142 : slotno = SimpleLruReadPage(XactCtl, pageno, false, xid);
800 1142 : byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
6682 tgl 801 ECB :
802 : /* Zero so-far-unused positions in the current byte */
6682 tgl 803 CBC 1142 : *byteptr &= (1 << bshift) - 1;
804 : /* Zero the rest of the page */
805 1142 : MemSet(byteptr + 1, 0, BLCKSZ - byteno - 1);
806 :
1059 tgl 807 GIC 1142 : XactCtl->shared->page_dirty[slotno] = true;
6682 tgl 808 ECB : }
6796 809 :
1059 tgl 810 GIC 1142 : LWLockRelease(XactSLRULock);
7897 811 1142 : }
812 :
813 : /*
814 : * Perform a checkpoint --- either during shutdown, or on-the-fly
7897 tgl 815 ECB : */
816 : void
7897 tgl 817 GIC 2363 : CheckPointCLOG(void)
818 : {
819 : /*
820 : * Write dirty CLOG pages to disk. This may result in sync requests
821 : * queued for later handling by ProcessSyncRequests(), as part of the
822 : * checkpoint.
926 tmunro 823 ECB : */
824 : TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true);
926 tmunro 825 CBC 2363 : SimpleLruWriteAll(XactCtl, true);
826 : TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true);
7897 tgl 827 GIC 2363 : }
828 :
829 :
830 : /*
831 : * Make sure that CLOG has room for a newly-allocated XID.
832 : *
833 : * NB: this is called while holding XidGenLock. We want it to be very fast
834 : * most of the time; even when it's not so fast, no actual I/O need happen
835 : * unless we're forced to write out a dirty clog or xlog page to make room
836 : * in shared memory.
7897 tgl 837 ECB : */
838 : void
7897 tgl 839 GIC 301130 : ExtendCLOG(TransactionId newestXact)
840 : {
841 : int pageno;
842 :
843 : /*
844 : * No work except at first XID of a page. But beware: just after
7896 tgl 845 ECB : * wraparound, the first XID of page zero is FirstNormalTransactionId.
846 : */
7896 tgl 847 CBC 301130 : if (TransactionIdToPgIndex(newestXact) != 0 &&
848 : !TransactionIdEquals(newestXact, FirstNormalTransactionId))
7897 849 300827 : return;
850 :
851 303 : pageno = TransactionIdToPage(newestXact);
852 :
1059 tgl 853 GIC 303 : LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
7897 tgl 854 ECB :
855 : /* Zero the page and make an XLOG entry about it */
3425 heikki.linnakangas 856 CBC 303 : ZeroCLOGPage(pageno, true);
857 :
1059 tgl 858 GIC 303 : LWLockRelease(XactSLRULock);
859 : }
860 :
861 :
862 : /*
863 : * Remove all CLOG segments before the one holding the passed transaction ID
864 : *
865 : * Before removing any CLOG data, we must flush XLOG to disk, to ensure
866 : * that any recently-emitted FREEZE_PAGE records have reached disk; otherwise
867 : * a crash and restart might leave us with some unfrozen tuples referencing
868 : * removed CLOG data. We choose to emit a special TRUNCATE XLOG record too.
869 : * Replaying the deletion from XLOG is not critical, since the files could
870 : * just as well be removed later, but doing so prevents a long-running hot
871 : * standby server from acquiring an unreasonably bloated CLOG directory.
872 : *
873 : * Since CLOG segments hold a large number of transactions, the opportunity to
874 : * actually remove a segment is fairly rare, and so it seems best not to do
875 : * the XLOG flush unless we have confirmed that there is a removable segment.
7897 tgl 876 ECB : */
877 : void
2208 rhaas 878 GIC 317 : TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
879 : {
880 : int cutoffPage;
881 :
882 : /*
883 : * The cutoff point is the start of the segment containing oldestXact. We
6385 bruce 884 ECB : * pass the *page* containing oldestXact to SimpleLruTruncate.
885 : */
7897 tgl 886 GIC 317 : cutoffPage = TransactionIdToPage(oldestXact);
6803 tgl 887 ECB :
888 : /* Check to see if there's any files that could be removed */
1059 tgl 889 GIC 317 : if (!SlruScanDirectory(XactCtl, SlruScanDirCbReportPresence, &cutoffPage))
6803 890 317 : return; /* nothing to remove */
891 :
892 : /*
893 : * Advance oldestClogXid before truncating clog, so concurrent xact status
894 : * lookups can ensure they don't attempt to access truncated-away clog.
895 : *
896 : * It's only necessary to do this if we will actually truncate away clog
2208 rhaas 897 EUB : * pages.
898 : */
2208 rhaas 899 UIC 0 : AdvanceOldestClogXid(oldestXact);
900 :
901 : /*
902 : * Write XLOG record and flush XLOG to disk. We record the oldest xid
903 : * we're keeping information about here so we can ensure that it's always
904 : * ahead of clog truncation in case we crash, and so a standby finds out
2153 bruce 905 EUB : * the new valid xid before the next checkpoint.
906 : */
2208 rhaas 907 UIC 0 : WriteTruncateXlogRec(cutoffPage, oldestXact, oldestxid_datoid);
6803 tgl 908 EUB :
909 : /* Now we can remove the old CLOG segment(s) */
1059 tgl 910 UIC 0 : SimpleLruTruncate(XactCtl, cutoffPage);
911 : }
912 :
913 :
914 : /*
915 : * Decide whether a CLOG page number is "older" for truncation purposes.
916 : *
917 : * We need to use comparison of TransactionIds here in order to do the right
918 : * thing with wraparound XID arithmetic. However, TransactionIdPrecedes()
919 : * would get weird about permanent xact IDs. So, offset both such that xid1,
920 : * xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset
921 : * is relevant to page 0 and to the page preceding page 0.
922 : *
923 : * The page containing oldestXact-2^31 is the important edge case. The
924 : * portion of that page equaling or following oldestXact-2^31 is expendable,
925 : * but the portion preceding oldestXact-2^31 is not. When oldestXact-2^31 is
926 : * the first XID of a page and segment, the entire page and segment is
927 : * expendable, and we could truncate the segment. Recognizing that case would
928 : * require making oldestXact, not just the page containing oldestXact,
929 : * available to this callback. The benefit would be rare and small, so we
930 : * don't optimize that edge case.
7897 tgl 931 ECB : */
932 : static bool
7897 tgl 933 GIC 71531 : CLOGPagePrecedes(int page1, int page2)
934 : {
935 : TransactionId xid1;
7897 tgl 936 ECB : TransactionId xid2;
937 :
7836 tgl 938 CBC 71531 : xid1 = ((TransactionId) page1) * CLOG_XACTS_PER_PAGE;
813 noah 939 71531 : xid1 += FirstNormalTransactionId + 1;
7836 tgl 940 GIC 71531 : xid2 = ((TransactionId) page2) * CLOG_XACTS_PER_PAGE;
813 noah 941 CBC 71531 : xid2 += FirstNormalTransactionId + 1;
7897 tgl 942 ECB :
813 noah 943 GIC 119007 : return (TransactionIdPrecedes(xid1, xid2) &&
944 47476 : TransactionIdPrecedes(xid1, xid2 + CLOG_XACTS_PER_PAGE - 1));
945 : }
946 :
947 :
948 : /*
949 : * Write a ZEROPAGE xlog record
7897 tgl 950 ECB : */
951 : static void
7897 tgl 952 CBC 303 : WriteZeroPageXlogRec(int pageno)
7897 tgl 953 ECB : {
3062 heikki.linnakangas 954 CBC 303 : XLogBeginInsert();
955 303 : XLogRegisterData((char *) (&pageno), sizeof(int));
3062 heikki.linnakangas 956 GIC 303 : (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
7897 tgl 957 303 : }
958 :
959 : /*
960 : * Write a TRUNCATE xlog record
961 : *
962 : * We must flush the xlog record to disk before returning --- see notes
963 : * in TruncateCLOG().
5999 tgl 964 EUB : */
965 : static void
2208 rhaas 966 UIC 0 : WriteTruncateXlogRec(int pageno, TransactionId oldestXact, Oid oldestXactDb)
967 : {
968 : XLogRecPtr recptr;
2208 rhaas 969 EUB : xl_clog_truncate xlrec;
970 :
2208 rhaas 971 UBC 0 : xlrec.pageno = pageno;
2208 rhaas 972 UIC 0 : xlrec.oldestXact = oldestXact;
2208 rhaas 973 UBC 0 : xlrec.oldestXactDb = oldestXactDb;
5999 tgl 974 EUB :
3062 heikki.linnakangas 975 UBC 0 : XLogBeginInsert();
2208 rhaas 976 0 : XLogRegisterData((char *) (&xlrec), sizeof(xl_clog_truncate));
3062 heikki.linnakangas 977 0 : recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE);
5999 tgl 978 UIC 0 : XLogFlush(recptr);
979 0 : }
980 :
981 : /*
982 : * CLOG resource manager's routines
6803 tgl 983 EUB : */
984 : void
3062 heikki.linnakangas 985 UBC 0 : clog_redo(XLogReaderState *record)
986 : {
3062 heikki.linnakangas 987 UIC 0 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
7897 tgl 988 EUB :
989 : /* Backup blocks are not used in clog records */
3062 heikki.linnakangas 990 UBC 0 : Assert(!XLogRecHasAnyBlockRefs(record));
991 :
6803 tgl 992 UIC 0 : if (info == CLOG_ZEROPAGE)
993 : {
994 : int pageno;
6803 tgl 995 EUB : int slotno;
996 :
6803 tgl 997 UBC 0 : memcpy(&pageno, XLogRecGetData(record), sizeof(int));
998 :
1059 999 0 : LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
6803 tgl 1000 EUB :
6803 tgl 1001 UBC 0 : slotno = ZeroCLOGPage(pageno, false);
1059 tgl 1002 UIC 0 : SimpleLruWritePage(XactCtl, slotno);
1059 tgl 1003 UBC 0 : Assert(!XactCtl->shared->page_dirty[slotno]);
1004 :
1005 0 : LWLockRelease(XactSLRULock);
1006 : }
5999 tgl 1007 UIC 0 : else if (info == CLOG_TRUNCATE)
1008 : {
2208 rhaas 1009 EUB : xl_clog_truncate xlrec;
1010 :
2208 rhaas 1011 UBC 0 : memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_clog_truncate));
1012 :
1013 0 : AdvanceOldestClogXid(xlrec.oldestXact);
1014 :
1059 tgl 1015 UIC 0 : SimpleLruTruncate(XactCtl, xlrec.pageno);
5999 tgl 1016 EUB : }
1017 : else
5999 tgl 1018 UIC 0 : elog(PANIC, "clog_redo: unknown op code %u", info);
6803 1019 0 : }
1020 :
1021 : /*
1022 : * Entrypoint for sync.c to sync clog files.
926 tmunro 1023 EUB : */
1024 : int
926 tmunro 1025 UBC 0 : clogsyncfiletag(const FileTag *ftag, char *path)
1026 : {
926 tmunro 1027 UIC 0 : return SlruSyncFileTag(XactCtl, ftag, path);
1028 : }
|