Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * slru.h
4 : : * Simple LRU buffering for transaction status logfiles
5 : : *
6 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : * src/include/access/slru.h
10 : : *
11 : : *-------------------------------------------------------------------------
12 : : */
13 : : #ifndef SLRU_H
14 : : #define SLRU_H
15 : :
16 : : #include "access/xlogdefs.h"
17 : : #include "storage/lwlock.h"
18 : : #include "storage/sync.h"
19 : :
20 : : /*
21 : : * To avoid overflowing internal arithmetic and the size_t data type, the
22 : : * number of buffers must not exceed this number.
23 : : */
24 : : #define SLRU_MAX_ALLOWED_BUFFERS ((1024 * 1024 * 1024) / BLCKSZ)
25 : :
26 : : /*
27 : : * Define SLRU segment size. A page is the same BLCKSZ as is used everywhere
28 : : * else in Postgres. The segment size can be chosen somewhat arbitrarily;
29 : : * we make it 32 pages by default, or 256kB, i.e. 1M transactions for CLOG
30 : : * or 64K transactions for SUBTRANS.
31 : : *
32 : : * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
33 : : * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where
34 : : * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at
35 : : * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
36 : : * take no explicit notice of that fact in slru.c, except when comparing
37 : : * segment and page numbers in SimpleLruTruncate (see PagePrecedes()).
38 : : */
39 : : #define SLRU_PAGES_PER_SEGMENT 32
40 : :
41 : : /*
42 : : * Page status codes. Note that these do not include the "dirty" bit.
43 : : * page_dirty can be true only in the VALID or WRITE_IN_PROGRESS states;
44 : : * in the latter case it implies that the page has been re-dirtied since
45 : : * the write started.
46 : : */
47 : : typedef enum
48 : : {
49 : : SLRU_PAGE_EMPTY, /* buffer is not in use; its page number is undefined */
50 : : SLRU_PAGE_READ_IN_PROGRESS, /* page is being read in */
51 : : SLRU_PAGE_VALID, /* page is valid and not being written; may be dirty */
52 : : SLRU_PAGE_WRITE_IN_PROGRESS, /* page is being written out; dirty here means re-dirtied since the write began */
53 : : } SlruPageStatus;
54 : :
55 : : /*
56 : : * Shared-memory state
57 : : *
58 : : * The SLRU bank locks protect access to the per-slot fields below;
59 : : * latest_page_number instead uses atomics; see comment in slru.c.
60 : : */
61 : : typedef struct SlruSharedData
62 : : {
63 : : /* Number of buffers managed by this SLRU structure */
64 : : int num_slots;
65 : :
66 : : /*
67 : : * Arrays holding info for each buffer slot. Page number is undefined
68 : : * when status is EMPTY, as is page_lru_count.
69 : : */
70 : : char **page_buffer;
71 : : SlruPageStatus *page_status;
72 : : bool *page_dirty;
73 : : int64 *page_number;
74 : : int *page_lru_count;
75 : :
76 : : /* buffer_locks protect the I/O on each buffer slot */
77 : : LWLockPadded *buffer_locks;
78 : :
79 : : /* Locks protecting access to the in-memory buffer slots of each SLRU bank */
80 : : LWLockPadded *bank_locks;
81 : :
82 : : /*----------
83 : : * A bank-wise LRU counter is maintained because we do a victim buffer
84 : : * search within a bank. Furthermore, manipulating an individual bank
85 : : * counter avoids frequent cache invalidation since we update it every time
86 : : * we access the page.
87 : : *
88 : : * We mark a page "most recently used" by setting
89 : : * page_lru_count[slotno] = ++bank_cur_lru_count[bankno];
90 : : * The oldest page in the bank is therefore the one with the highest value
91 : : * of
92 : : * bank_cur_lru_count[bankno] - page_lru_count[slotno]
93 : : * The counts will eventually wrap around, but this calculation still
94 : : * works as long as no page's age exceeds INT_MAX counts.
95 : : *----------
96 : : */
97 : : int *bank_cur_lru_count;
98 : :
99 : : /*
100 : : * Optional array of WAL flush LSNs associated with entries in the SLRU
101 : : * pages. If not zero/NULL, we must flush WAL before writing pages (true
102 : : * for pg_xact, false for everything else). group_lsn[] has
103 : : * lsn_groups_per_page entries per buffer slot, each containing the
104 : : * highest LSN known for a contiguous group of SLRU entries on that slot's
105 : : * page.
106 : : */
107 : : XLogRecPtr *group_lsn;
108 : : int lsn_groups_per_page;
109 : :
110 : : /*
111 : : * latest_page_number is the page number of the current end of the log;
112 : : * this is not critical data, since we use it only to avoid swapping out
113 : : * the latest page.
114 : : */
115 : : pg_atomic_uint64 latest_page_number;
116 : :
117 : : /* SLRU's index for statistics purposes (might not be unique) */
118 : : int slru_stats_idx;
119 : : } SlruSharedData;
120 : :
121 : : typedef SlruSharedData *SlruShared;
122 : :
123 : : /*
124 : : * SlruCtlData is an unshared structure that points to the active information
125 : : * in shared memory.
126 : : */
127 : : typedef struct SlruCtlData
128 : : {
129 : : SlruShared shared; /* the SLRU's state in shared memory */
130 : :
131 : : /*
132 : : * Bitmask to determine bank number from page number.
133 : : */
134 : : bits16 bank_mask;
135 : :
136 : : /*
137 : : * If true, use long segment filenames formed from lower 48 bits of the
138 : : * segment number, e.g. pg_xact/000000001234. Otherwise, use short
139 : : * filenames formed from lower 16 bits of the segment number, e.g.
140 : : * pg_xact/1234.
141 : : */
142 : : bool long_segment_names;
143 : :
144 : : /*
145 : : * Which sync handler function to use when handing sync requests over to
146 : : * the checkpointer. SYNC_HANDLER_NONE to disable fsync (e.g. pg_notify).
147 : : */
148 : : SyncRequestHandler sync_handler;
149 : :
150 : : /*
151 : : * Decide whether a page is "older" for truncation and as a hint for
152 : : * evicting pages in LRU order. Return true if every entry of the first
153 : : * argument is older than every entry of the second argument. Note that
154 : : * !PagePrecedes(a,b) && !PagePrecedes(b,a) need not imply a==b; it also
155 : : * arises when some entries are older and some are not. For SLRUs using
156 : : * SimpleLruTruncate(), this must use modular arithmetic. (For others,
157 : : * the behavior of this callback has no functional implications.) Use
158 : : * SlruPagePrecedesUnitTests() in SLRUs meeting its criteria.
159 : : */
160 : : bool (*PagePrecedes) (int64, int64);
161 : :
162 : : /*
163 : : * Dir is set during SimpleLruInit and does not change thereafter. Since
164 : : * it's always the same, it doesn't need to be in shared memory.
165 : : */
166 : : char Dir[64];
167 : :
168 : : } SlruCtlData;
169 : :
170 : : typedef SlruCtlData *SlruCtl;
171 : :
172 : : /*
173 : : * Get the SLRU bank lock for given SlruCtl and the pageno.
174 : : *
175 : : * This lock needs to be acquired to access the slru buffer slots in the
176 : : * respective bank.
177 : : */
178 : : static inline LWLock *
46 alvherre@alvh.no-ip. 179 :GNC 38709774 : SimpleLruGetBankLock(SlruCtl ctl, int64 pageno)
180 : : {
181 : : int bankno;
182 : :
183 : 38709774 : bankno = pageno & ctl->bank_mask; /* bank_mask maps the page number to its bank number */
184 : 38709774 : return &(ctl->shared->bank_locks[bankno].lock); /* the LWLock inside that bank's padded entry */
185 : : }
186 : :
187 : : extern Size SimpleLruShmemSize(int nslots, int nlsns);
188 : : extern int SimpleLruAutotuneBuffers(int divisor, int max);
189 : : extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
190 : : const char *subdir, int buffer_tranche_id,
191 : : int bank_tranche_id, SyncRequestHandler sync_handler,
192 : : bool long_segment_names);
193 : : extern int SimpleLruZeroPage(SlruCtl ctl, int64 pageno);
194 : : extern int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok,
195 : : TransactionId xid);
196 : : extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno,
197 : : TransactionId xid);
198 : : extern void SimpleLruWritePage(SlruCtl ctl, int slotno);
199 : : extern void SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied);
200 : : #ifdef USE_ASSERT_CHECKING
201 : : extern void SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page);
202 : : #else /* !USE_ASSERT_CHECKING: calls expand to an empty statement */
203 : : #define SlruPagePrecedesUnitTests(ctl, per_page) do {} while (0)
204 : : #endif /* USE_ASSERT_CHECKING */
205 : : extern void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage);
206 : : extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno);
207 : :
208 : : typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int64 segpage,
209 : : void *data); /* callback type accepted by SlruScanDirectory() */
210 : : extern bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data);
211 : : extern void SlruDeleteSegment(SlruCtl ctl, int64 segno);
212 : :
213 : : extern int SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path);
214 : :
215 : : /* SlruScanDirectory public callbacks */
216 : : extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename,
217 : : int64 segpage, void *data);
218 : : extern bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage,
219 : : void *data);
220 : : extern bool check_slru_buffers(const char *name, int *newval);
221 : :
222 : : #endif /* SLRU_H */
|