/*-------------------------------------------------------------------------
 *
 * freelist.c
 *	  routines for managing the buffer pool's replacement strategy.
 *
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/buffer/freelist.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "pgstat.h"
#include "port/atomics.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/proc.h"

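/*
 * Read an int field exactly once, through a volatile-qualified pointer, so
 * the compiler can neither merge nor repeat the load.  Used below to read
 * shared fields (e.g. bgwprocno) without taking buffer_strategy_lock.
 */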
#define INT_ACCESS_ONCE(var)	((int) (*((volatile int *) &(var))))


/*
 * The shared freelist control information.
 */
typedef struct
{
	/* Spinlock: protects the values below */
	slock_t		buffer_strategy_lock;

	/*
	 * Clock sweep hand: index of next buffer to consider grabbing. Note that
	 * this isn't a concrete buffer - we only ever increase the value. So, to
	 * get an actual buffer, it needs to be used modulo NBuffers.
	 */
	pg_atomic_uint32 nextVictimBuffer;

	int			firstFreeBuffer;	/* Head of list of unused buffers */
	int			lastFreeBuffer; /* Tail of list of unused buffers */

	/*
	 * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is,
	 * when the list is empty)
	 */

	/*
	 * Statistics.  These counters should be wide enough that they can't
	 * overflow during a single bgwriter cycle.
	 */
	uint32		completePasses; /* Complete cycles of the clock sweep */
	pg_atomic_uint32 numBufferAllocs;	/* Buffers allocated since last reset */

	/*
	 * Bgworker process to be notified upon activity or -1 if none. See
	 * StrategyNotifyBgWriter.
	 */
	int			bgwprocno;
} BufferStrategyControl;

/* Pointers to shared state */
static BufferStrategyControl *StrategyControl = NULL;

/*
 * Private (non-shared) state for managing a ring of shared buffers to re-use.
 * This is currently the only kind of BufferAccessStrategy object, but someday
 * we might have more kinds.
 */
typedef struct BufferAccessStrategyData
{
	/* Overall strategy type */
	BufferAccessStrategyType btype;
	/* Number of elements in buffers[] array */
	int			nbuffers;

	/*
	 * Index of the "current" slot in the ring, ie, the one most recently
	 * returned by GetBufferFromRing.
	 */
	int			current;

	/*
	 * Array of buffer numbers.  InvalidBuffer (that is, zero) indicates we
	 * have not yet selected a buffer for this ring slot.  For allocation
	 * simplicity this is palloc'd together with the fixed fields of the
	 * struct.
	 */
	Buffer		buffers[FLEXIBLE_ARRAY_MEMBER];
} BufferAccessStrategyData;


/* Prototypes for internal functions */
static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
									 uint32 *buf_state);
static void AddBufferToRing(BufferAccessStrategy strategy,
							BufferDesc *buf);

/*
 * ClockSweepTick - Helper routine for StrategyGetBuffer()
 *
 * Move the clock hand one buffer ahead of its current position and return the
 * id of the buffer now under the hand.
 */
static inline uint32
ClockSweepTick(void)
{
	uint32		victim;

	/*
	 * Atomically move the hand ahead one buffer - if several processes are
	 * doing this concurrently, buffers can be returned slightly out of
	 * apparent order.
	 */
	victim =
		pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);

	if (victim >= NBuffers)
	{
		uint32		originalVictim = victim;

		/* always wrap what we look up in BufferDescriptors */
		victim = victim % NBuffers;

		/*
		 * If we're the one that just caused a wraparound, force
		 * completePasses to be incremented while holding the spinlock. We
		 * need the spinlock so StrategySyncStart() can return a consistent
		 * value consisting of nextVictimBuffer and completePasses.
		 */
		if (victim == 0)
		{
			uint32		expected;
			uint32		wrapped;
			bool		success = false;

			expected = originalVictim + 1;

			while (!success)
			{
				/*
				 * Acquire the spinlock while increasing completePasses. That
				 * allows other readers to read nextVictimBuffer and
				 * completePasses in a consistent manner which is required
				 * for StrategySyncStart().  In theory delaying the increment
				 * could lead to an overflow of nextVictimBuffer, but that's
				 * highly unlikely and wouldn't be particularly harmful.
				 */
				SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

				wrapped = expected % NBuffers;

				success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
														 &expected, wrapped);
				if (success)
					StrategyControl->completePasses++;
				SpinLockRelease(&StrategyControl->buffer_strategy_lock);
			}
		}
	}
	return victim;
}

/*
 * have_free_buffer -- a lockless check to see if there is a free buffer in
 *					   the buffer pool.
 *
 * Even if the result is true, it can become stale as soon as free buffers
 * are taken off the list by other operations, so callers that strictly need
 * a free buffer should not rely on this check.
 */
bool
have_free_buffer(void)
{
	if (StrategyControl->firstFreeBuffer >= 0)
		return true;
	else
		return false;
}

/*
 * StrategyGetBuffer
 *
 *	Called by the bufmgr to get the next candidate buffer to use in
 *	BufferAlloc(). The only hard requirement BufferAlloc() has is that
 *	the selected buffer must not currently be pinned by anyone.
 *
 *	strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *	To ensure that no one else can pin the buffer before we do, we must
 *	return the buffer with the buffer header spinlock still held.
 */
BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
{
	BufferDesc *buf;
	int			bgwprocno;
	int			trycounter;
	uint32		local_buf_state;	/* to avoid repeated (de-)referencing */

	*from_ring = false;

	/*
	 * If given a strategy object, see whether it can select a buffer. We
	 * assume strategy objects don't need buffer_strategy_lock.
	 */
	if (strategy != NULL)
	{
		buf = GetBufferFromRing(strategy, buf_state);
		if (buf != NULL)
		{
			*from_ring = true;
			return buf;
		}
	}

	/*
	 * If asked, we need to wake the bgwriter. Since we don't want to rely on
	 * a spinlock for this we force a read from shared memory once, and then
	 * set the latch based on that value. We need to go to that length
	 * because otherwise bgwprocno might be reset while/after we check, since
	 * the compiler might just reread from memory.
	 *
	 * This can possibly set the latch of the wrong process if the bgwriter
	 * dies in the wrong moment. But since PGPROC->procLatch is never
	 * deallocated the worst consequence of that is that we set the latch of
	 * some arbitrary process.
	 */
	bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
	if (bgwprocno != -1)
	{
		/* reset bgwprocno first, before setting the latch */
		StrategyControl->bgwprocno = -1;

		/*
		 * Not acquiring ProcArrayLock here which is slightly icky. It's
		 * actually fine because procLatch isn't ever freed, so we just can
		 * potentially set the wrong process' (or no process') latch.
		 */
		SetLatch(&ProcGlobal->allProcs[bgwprocno].procLatch);
	}

	/*
	 * We count buffer allocation requests so that the bgwriter can estimate
	 * the rate of buffer consumption.  Note that buffers recycled by a
	 * strategy object are intentionally not counted here.
	 */
	pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);

	/*
	 * First check, without acquiring the lock, whether there are buffers in
	 * the freelist. Since we otherwise don't require the spinlock in every
	 * StrategyGetBuffer() invocation, it'd be sad to acquire it here -
	 * uselessly in most cases. That obviously leaves a race where a buffer
	 * is put on the freelist but we don't see the store yet - but that's
	 * pretty harmless, it'll just get used during the next buffer
	 * acquisition.
	 *
	 * If there are buffers on the freelist, acquire the spinlock to pop one
	 * buffer off the freelist. Then check whether that buffer is usable and
	 * repeat if not.
	 *
	 * Note that the freeNext fields are considered to be protected by the
	 * buffer_strategy_lock not the individual buffer spinlocks, so it's OK
	 * to manipulate them without holding the spinlock.
	 */
	if (StrategyControl->firstFreeBuffer >= 0)
	{
		while (true)
		{
			/* Acquire the spinlock to remove element from the freelist */
			SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

			if (StrategyControl->firstFreeBuffer < 0)
			{
				SpinLockRelease(&StrategyControl->buffer_strategy_lock);
				break;
			}

			buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer);
			Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

			/* Unconditionally remove buffer from freelist */
			StrategyControl->firstFreeBuffer = buf->freeNext;
			buf->freeNext = FREENEXT_NOT_IN_LIST;

			/*
			 * Release the lock so someone else can access the freelist while
			 * we check out this buffer.
			 */
			SpinLockRelease(&StrategyControl->buffer_strategy_lock);

			/*
			 * If the buffer is pinned or has a nonzero usage_count, we
			 * cannot use it; discard it and retry.  (This can only happen if
			 * VACUUM put a valid buffer in the freelist and then someone
			 * else used it before we got to it.  It's probably impossible
			 * altogether as of 8.3, but we'd better check anyway.)
			 */
			local_buf_state = LockBufHdr(buf);
			if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
				&& BUF_STATE_GET_USAGECOUNT(local_buf_state) == 0)
			{
				if (strategy != NULL)
					AddBufferToRing(strategy, buf);
				*buf_state = local_buf_state;
				return buf;
			}
			UnlockBufHdr(buf, local_buf_state);
		}
	}

	/* Nothing on the freelist, so run the "clock sweep" algorithm */
	trycounter = NBuffers;
	for (;;)
	{
		buf = GetBufferDescriptor(ClockSweepTick());

		/*
		 * If the buffer is pinned or has a nonzero usage_count, we cannot
		 * use it; decrement the usage_count (unless pinned) and keep
		 * scanning.
		 */
		local_buf_state = LockBufHdr(buf);

		if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0)
		{
			if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
			{
				local_buf_state -= BUF_USAGECOUNT_ONE;

				trycounter = NBuffers;
			}
			else
			{
				/* Found a usable buffer */
				if (strategy != NULL)
					AddBufferToRing(strategy, buf);
				*buf_state = local_buf_state;
				return buf;
			}
		}
		else if (--trycounter == 0)
		{
			/*
			 * We've scanned all the buffers without making any state
			 * changes, so all the buffers are pinned (or were when we looked
			 * at them).  We could hope that someone will free one
			 * eventually, but it's probably better to fail than to risk
			 * getting stuck in an infinite loop.
			 */
			UnlockBufHdr(buf, local_buf_state);
			elog(ERROR, "no unpinned buffers available");
		}
		UnlockBufHdr(buf, local_buf_state);
	}
}
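
/*
 * Hypothetical usage sketch (the names below are illustrative, not part of
 * the buffer manager): the essential caller contract for
 * StrategyGetBuffer().  The buffer comes back with its header spinlock held
 * and *buf_state set; the real caller, BufferAlloc() in bufmgr.c, pins the
 * victim before releasing that lock.  This sketch only shows the lock
 * discipline.
 */
static inline BufferDesc *
example_take_victim(BufferAccessStrategy strategy)
{
	uint32		buf_state;
	bool		from_ring;
	BufferDesc *buf;

	/* returned with the buffer header spinlock already held */
	buf = StrategyGetBuffer(strategy, &buf_state, &from_ring);

	/* ... a real caller would pin the buffer here, while still locked ... */

	UnlockBufHdr(buf, buf_state);
	return buf;
}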

/*
 * StrategyFreeBuffer: put a buffer on the freelist
 */
void
StrategyFreeBuffer(BufferDesc *buf)
{
	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

	/*
	 * It is possible that we are told to put something in the freelist that
	 * is already in it; don't screw up the list if so.
	 */
	if (buf->freeNext == FREENEXT_NOT_IN_LIST)
	{
		buf->freeNext = StrategyControl->firstFreeBuffer;
		if (buf->freeNext < 0)
			StrategyControl->lastFreeBuffer = buf->buf_id;
		StrategyControl->firstFreeBuffer = buf->buf_id;
	}

	SpinLockRelease(&StrategyControl->buffer_strategy_lock);
}

/*
 * StrategySyncStart -- tell BufferSync where to start syncing
 *
 * The result is the buffer index of the best buffer to sync first.
 * BufferSync() will proceed circularly around the buffer array from there.
 *
 * In addition, we return the completed-pass count (which is effectively
 * the higher-order bits of nextVictimBuffer) and the count of recent buffer
 * allocs if non-NULL pointers are passed.  The alloc count is reset after
 * being read.
 */
int
StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
{
	uint32		nextVictimBuffer;
	int			result;

	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
	nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer);
	result = nextVictimBuffer % NBuffers;

	if (complete_passes)
	{
		*complete_passes = StrategyControl->completePasses;

		/*
		 * Additionally add the number of wraparounds that happened before
		 * completePasses could be incremented. C.f. ClockSweepTick().
		 */
		*complete_passes += nextVictimBuffer / NBuffers;
	}

	if (num_buf_alloc)
	{
		*num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0);
	}
	SpinLockRelease(&StrategyControl->buffer_strategy_lock);
	return result;
}
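
/*
 * Illustrative helper (hypothetical, not used anywhere): how a consumer of
 * StrategySyncStart() such as the bgwriter can turn two successive readings
 * into the number of buffers the clock hand advanced in between, treating
 * the pass count as the high-order part of the hand (cf. BgBufferSync()).
 */
static inline uint32
example_clock_advance(uint32 prev_passes, int prev_buf_id,
					  uint32 cur_passes, int cur_buf_id)
{
	return (cur_passes - prev_passes) * NBuffers +
		(uint32) (cur_buf_id - prev_buf_id);
}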

/*
 * StrategyNotifyBgWriter -- set or clear allocation notification latch
 *
 * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will
 * set that latch.  Pass -1 to clear the pending notification before it
 * happens.  This feature is used by the bgwriter process to wake itself up
 * from hibernation, and is not meant for anybody else to use.
 */
void
StrategyNotifyBgWriter(int bgwprocno)
{
	/*
	 * We acquire buffer_strategy_lock just to ensure that the store appears
	 * atomic to StrategyGetBuffer.  The bgwriter should call this rather
	 * infrequently, so there's no performance penalty from being safe.
	 */
	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
	StrategyControl->bgwprocno = bgwprocno;
	SpinLockRelease(&StrategyControl->buffer_strategy_lock);
}


/*
 * StrategyShmemSize
 *
 * estimate the size of shared memory used by the freelist-related structures.
 *
 * Note: for somewhat historical reasons, the buffer lookup hashtable size
 * is also determined here.
 */
Size
StrategyShmemSize(void)
{
	Size		size = 0;

	/* size of lookup hash table ... see comment in StrategyInitialize */
	size = add_size(size, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS));

	/* size of the shared replacement strategy control block */
	size = add_size(size, MAXALIGN(sizeof(BufferStrategyControl)));

	return size;
}

/*
 * StrategyInitialize -- initialize the buffer cache replacement
 *		strategy.
 *
 * Assumes: All of the buffers are already built into a linked list.
 *		Only called by postmaster and only during initialization.
 */
void
StrategyInitialize(bool init)
{
	bool		found;

	/*
	 * Initialize the shared buffer lookup hashtable.
	 *
	 * Since we can't tolerate running out of lookup table entries, we must
	 * be sure to specify an adequate table size here.  The maximum
	 * steady-state usage is of course NBuffers entries, but BufferAlloc()
	 * tries to insert a new entry before deleting the old.  In principle
	 * this could be happening in each partition concurrently, so we could
	 * need as many as NBuffers + NUM_BUFFER_PARTITIONS entries.
	 */
	InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS);

	/*
	 * Get or create the shared strategy control block
	 */
	StrategyControl = (BufferStrategyControl *)
		ShmemInitStruct("Buffer Strategy Status",
						sizeof(BufferStrategyControl),
						&found);

	if (!found)
	{
		/*
		 * Only done once, usually in postmaster
		 */
		Assert(init);

		SpinLockInit(&StrategyControl->buffer_strategy_lock);

		/*
		 * Grab the whole linked list of free buffers for our strategy. We
		 * assume it was previously set up by InitBufferPool().
		 */
		StrategyControl->firstFreeBuffer = 0;
		StrategyControl->lastFreeBuffer = NBuffers - 1;

		/* Initialize the clock sweep pointer */
		pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);

		/* Clear statistics */
		StrategyControl->completePasses = 0;
		pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);

		/* No pending notification */
		StrategyControl->bgwprocno = -1;
	}
	else
		Assert(!init);
}


/* ----------------------------------------------------------------
 *				Backend-private buffer ring management
 * ----------------------------------------------------------------
 */


/*
 * GetAccessStrategy -- create a BufferAccessStrategy object
 *
 * The object is allocated in the current memory context.
 */
BufferAccessStrategy
GetAccessStrategy(BufferAccessStrategyType btype)
{
	int			ring_size_kb;

	/*
	 * Select ring size to use.  See buffer/README for rationales.
	 *
	 * Note: if you change the ring size for BAS_BULKREAD, see also
	 * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
	 */
	switch (btype)
	{
		case BAS_NORMAL:
			/* if someone asks for NORMAL, just give 'em a "default" object */
			return NULL;

		case BAS_BULKREAD:
			ring_size_kb = 256;
			break;
		case BAS_BULKWRITE:
			ring_size_kb = 16 * 1024;
			break;
		case BAS_VACUUM:
			ring_size_kb = 256;
			break;

		default:
			elog(ERROR, "unrecognized buffer access strategy: %d",
				 (int) btype);
			return NULL;		/* keep compiler quiet */
	}

	return GetAccessStrategyWithSize(btype, ring_size_kb);
}

/*
 * GetAccessStrategyWithSize -- create a BufferAccessStrategy object with a
 *		number of buffers equivalent to the passed in size.
 *
 * If the given ring size is 0, no BufferAccessStrategy will be created and
 * the function will return NULL.  ring_size_kb must not be negative.
 */
BufferAccessStrategy
GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
{
	int			ring_buffers;
	BufferAccessStrategy strategy;

	Assert(ring_size_kb >= 0);

	/* Figure out how many buffers ring_size_kb is */
	ring_buffers = ring_size_kb / (BLCKSZ / 1024);

	/* 0 means unlimited, so no BufferAccessStrategy required */
	if (ring_buffers == 0)
		return NULL;

	/* Cap to 1/8th of shared_buffers */
	ring_buffers = Min(NBuffers / 8, ring_buffers);

	/* NBuffers should never be less than 16, so this shouldn't happen */
	Assert(ring_buffers > 0);

	/* Allocate the object and initialize all elements to zeroes */
	strategy = (BufferAccessStrategy)
		palloc0(offsetof(BufferAccessStrategyData, buffers) +
				ring_buffers * sizeof(Buffer));

	/* Set fields that don't start out zero */
	strategy->btype = btype;
	strategy->nbuffers = ring_buffers;

	return strategy;
}

/*
 * GetAccessStrategyBufferCount -- an accessor for the number of buffers in
 *		the ring
 *
 * Returns 0 on NULL input to match behavior of GetAccessStrategyWithSize()
 * returning NULL with 0 size.
 */
int
GetAccessStrategyBufferCount(BufferAccessStrategy strategy)
{
	if (strategy == NULL)
		return 0;

	return strategy->nbuffers;
}

/*
 * FreeAccessStrategy -- release a BufferAccessStrategy object
 *
 * A simple pfree would do at the moment, but we would prefer that callers
 * don't assume that much about the representation of BufferAccessStrategy.
 */
void
FreeAccessStrategy(BufferAccessStrategy strategy)
{
	/* don't crash if called on a "default" strategy */
	if (strategy != NULL)
		pfree(strategy);
}
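
/*
 * Hypothetical usage sketch (illustrative only): a caller that wants an
 * explicitly sized ring could pair the two functions above like this; a
 * 256 kB BAS_BULKREAD ring is what GetAccessStrategy() would pick anyway.
 */
static inline void
example_bulkread_with_ring(void)
{
	BufferAccessStrategy ring = GetAccessStrategyWithSize(BAS_BULKREAD, 256);

	/* ... pass "ring" to ReadBufferExtended() calls for the scan ... */

	FreeAccessStrategy(ring);
}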

/*
 * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
 *		ring is empty / not usable.
 *
 * The bufhdr spin lock is held on the returned buffer.
 */
static BufferDesc *
GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
{
	BufferDesc *buf;
	Buffer		bufnum;
	uint32		local_buf_state;	/* to avoid repeated (de-)referencing */


	/* Advance to next ring slot */
	if (++strategy->current >= strategy->nbuffers)
		strategy->current = 0;

	/*
	 * If the slot hasn't been filled yet, tell the caller to allocate a new
	 * buffer with the normal allocation strategy.  The caller will then fill
	 * this slot by calling AddBufferToRing with the new buffer.
	 */
	bufnum = strategy->buffers[strategy->current];
	if (bufnum == InvalidBuffer)
		return NULL;

	/*
	 * If the buffer is pinned we cannot use it under any circumstances.
	 *
	 * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
	 * since our own previous usage of the ring element would have left it
	 * there, but it might've been decremented by clock sweep since then). A
	 * higher usage_count indicates someone else has touched the buffer, so
	 * we shouldn't re-use it.
	 */
	buf = GetBufferDescriptor(bufnum - 1);
	local_buf_state = LockBufHdr(buf);
	if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
		&& BUF_STATE_GET_USAGECOUNT(local_buf_state) <= 1)
	{
		*buf_state = local_buf_state;
		return buf;
	}
	UnlockBufHdr(buf, local_buf_state);

	/*
	 * Tell caller to allocate a new buffer with the normal allocation
	 * strategy.  The caller will then replace this ring element via
	 * AddBufferToRing.
	 */
	return NULL;
}

/*
 * AddBufferToRing -- add a buffer to the buffer ring
 *
 * Caller must hold the buffer header spinlock on the buffer.  Since this
 * is called with the spinlock held, it had better be quite cheap.
 */
static void
AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
{
	strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
}

/*
 * Utility function returning the IOContext of a given BufferAccessStrategy's
 * strategy ring.
 */
IOContext
IOContextForStrategy(BufferAccessStrategy strategy)
{
	if (!strategy)
		return IOCONTEXT_NORMAL;

	switch (strategy->btype)
	{
		case BAS_NORMAL:

			/*
			 * Currently, GetAccessStrategy() returns NULL for
			 * BufferAccessStrategyType BAS_NORMAL, so this case is
			 * unreachable.
			 */
			pg_unreachable();
			return IOCONTEXT_NORMAL;
		case BAS_BULKREAD:
			return IOCONTEXT_BULKREAD;
		case BAS_BULKWRITE:
			return IOCONTEXT_BULKWRITE;
		case BAS_VACUUM:
			return IOCONTEXT_VACUUM;
	}

	elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
	pg_unreachable();
}

/*
 * StrategyRejectBuffer -- consider rejecting a dirty buffer
 *
 * When a nondefault strategy is used, the buffer manager calls this function
 * when it turns out that the buffer selected by StrategyGetBuffer needs to
 * be written out and doing so would require flushing WAL too.  This gives us
 * a chance to choose a different victim.
 *
 * Returns true if buffer manager should ask for a new victim, and false
 * if this buffer should be written and re-used.
 */
bool
StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
{
	/* We only do this in bulkread mode */
	if (strategy->btype != BAS_BULKREAD)
		return false;

	/* Don't muck with behavior of normal buffer-replacement strategy */
	if (!from_ring ||
		strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
		return false;

	/*
	 * Remove the dirty buffer from the ring; necessary to prevent infinite
	 * loop if all ring members are dirty.
	 */
	strategy->buffers[strategy->current] = InvalidBuffer;

	return true;
}