LCOV - differential code coverage report
Current view: top level - src/backend/storage/buffer - freelist.c (source / functions) Coverage Total Hit UNC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 91.8 % 183 168 5 5 5 3 35 30 100 6 63 1 5
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 16 16 5 6 5 9 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * freelist.c
       4                 :  *    routines for managing the buffer pool's replacement strategy.
       5                 :  *
       6                 :  *
       7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       8                 :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :  *
      10                 :  *
      11                 :  * IDENTIFICATION
      12                 :  *    src/backend/storage/buffer/freelist.c
      13                 :  *
      14                 :  *-------------------------------------------------------------------------
      15                 :  */
      16                 : #include "postgres.h"
      17                 : 
      18                 : #include "pgstat.h"
      19                 : #include "port/atomics.h"
      20                 : #include "storage/buf_internals.h"
      21                 : #include "storage/bufmgr.h"
      22                 : #include "storage/proc.h"
      23                 : 
      24                 : #define INT_ACCESS_ONCE(var)    ((int)(*((volatile int *)&(var))))
      25                 : 
      26                 : 
/*
 * The shared freelist control information.
 *
 * A single instance lives in shared memory; it is created (or attached to)
 * by StrategyInitialize() and accessed through the StrategyControl pointer.
 */
typedef struct
{
	/* Spinlock: protects the values below */
	slock_t		buffer_strategy_lock;

	/*
	 * Clock sweep hand: index of next buffer to consider grabbing. Note that
	 * this isn't a concrete buffer - we only ever increase the value. So, to
	 * get an actual buffer, it needs to be used modulo NBuffers.
	 */
	pg_atomic_uint32 nextVictimBuffer;

	int			firstFreeBuffer;	/* Head of list of unused buffers */
	int			lastFreeBuffer; /* Tail of list of unused buffers */

	/*
	 * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is,
	 * when the list is empty)
	 */

	/*
	 * Statistics.  These counters should be wide enough that they can't
	 * overflow during a single bgwriter cycle.
	 */
	uint32		completePasses; /* Complete cycles of the clock sweep */
	pg_atomic_uint32 numBufferAllocs;	/* Buffers allocated since last reset */

	/*
	 * Bgworker process to be notified upon activity or -1 if none. See
	 * StrategyNotifyBgWriter.
	 */
	int			bgwprocno;
} BufferStrategyControl;
      63                 : 
      64                 : /* Pointers to shared state */
      65                 : static BufferStrategyControl *StrategyControl = NULL;
      66                 : 
/*
 * Private (non-shared) state for managing a ring of shared buffers to re-use.
 * This is currently the only kind of BufferAccessStrategy object, but someday
 * we might have more kinds.
 */
typedef struct BufferAccessStrategyData
{
	/* Overall strategy type */
	BufferAccessStrategyType btype;
	/* Number of elements in buffers[] array */
	int			nbuffers;

	/*
	 * Index of the "current" slot in the ring, ie, the one most recently
	 * returned by GetBufferFromRing.  (Presumably advances modulo nbuffers;
	 * confirm in GetBufferFromRing, which is not visible in this chunk.)
	 */
	int			current;

	/*
	 * Array of buffer numbers.  InvalidBuffer (that is, zero) indicates we
	 * have not yet selected a buffer for this ring slot.  For allocation
	 * simplicity this is palloc'd together with the fixed fields of the
	 * struct.
	 */
	Buffer		buffers[FLEXIBLE_ARRAY_MEMBER];
}			BufferAccessStrategyData;
      93                 : 
      94                 : 
      95                 : /* Prototypes for internal functions */
      96                 : static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
      97                 :                                      uint32 *buf_state);
      98                 : static void AddBufferToRing(BufferAccessStrategy strategy,
      99                 :                             BufferDesc *buf);
     100                 : 
/*
 * ClockSweepTick - Helper routine for StrategyGetBuffer()
 *
 * Move the clock hand one buffer ahead of its current position and return the
 * id of the buffer now under the hand.
 *
 * The hand (nextVictimBuffer) increases monotonically; the returned value is
 * always reduced modulo NBuffers.  On wraparound, the backend that caused it
 * also folds the counter back into [0, NBuffers) and bumps completePasses.
 */
static inline uint32
ClockSweepTick(void)
{
	uint32		victim;

	/*
	 * Atomically move hand ahead one buffer - if there's several processes
	 * doing this, this can lead to buffers being returned slightly out of
	 * apparent order.
	 */
	victim =
		pg_atomic_fetch_add_u32(&StrategyControl->nextVictimBuffer, 1);

	if (victim >= NBuffers)
	{
		uint32		originalVictim = victim;

		/* always wrap what we look up in BufferDescriptors */
		victim = victim % NBuffers;

		/*
		 * If we're the one that just caused a wraparound, force
		 * completePasses to be incremented while holding the spinlock. We
		 * need the spinlock so StrategySyncStart() can return a consistent
		 * value consisting of nextVictimBuffer and completePasses.
		 */
		if (victim == 0)
		{
			uint32		expected;
			uint32		wrapped;
			bool		success = false;

			/* the counter value our fetch-add produced, i.e. what we expect */
			expected = originalVictim + 1;

			while (!success)
			{
				/*
				 * Acquire the spinlock while increasing completePasses. That
				 * allows other readers to read nextVictimBuffer and
				 * completePasses in a consistent manner which is required for
				 * StrategySyncStart().  In theory delaying the increment
				 * could lead to an overflow of nextVictimBuffers, but that's
				 * highly unlikely and wouldn't be particularly harmful.
				 */
				SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

				/*
				 * On CAS failure, "expected" is overwritten with the current
				 * counter value, so recompute the wrapped target each time.
				 */
				wrapped = expected % NBuffers;

				success = pg_atomic_compare_exchange_u32(&StrategyControl->nextVictimBuffer,
														 &expected, wrapped);
				if (success)
					StrategyControl->completePasses++;
				SpinLockRelease(&StrategyControl->buffer_strategy_lock);
			}
		}
	}
	return victim;
}
     165                 : 
     166                 : /*
     167                 :  * have_free_buffer -- a lockless check to see if there is a free buffer in
     168                 :  *                     buffer pool.
     169                 :  *
     170 ECB             :  * If the result is true that will become stale once free buffers are moved out
     171                 :  * by other operations, so the caller who strictly want to use a free buffer
     172                 :  * should not call this.
     173                 :  */
     174                 : bool
     175 GBC         177 : have_free_buffer(void)
     176                 : {
     177 GIC         177 :     if (StrategyControl->firstFreeBuffer >= 0)
     178             177 :         return true;
     179                 :     else
     180 UIC           0 :         return false;
     181                 : }
     182                 : 
/*
 * StrategyGetBuffer
 *
 *	Called by the bufmgr to get the next candidate buffer to use in
 *	BufferAlloc(). The only hard requirement BufferAlloc() has is that
 *	the selected buffer must not currently be pinned by anyone.
 *
 *	strategy is a BufferAccessStrategy object, or NULL for default strategy.
 *
 *	To ensure that no one else can pin the buffer before we do, we must
 *	return the buffer with the buffer header spinlock still held.
 *
 *	*from_ring is set to true iff the returned buffer came from the
 *	strategy's buffer ring; freelist and clock-sweep buffers report false.
 *	*buf_state receives the buffer's header state word as of locking it.
 */
BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring)
{
	BufferDesc *buf;
	int			bgwprocno;
	int			trycounter;
	uint32		local_buf_state;	/* to avoid repeated (de-)referencing */

	*from_ring = false;

	/*
	 * If given a strategy object, see whether it can select a buffer. We
	 * assume strategy objects don't need buffer_strategy_lock.
	 */
	if (strategy != NULL)
	{
		buf = GetBufferFromRing(strategy, buf_state);
		if (buf != NULL)
		{
			*from_ring = true;
			return buf;
		}
	}

	/*
	 * If asked, we need to waken the bgwriter. Since we don't want to rely on
	 * a spinlock for this we force a read from shared memory once, and then
	 * set the latch based on that value. We need to go through that length
	 * because otherwise bgwprocno might be reset while/after we check because
	 * the compiler might just reread from memory.
	 *
	 * This can possibly set the latch of the wrong process if the bgwriter
	 * dies in the wrong moment. But since PGPROC->procLatch is never
	 * deallocated the worst consequence of that is that we set the latch of
	 * some arbitrary process.
	 */
	bgwprocno = INT_ACCESS_ONCE(StrategyControl->bgwprocno);
	if (bgwprocno != -1)
	{
		/* reset bgwprocno first, before setting the latch */
		StrategyControl->bgwprocno = -1;

		/*
		 * Not acquiring ProcArrayLock here which is slightly icky. It's
		 * actually fine because procLatch isn't ever freed, so we just can
		 * potentially set the wrong process' (or no process') latch.
		 */
		SetLatch(&ProcGlobal->allProcs[bgwprocno].procLatch);
	}

	/*
	 * We count buffer allocation requests so that the bgwriter can estimate
	 * the rate of buffer consumption.  Note that buffers recycled by a
	 * strategy object are intentionally not counted here.
	 */
	pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1);

	/*
	 * First check, without acquiring the lock, whether there's buffers in the
	 * freelist. Since we otherwise don't require the spinlock in every
	 * StrategyGetBuffer() invocation, it'd be sad to acquire it here -
	 * uselessly in most cases. That obviously leaves a race where a buffer is
	 * put on the freelist but we don't see the store yet - but that's pretty
	 * harmless, it'll just get used during the next buffer acquisition.
	 *
	 * If there's buffers on the freelist, acquire the spinlock to pop one
	 * buffer of the freelist. Then check whether that buffer is usable and
	 * repeat if not.
	 *
	 * Note that the freeNext fields are considered to be protected by the
	 * buffer_strategy_lock not the individual buffer spinlocks, so it's OK to
	 * manipulate them without holding the spinlock.
	 */
	if (StrategyControl->firstFreeBuffer >= 0)
	{
		while (true)
		{
			/* Acquire the spinlock to remove element from the freelist */
			SpinLockAcquire(&StrategyControl->buffer_strategy_lock);

			/* List may have been emptied since the lockless check above */
			if (StrategyControl->firstFreeBuffer < 0)
			{
				SpinLockRelease(&StrategyControl->buffer_strategy_lock);
				break;
			}

			buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer);
			Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);

			/* Unconditionally remove buffer from freelist */
			StrategyControl->firstFreeBuffer = buf->freeNext;
			buf->freeNext = FREENEXT_NOT_IN_LIST;

			/*
			 * Release the lock so someone else can access the freelist while
			 * we check out this buffer.
			 */
			SpinLockRelease(&StrategyControl->buffer_strategy_lock);

			/*
			 * If the buffer is pinned or has a nonzero usage_count, we cannot
			 * use it; discard it and retry.  (This can only happen if VACUUM
			 * put a valid buffer in the freelist and then someone else used
			 * it before we got to it.  It's probably impossible altogether as
			 * of 8.3, but we'd better check anyway.)
			 */
			local_buf_state = LockBufHdr(buf);
			if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
				&& BUF_STATE_GET_USAGECOUNT(local_buf_state) == 0)
			{
				if (strategy != NULL)
					AddBufferToRing(strategy, buf);
				*buf_state = local_buf_state;
				/* returned with buffer header spinlock still held */
				return buf;
			}
			UnlockBufHdr(buf, local_buf_state);
		}
	}

	/* Nothing on the freelist, so run the "clock sweep" algorithm */
	trycounter = NBuffers;
	for (;;)
	{
		buf = GetBufferDescriptor(ClockSweepTick());

		/*
		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
		 * it; decrement the usage_count (unless pinned) and keep scanning.
		 */
		local_buf_state = LockBufHdr(buf);

		if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0)
		{
			if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
			{
				local_buf_state -= BUF_USAGECOUNT_ONE;

				/* made progress (aged a buffer), so reset the give-up counter */
				trycounter = NBuffers;
			}
			else
			{
				/* Found a usable buffer */
				if (strategy != NULL)
					AddBufferToRing(strategy, buf);
				*buf_state = local_buf_state;
				/* returned with buffer header spinlock still held */
				return buf;
			}
		}
		else if (--trycounter == 0)
		{
			/*
			 * We've scanned all the buffers without making any state changes,
			 * so all the buffers are pinned (or were when we looked at them).
			 * We could hope that someone will free one eventually, but it's
			 * probably better to fail than to risk getting stuck in an
			 * infinite loop.
			 */
			UnlockBufHdr(buf, local_buf_state);
			elog(ERROR, "no unpinned buffers available");
		}
		UnlockBufHdr(buf, local_buf_state);
	}
}
     358                 : 
     359                 : /*
     360                 :  * StrategyFreeBuffer: put a buffer on the freelist
     361                 :  */
     362                 : void
     363           84578 : StrategyFreeBuffer(BufferDesc *buf)
     364                 : {
     365           84578 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     366                 : 
     367                 :     /*
     368                 :      * It is possible that we are told to put something in the freelist that
     369                 :      * is already in it; don't screw up the list if so.
     370                 :      */
     371           84578 :     if (buf->freeNext == FREENEXT_NOT_IN_LIST)
     372                 :     {
     373           84578 :         buf->freeNext = StrategyControl->firstFreeBuffer;
     374           84578 :         if (buf->freeNext < 0)
     375            2435 :             StrategyControl->lastFreeBuffer = buf->buf_id;
     376           84578 :         StrategyControl->firstFreeBuffer = buf->buf_id;
     377                 :     }
     378                 : 
     379           84578 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     380           84578 : }
     381                 : 
     382                 : /*
     383                 :  * StrategySyncStart -- tell BufferSync where to start syncing
     384                 :  *
     385                 :  * The result is the buffer index of the best buffer to sync first.
     386                 :  * BufferSync() will proceed circularly around the buffer array from there.
     387                 :  *
     388                 :  * In addition, we return the completed-pass count (which is effectively
     389                 :  * the higher-order bits of nextVictimBuffer) and the count of recent buffer
     390                 :  * allocs if non-NULL pointers are passed.  The alloc count is reset after
     391                 :  * being read.
     392                 :  */
     393                 : int
     394           10438 : StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
     395                 : {
     396                 :     uint32      nextVictimBuffer;
     397                 :     int         result;
     398                 : 
     399           10438 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     400           10438 :     nextVictimBuffer = pg_atomic_read_u32(&StrategyControl->nextVictimBuffer);
     401           10438 :     result = nextVictimBuffer % NBuffers;
     402                 : 
     403           10438 :     if (complete_passes)
     404                 :     {
     405           10438 :         *complete_passes = StrategyControl->completePasses;
     406                 : 
     407                 :         /*
     408                 :          * Additionally add the number of wraparounds that happened before
     409                 :          * completePasses could be incremented. C.f. ClockSweepTick().
     410                 :          */
     411           10438 :         *complete_passes += nextVictimBuffer / NBuffers;
     412                 :     }
     413                 : 
     414           10438 :     if (num_buf_alloc)
     415                 :     {
     416           10438 :         *num_buf_alloc = pg_atomic_exchange_u32(&StrategyControl->numBufferAllocs, 0);
     417                 :     }
     418           10438 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     419           10438 :     return result;
     420                 : }
     421                 : 
     422                 : /*
     423                 :  * StrategyNotifyBgWriter -- set or clear allocation notification latch
     424                 :  *
     425                 :  * If bgwprocno isn't -1, the next invocation of StrategyGetBuffer will
     426                 :  * set that latch.  Pass -1 to clear the pending notification before it
     427                 :  * happens.  This feature is used by the bgwriter process to wake itself up
     428                 :  * from hibernation, and is not meant for anybody else to use.
     429                 :  */
     430                 : void
     431             520 : StrategyNotifyBgWriter(int bgwprocno)
     432                 : {
     433                 :     /*
     434                 :      * We acquire buffer_strategy_lock just to ensure that the store appears
     435                 :      * atomic to StrategyGetBuffer.  The bgwriter should call this rather
     436                 :      * infrequently, so there's no performance penalty from being safe.
     437                 :      */
     438             520 :     SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
     439             520 :     StrategyControl->bgwprocno = bgwprocno;
     440             520 :     SpinLockRelease(&StrategyControl->buffer_strategy_lock);
     441             520 : }
     442                 : 
     443                 : 
     444                 : /*
     445                 :  * StrategyShmemSize
     446                 :  *
     447                 :  * estimate the size of shared memory used by the freelist-related structures.
     448                 :  *
     449                 :  * Note: for somewhat historical reasons, the buffer lookup hashtable size
     450                 :  * is also determined here.
     451                 :  */
     452                 : Size
     453            2738 : StrategyShmemSize(void)
     454                 : {
     455            2738 :     Size        size = 0;
     456                 : 
     457                 :     /* size of lookup hash table ... see comment in StrategyInitialize */
     458            2738 :     size = add_size(size, BufTableShmemSize(NBuffers + NUM_BUFFER_PARTITIONS));
     459                 : 
     460                 :     /* size of the shared replacement strategy control block */
     461            2738 :     size = add_size(size, MAXALIGN(sizeof(BufferStrategyControl)));
     462                 : 
     463            2738 :     return size;
     464                 : }
     465                 : 
     466                 : /*
     467                 :  * StrategyInitialize -- initialize the buffer cache replacement
     468                 :  *      strategy.
     469                 :  *
     470                 :  * Assumes: All of the buffers are already built into a linked list.
     471                 :  *      Only called by postmaster and only during initialization.
     472                 :  */
     473                 : void
     474            1826 : StrategyInitialize(bool init)
     475                 : {
     476                 :     bool        found;
     477                 : 
     478                 :     /*
     479                 :      * Initialize the shared buffer lookup hashtable.
     480                 :      *
     481                 :      * Since we can't tolerate running out of lookup table entries, we must be
     482                 :      * sure to specify an adequate table size here.  The maximum steady-state
     483                 :      * usage is of course NBuffers entries, but BufferAlloc() tries to insert
     484                 :      * a new entry before deleting the old.  In principle this could be
     485                 :      * happening in each partition concurrently, so we could need as many as
     486                 :      * NBuffers + NUM_BUFFER_PARTITIONS entries.
     487                 :      */
     488            1826 :     InitBufTable(NBuffers + NUM_BUFFER_PARTITIONS);
     489                 : 
     490                 :     /*
     491                 :      * Get or create the shared strategy control block
     492                 :      */
     493            1826 :     StrategyControl = (BufferStrategyControl *)
     494            1826 :         ShmemInitStruct("Buffer Strategy Status",
     495                 :                         sizeof(BufferStrategyControl),
     496                 :                         &found);
     497                 : 
     498            1826 :     if (!found)
     499                 :     {
     500                 :         /*
     501                 :          * Only done once, usually in postmaster
     502                 :          */
     503            1826 :         Assert(init);
     504                 : 
     505            1826 :         SpinLockInit(&StrategyControl->buffer_strategy_lock);
     506                 : 
     507                 :         /*
     508                 :          * Grab the whole linked list of free buffers for our strategy. We
     509                 :          * assume it was previously set up by InitBufferPool().
     510                 :          */
     511            1826 :         StrategyControl->firstFreeBuffer = 0;
     512            1826 :         StrategyControl->lastFreeBuffer = NBuffers - 1;
     513                 : 
     514                 :         /* Initialize the clock sweep pointer */
     515            1826 :         pg_atomic_init_u32(&StrategyControl->nextVictimBuffer, 0);
     516                 : 
     517                 :         /* Clear statistics */
     518            1826 :         StrategyControl->completePasses = 0;
     519            1826 :         pg_atomic_init_u32(&StrategyControl->numBufferAllocs, 0);
     520                 : 
     521                 :         /* No pending notification */
     522            1826 :         StrategyControl->bgwprocno = -1;
     523                 :     }
     524                 :     else
     525 UBC           0 :         Assert(!init);
     526 CBC        1826 : }
     527                 : 
     528                 : 
     529                 : /* ----------------------------------------------------------------
     530                 :  *              Backend-private buffer ring management
     531                 :  * ----------------------------------------------------------------
     532                 :  */
     533                 : 
     534                 : 
     535                 : /*
     536                 :  * GetAccessStrategy -- create a BufferAccessStrategy object
     537                 :  *
     538                 :  * The object is allocated in the current memory context.
     539                 :  */
     540                 : BufferAccessStrategy
     541          105890 : GetAccessStrategy(BufferAccessStrategyType btype)
     542                 : {
     543                 :     int         ring_size_kb;
     544                 : 
     545                 :     /*
     546                 :      * Select ring size to use.  See buffer/README for rationales.
     547                 :      *
     548                 :      * Note: if you change the ring size for BAS_BULKREAD, see also
     549                 :      * SYNC_SCAN_REPORT_INTERVAL in access/heap/syncscan.c.
     550 ECB             :      */
     551 GIC      105890 :     switch (btype)
     552 EUB             :     {
     553 UIC           0 :         case BAS_NORMAL:
     554 EUB             :             /* if someone asks for NORMAL, just give 'em a "default" object */
     555 UIC           0 :             return NULL;
     556 ECB             : 
     557 CBC       58578 :         case BAS_BULKREAD:
     558 GNC       58578 :             ring_size_kb = 256;
     559 CBC       58578 :             break;
     560           47312 :         case BAS_BULKWRITE:
     561 GNC       47312 :             ring_size_kb = 16 * 1024;
     562 GBC       47312 :             break;
     563 UBC           0 :         case BAS_VACUUM:
     564 UNC           0 :             ring_size_kb = 256;
     565 UIC           0 :             break;
     566 EUB             : 
     567 UBC           0 :         default:
     568 UIC           0 :             elog(ERROR, "unrecognized buffer access strategy: %d",
     569                 :                  (int) btype);
     570                 :             return NULL;        /* keep compiler quiet */
     571                 :     }
     572 ECB             : 
     573 GNC      105890 :     return GetAccessStrategyWithSize(btype, ring_size_kb);
     574                 : }
     575                 : 
     576                 : /*
     577                 :  * GetAccessStrategyWithSize -- create a BufferAccessStrategy object with a
     578                 :  *      number of buffers equivalent to the passed in size.
     579                 :  *
     580                 :  * If the given ring size is 0, no BufferAccessStrategy will be created and
     581                 :  * the function will return NULL.  ring_size_kb must not be negative.
     582                 :  */
     583                 : BufferAccessStrategy
     584          110735 : GetAccessStrategyWithSize(BufferAccessStrategyType btype, int ring_size_kb)
     585                 : {
     586                 :     int         ring_buffers;
     587                 :     BufferAccessStrategy strategy;
     588                 : 
     589          110735 :     Assert(ring_size_kb >= 0);
     590                 : 
     591                 :     /* Figure out how many buffers ring_size_kb is */
     592          110735 :     ring_buffers = ring_size_kb / (BLCKSZ / 1024);
     593                 : 
     594                 :     /* 0 means unlimited, so no BufferAccessStrategy required */
     595          110735 :     if (ring_buffers == 0)
     596               6 :         return NULL;
     597                 : 
     598                 :     /* Cap to 1/8th of shared_buffers */
     599          110729 :     ring_buffers = Min(NBuffers / 8, ring_buffers);
     600                 : 
     601                 :     /* NBuffers should never be less than 16, so this shouldn't happen */
     602          110729 :     Assert(ring_buffers > 0);
     603                 : 
     604                 :     /* Allocate the object and initialize all elements to zeroes */
     605                 :     strategy = (BufferAccessStrategy)
     606 GIC      110729 :         palloc0(offsetof(BufferAccessStrategyData, buffers) +
     607                 :                 ring_buffers * sizeof(Buffer));
     608                 : 
     609                 :     /* Set fields that don't start out zero */
     610          110729 :     strategy->btype = btype;
     611 GNC      110729 :     strategy->nbuffers = ring_buffers;
     612                 : 
     613 GIC      110729 :     return strategy;
     614                 : }
     615                 : 
     616                 : /*
     617                 :  * GetAccessStrategyBufferCount -- an accessor for the number of buffers in
     618                 :  *      the ring
     619                 :  *
     620                 :  * Returns 0 on NULL input to match behavior of GetAccessStrategyWithSize()
     621                 :  * returning NULL with 0 size.
     622                 :  */
     623                 : int
     624 GNC           9 : GetAccessStrategyBufferCount(BufferAccessStrategy strategy)
     625                 : {
     626               9 :     if (strategy == NULL)
     627 UNC           0 :         return 0;
     628                 : 
     629 GNC           9 :     return strategy->nbuffers;
     630                 : }
     631                 : 
     632 ECB             : /*
     633                 :  * FreeAccessStrategy -- release a BufferAccessStrategy object
     634                 :  *
     635                 :  * A simple pfree would do at the moment, but we would prefer that callers
     636                 :  * don't assume that much about the representation of BufferAccessStrategy.
     637                 :  */
     638                 : void
     639 CBC      101510 : FreeAccessStrategy(BufferAccessStrategy strategy)
     640                 : {
     641                 :     /* don't crash if called on a "default" strategy */
     642          101510 :     if (strategy != NULL)
     643 GIC      101510 :         pfree(strategy);
     644          101510 : }
     645 ECB             : 
     646                 : /*
     647                 :  * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
     648                 :  *      ring is empty / not usable.
     649                 :  *
     650                 :  * The bufhdr spin lock is held on the returned buffer.
     651                 :  */
     652                 : static BufferDesc *
     653 CBC      637604 : GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
     654 ECB             : {
     655                 :     BufferDesc *buf;
     656                 :     Buffer      bufnum;
     657                 :     uint32      local_buf_state;    /* to avoid repeated (de-)referencing */
     658                 : 
     659                 : 
     660                 :     /* Advance to next ring slot */
     661 GNC      637604 :     if (++strategy->current >= strategy->nbuffers)
     662 GIC       21103 :         strategy->current = 0;
     663                 : 
     664                 :     /*
     665                 :      * If the slot hasn't been filled yet, tell the caller to allocate a new
     666                 :      * buffer with the normal allocation strategy.  He will then fill this
     667 ECB             :      * slot by calling AddBufferToRing with the new buffer.
     668                 :      */
     669 CBC      637604 :     bufnum = strategy->buffers[strategy->current];
     670 GBC      637604 :     if (bufnum == InvalidBuffer)
     671 GIC      349765 :         return NULL;
     672                 : 
     673                 :     /*
     674                 :      * If the buffer is pinned we cannot use it under any circumstances.
     675                 :      *
     676                 :      * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
     677                 :      * since our own previous usage of the ring element would have left it
     678                 :      * there, but it might've been decremented by clock sweep since then). A
     679 ECB             :      * higher usage_count indicates someone else has touched the buffer, so we
     680                 :      * shouldn't re-use it.
     681                 :      */
     682 CBC      287839 :     buf = GetBufferDescriptor(bufnum - 1);
     683          287839 :     local_buf_state = LockBufHdr(buf);
     684          287839 :     if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
     685 GIC      285854 :         && BUF_STATE_GET_USAGECOUNT(local_buf_state) <= 1)
     686                 :     {
     687          282571 :         *buf_state = local_buf_state;
     688          282571 :         return buf;
     689                 :     }
     690            5268 :     UnlockBufHdr(buf, local_buf_state);
     691                 : 
     692 ECB             :     /*
     693                 :      * Tell caller to allocate a new buffer with the normal allocation
     694                 :      * strategy.  He'll then replace this ring element via AddBufferToRing.
     695                 :      */
     696 GIC        5268 :     return NULL;
     697                 : }
     698                 : 
     699 ECB             : /*
     700                 :  * AddBufferToRing -- add a buffer to the buffer ring
     701                 :  *
     702                 :  * Caller must hold the buffer header spinlock on the buffer.  Since this
     703                 :  * is called with the spinlock held, it had better be quite cheap.
     704                 :  */
     705                 : static void
     706 GIC      355033 : AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf)
     707 ECB             : {
     708 CBC      355033 :     strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
     709          355033 : }
     710                 : 
     711                 : /*
     712                 :  * Utility function returning the IOContext of a given BufferAccessStrategy's
     713                 :  * strategy ring.
     714                 :  */
     715                 : IOContext
     716 GNC    69581061 : IOContextForStrategy(BufferAccessStrategy strategy)
     717                 : {
     718        69581061 :     if (!strategy)
     719        68399637 :         return IOCONTEXT_NORMAL;
     720                 : 
     721         1181424 :     switch (strategy->btype)
     722                 :     {
     723 UNC           0 :         case BAS_NORMAL:
     724                 : 
     725                 :             /*
     726                 :              * Currently, GetAccessStrategy() returns NULL for
     727                 :              * BufferAccessStrategyType BAS_NORMAL, so this case is
     728                 :              * unreachable.
     729                 :              */
     730               0 :             pg_unreachable();
     731                 :             return IOCONTEXT_NORMAL;
     732 GNC      596566 :         case BAS_BULKREAD:
     733          596566 :             return IOCONTEXT_BULKREAD;
     734          189447 :         case BAS_BULKWRITE:
     735          189447 :             return IOCONTEXT_BULKWRITE;
     736          395411 :         case BAS_VACUUM:
     737          395411 :             return IOCONTEXT_VACUUM;
     738                 :     }
     739                 : 
     740 UNC           0 :     elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype);
     741                 :     pg_unreachable();
     742                 : }
     743                 : 
     744                 : /*
     745                 :  * StrategyRejectBuffer -- consider rejecting a dirty buffer
     746                 :  *
     747                 :  * When a nondefault strategy is used, the buffer manager calls this function
     748                 :  * when it turns out that the buffer selected by StrategyGetBuffer needs to
     749                 :  * be written out and doing so would require flushing WAL too.  This gives us
     750                 :  * a chance to choose a different victim.
     751                 :  *
     752                 :  * Returns true if buffer manager should ask for a new victim, and false
     753 ECB             :  * if this buffer should be written and re-used.
     754                 :  */
     755                 : bool
     756 GNC       10275 : StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
     757                 : {
     758 ECB             :     /* We only do this in bulkread mode */
     759 CBC       10275 :     if (strategy->btype != BAS_BULKREAD)
     760 GIC        1838 :         return false;
     761 ECB             : 
     762                 :     /* Don't muck with behavior of normal buffer-replacement strategy */
     763 GNC       16557 :     if (!from_ring ||
     764 GIC        8120 :         strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
     765             317 :         return false;
     766                 : 
     767 ECB             :     /*
     768                 :      * Remove the dirty buffer from the ring; necessary to prevent infinite
     769                 :      * loop if all ring members are dirty.
     770                 :      */
     771 GIC        8120 :     strategy->buffers[strategy->current] = InvalidBuffer;
     772                 : 
     773            8120 :     return true;
     774                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a