LCOV - differential code coverage report
Current view: top level - src/backend/access/transam - slru.c (source / functions)
Current: Differential Code Coverage HEAD vs 15
Current Date: 2023-04-08 15:15:32
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit | not hit

            Coverage  Total    Hit    UBC    GNC    CBC    DCB
Lines:        74.9 %    510    382    128      1    381      1
Functions:    92.3 %     26     24      2      1     23

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * slru.c
       4                 :  *      Simple LRU buffering for transaction status logfiles
       5                 :  *
       6                 :  * We use a simple least-recently-used scheme to manage a pool of page
       7                 :  * buffers.  Under ordinary circumstances we expect that write
       8                 :  * traffic will occur mostly to the latest page (and to the just-prior
       9                 :  * page, soon after a page transition).  Read traffic will probably touch
      10                 :  * a larger span of pages, but in any case a fairly small number of page
      11                 :  * buffers should be sufficient.  So, we just search the buffers using plain
      12                 :  * linear search; there's no need for a hashtable or anything fancy.
      13                 :  * The management algorithm is straight LRU except that we will never swap
      14                 :  * out the latest page (since we know it's going to be hit again eventually).
      15                 :  *
      16                 :  * We use a control LWLock to protect the shared data structures, plus
      17                 :  * per-buffer LWLocks that synchronize I/O for each buffer.  The control lock
      18                 :  * must be held to examine or modify any shared state.  A process that is
      19                 :  * reading in or writing out a page buffer does not hold the control lock,
      20                 :  * only the per-buffer lock for the buffer it is working on.
      21                 :  *
      22                 :  * "Holding the control lock" means exclusive lock in all cases except for
      23                 :  * SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for
      24                 :  * the implications of that.
      25                 :  *
      26                 :  * When initiating I/O on a buffer, we acquire the per-buffer lock exclusively
      27                 :  * before releasing the control lock.  The per-buffer lock is released after
      28                 :  * completing the I/O, re-acquiring the control lock, and updating the shared
      29                 :  * state.  (Deadlock is not possible here, because we never try to initiate
      30                 :  * I/O when someone else is already doing I/O on the same buffer.)
      31                 :  * To wait for I/O to complete, release the control lock, acquire the
      32                 :  * per-buffer lock in shared mode, immediately release the per-buffer lock,
      33                 :  * reacquire the control lock, and then recheck state (since arbitrary things
      34                 :  * could have happened while we didn't have the lock).
      35                 :  *
      36                 :  * As with the regular buffer manager, it is possible for another process
      37                 :  * to re-dirty a page that is currently being written out.  This is handled
      38                 :  * by re-setting the page's page_dirty flag.
      39                 :  *
      40                 :  *
      41                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      42                 :  * Portions Copyright (c) 1994, Regents of the University of California
      43                 :  *
      44                 :  * src/backend/access/transam/slru.c
      45                 :  *
      46                 :  *-------------------------------------------------------------------------
      47                 :  */
      48                 : #include "postgres.h"
      49                 : 
      50                 : #include <fcntl.h>
      51                 : #include <sys/stat.h>
      52                 : #include <unistd.h>
      53                 : 
      54                 : #include "access/slru.h"
      55                 : #include "access/transam.h"
      56                 : #include "access/xlog.h"
      57                 : #include "access/xlogutils.h"
      58                 : #include "miscadmin.h"
      59                 : #include "pgstat.h"
      60                 : #include "storage/fd.h"
      61                 : #include "storage/shmem.h"
      62                 : 
      63                 : #define SlruFileName(ctl, path, seg) \
      64                 :     snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg)
      65                 : 
      66                 : /*
      67                 :  * During SimpleLruWriteAll(), we will usually not need to write more than one
      68                 :  * or two physical files, but we may need to write several pages per file.  We
      69                 :  * can consolidate the I/O requests by leaving files open until control returns
      70                 :  * to SimpleLruWriteAll().  This data structure remembers which files are open.
      71                 :  */
      72                 : #define MAX_WRITEALL_BUFFERS    16
      73                 : 
      74                 : typedef struct SlruWriteAllData
      75                 : {
      76                 :     int         num_files;      /* # files actually open */
      77                 :     int         fd[MAX_WRITEALL_BUFFERS];   /* their FD's */
      78                 :     int         segno[MAX_WRITEALL_BUFFERS];    /* their log seg#s */
      79                 : } SlruWriteAllData;
      80                 : 
      81                 : typedef struct SlruWriteAllData *SlruWriteAll;
      82                 : 
      83                 : /*
      84                 :  * Populate a file tag describing a segment file.  We only use the segment
      85                 :  * number, since we can derive everything else we need by having separate
      86                 :  * sync handler functions for clog, multixact etc.
      87                 :  */
      88                 : #define INIT_SLRUFILETAG(a,xx_handler,xx_segno) \
      89                 : ( \
      90                 :     memset(&(a), 0, sizeof(FileTag)), \
      91                 :     (a).handler = (xx_handler), \
      92                 :     (a).segno = (xx_segno) \
      93                 : )
      94                 : 
      95                 : /*
      96                 :  * Macro to mark a buffer slot "most recently used".  Note multiple evaluation
      97                 :  * of arguments!
      98                 :  *
      99                 :  * The reason for the if-test is that there are often many consecutive
     100                 :  * accesses to the same page (particularly the latest page).  By suppressing
     101                 :  * useless increments of cur_lru_count, we reduce the probability that old
     102                 :  * pages' counts will "wrap around" and make them appear recently used.
     103                 :  *
     104                 :  * We allow this code to be executed concurrently by multiple processes within
     105                 :  * SimpleLruReadPage_ReadOnly().  As long as int reads and writes are atomic,
     106                 :  * this should not cause any completely-bogus values to enter the computation.
     107                 :  * However, it is possible for either cur_lru_count or individual
     108                 :  * page_lru_count entries to be "reset" to lower values than they should have,
     109                 :  * in case a process is delayed while it executes this macro.  With care in
     110                 :  * SlruSelectLRUPage(), this does little harm, and in any case the absolute
     111                 :  * worst possible consequence is a nonoptimal choice of page to evict.  The
     112                 :  * gain from allowing concurrent reads of SLRU pages seems worth it.
     113                 :  */
     114                 : #define SlruRecentlyUsed(shared, slotno)    \
     115                 :     do { \
     116                 :         int     new_lru_count = (shared)->cur_lru_count; \
     117                 :         if (new_lru_count != (shared)->page_lru_count[slotno]) { \
     118                 :             (shared)->cur_lru_count = ++new_lru_count; \
     119                 :             (shared)->page_lru_count[slotno] = new_lru_count; \
     120                 :         } \
     121                 :     } while (0)
     122                 : 
     123                 : /* Saved info for SlruReportIOError */
     124                 : typedef enum
     125                 : {
     126                 :     SLRU_OPEN_FAILED,
     127                 :     SLRU_SEEK_FAILED,
     128                 :     SLRU_READ_FAILED,
     129                 :     SLRU_WRITE_FAILED,
     130                 :     SLRU_FSYNC_FAILED,
     131                 :     SLRU_CLOSE_FAILED
     132                 : } SlruErrorCause;
     133                 : 
     134                 : static SlruErrorCause slru_errcause;
     135                 : static int  slru_errno;
     136                 : 
     137                 : 
     138                 : static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno);
     139                 : static void SimpleLruWaitIO(SlruCtl ctl, int slotno);
     140                 : static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata);
     141                 : static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno);
     142                 : static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno,
     143                 :                                   SlruWriteAll fdata);
     144                 : static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid);
     145                 : static int  SlruSelectLRUPage(SlruCtl ctl, int pageno);
     146                 : 
     147                 : static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename,
     148                 :                                       int segpage, void *data);
     149                 : static void SlruInternalDeleteSegment(SlruCtl ctl, int segno);
     150                 : 
     151                 : /*
     152                 :  * Initialization of shared memory
     153                 :  */
     154                 : 
     155                 : Size
     156 CBC       44733 : SimpleLruShmemSize(int nslots, int nlsns)
     157                 : {
     158                 :     Size        sz;
     159                 : 
     160                 :     /* we assume nslots isn't so large as to risk overflow */
     161           44733 :     sz = MAXALIGN(sizeof(SlruSharedData));
     162           44733 :     sz += MAXALIGN(nslots * sizeof(char *));    /* page_buffer[] */
     163           44733 :     sz += MAXALIGN(nslots * sizeof(SlruPageStatus));    /* page_status[] */
     164           44733 :     sz += MAXALIGN(nslots * sizeof(bool));  /* page_dirty[] */
     165           44733 :     sz += MAXALIGN(nslots * sizeof(int));   /* page_number[] */
     166           44733 :     sz += MAXALIGN(nslots * sizeof(int));   /* page_lru_count[] */
     167           44733 :     sz += MAXALIGN(nslots * sizeof(LWLockPadded));  /* buffer_locks[] */
     168                 : 
     169           44733 :     if (nlsns > 0)
     170            6390 :         sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));    /* group_lsn[] */
     171                 : 
     172           44733 :     return BUFFERALIGN(sz) + BLCKSZ * nslots;
     173                 : }
     174                 : 
     175                 : /*
     176                 :  * Initialize, or attach to, a simple LRU cache in shared memory.
     177                 :  *
     178                 :  * ctl: address of local (unshared) control structure.
     179                 :  * name: name of SLRU.  (This is user-visible, pick with care!)
     180                 :  * nslots: number of page slots to use.
     181                 :  * nlsns: number of LSN groups per page (set to zero if not relevant).
     182                 :  * ctllock: LWLock to use to control access to the shared control structure.
     183                 :  * subdir: PGDATA-relative subdirectory that will contain the files.
     184                 :  * tranche_id: LWLock tranche ID to use for the SLRU's per-buffer LWLocks.
     185                 :  * sync_handler: which set of functions to use to handle sync requests
     186                 :  */
     187                 : void
     188           12783 : SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
     189                 :               LWLock *ctllock, const char *subdir, int tranche_id,
     190                 :               SyncRequestHandler sync_handler)
     191                 : {
     192                 :     SlruShared  shared;
     193                 :     bool        found;
     194                 : 
     195           12783 :     shared = (SlruShared) ShmemInitStruct(name,
     196                 :                                           SimpleLruShmemSize(nslots, nlsns),
     197                 :                                           &found);
     198                 : 
     199           12783 :     if (!IsUnderPostmaster)
     200                 :     {
     201                 :         /* Initialize locks and shared memory area */
     202                 :         char       *ptr;
     203                 :         Size        offset;
     204                 :         int         slotno;
     205                 : 
     206           12783 :         Assert(!found);
     207                 : 
     208           12783 :         memset(shared, 0, sizeof(SlruSharedData));
     209                 : 
     210           12783 :         shared->ControlLock = ctllock;
     211                 : 
     212           12783 :         shared->num_slots = nslots;
     213           12783 :         shared->lsn_groups_per_page = nlsns;
     214                 : 
     215           12783 :         shared->cur_lru_count = 0;
     216                 : 
     217                 :         /* shared->latest_page_number will be set later */
     218                 : 
     219           12783 :         shared->slru_stats_idx = pgstat_get_slru_index(name);
     220                 : 
     221           12783 :         ptr = (char *) shared;
     222           12783 :         offset = MAXALIGN(sizeof(SlruSharedData));
     223           12783 :         shared->page_buffer = (char **) (ptr + offset);
     224           12783 :         offset += MAXALIGN(nslots * sizeof(char *));
     225           12783 :         shared->page_status = (SlruPageStatus *) (ptr + offset);
     226           12783 :         offset += MAXALIGN(nslots * sizeof(SlruPageStatus));
     227           12783 :         shared->page_dirty = (bool *) (ptr + offset);
     228           12783 :         offset += MAXALIGN(nslots * sizeof(bool));
     229           12783 :         shared->page_number = (int *) (ptr + offset);
     230           12783 :         offset += MAXALIGN(nslots * sizeof(int));
     231           12783 :         shared->page_lru_count = (int *) (ptr + offset);
     232           12783 :         offset += MAXALIGN(nslots * sizeof(int));
     233                 : 
     234                 :         /* Set up buffer_locks[]; each lock is initialized in the slot loop below */
     235           12783 :         shared->buffer_locks = (LWLockPadded *) (ptr + offset);
     236           12783 :         offset += MAXALIGN(nslots * sizeof(LWLockPadded));
     237                 : 
     238           12783 :         if (nlsns > 0)
     239                 :         {
     240            1826 :             shared->group_lsn = (XLogRecPtr *) (ptr + offset);
     241            1826 :             offset += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr));
     242                 :         }
     243                 : 
     244           12783 :         ptr += BUFFERALIGN(offset);
     245          278383 :         for (slotno = 0; slotno < nslots; slotno++)
     246                 :         {
     247          265600 :             LWLockInitialize(&shared->buffer_locks[slotno].lock,
     248                 :                              tranche_id);
     249                 : 
     250          265600 :             shared->page_buffer[slotno] = ptr;
     251          265600 :             shared->page_status[slotno] = SLRU_PAGE_EMPTY;
     252          265600 :             shared->page_dirty[slotno] = false;
     253          265600 :             shared->page_lru_count[slotno] = 0;
     254          265600 :             ptr += BLCKSZ;
     255                 :         }
     256                 : 
     257                 :         /* Everything laid out above must fit within the estimated shmem size */
     258           12783 :         Assert(ptr - (char *) shared <= SimpleLruShmemSize(nslots, nlsns));
     259                 :     }
     260                 :     else
     261 UBC           0 :         Assert(found);
     262                 : 
     263                 :     /*
     264                 :      * Initialize the unshared control struct, including directory path. We
     265                 :      * assume caller set PagePrecedes.
     266                 :      */
     267 CBC       12783 :     ctl->shared = shared;
     268           12783 :     ctl->sync_handler = sync_handler;
     269           12783 :     strlcpy(ctl->Dir, subdir, sizeof(ctl->Dir));
     270           12783 : }
     271                 : 
     272                 : /*
     273                 :  * Initialize (or reinitialize) a page to zeroes.
     274                 :  *
     275                 :  * The page is not actually written, just set up in shared memory.
     276                 :  * The slot number of the new page is returned.
     277                 :  *
     278                 :  * Control lock must be held at entry, and will be held at exit.
     279                 :  */
     280                 : int
     281            3160 : SimpleLruZeroPage(SlruCtl ctl, int pageno)
     282                 : {
     283            3160 :     SlruShared  shared = ctl->shared;
     284                 :     int         slotno;
     285                 : 
     286                 :     /* Find a suitable buffer slot for the page */
     287            3160 :     slotno = SlruSelectLRUPage(ctl, pageno);
     288            3160 :     Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
     289                 :            (shared->page_status[slotno] == SLRU_PAGE_VALID &&
     290                 :             !shared->page_dirty[slotno]) ||
     291                 :            shared->page_number[slotno] == pageno);
     292                 : 
     293                 :     /* Mark the slot as containing this page */
     294            3160 :     shared->page_number[slotno] = pageno;
     295            3160 :     shared->page_status[slotno] = SLRU_PAGE_VALID;
     296            3160 :     shared->page_dirty[slotno] = true;
     297            3160 :     SlruRecentlyUsed(shared, slotno);
     298                 : 
     299                 :     /* Set the buffer to zeroes */
     300            3160 :     MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
     301                 : 
     302                 :     /* Set the LSNs for this new page to zero */
     303            3160 :     SimpleLruZeroLSNs(ctl, slotno);
     304                 : 
     305                 :     /* Assume this page is now the latest active page */
     306            3160 :     shared->latest_page_number = pageno;
     307                 : 
     308                 :     /* update the stats counter of zeroed pages */
     309            3160 :     pgstat_count_slru_page_zeroed(shared->slru_stats_idx);
     310                 : 
     311            3160 :     return slotno;
     312                 : }
     313                 : 
     314                 : /*
     315                 :  * Zero all the LSNs we store for this slru page.
     316                 :  *
     317                 :  * This should be called each time we create a new page, and each time we read
     318                 :  * in a page from disk into an existing buffer.  (Such an old page cannot
     319                 :  * have any interesting LSNs, since we'd have flushed them before writing
     320                 :  * the page in the first place.)
     321                 :  *
     322                 :  * This assumes that InvalidXLogRecPtr is bitwise-all-0.
     323                 :  */
     324                 : static void
     325            4875 : SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
     326                 : {
     327            4875 :     SlruShared  shared = ctl->shared;
     328                 : 
     329            4875 :     if (shared->lsn_groups_per_page > 0)
     330            1472 :         MemSet(&shared->group_lsn[slotno * shared->lsn_groups_per_page], 0,
     331                 :                shared->lsn_groups_per_page * sizeof(XLogRecPtr));
     332            4875 : }
     333                 : 
     334                 : /*
     335                 :  * Wait for any active I/O on a page slot to finish.  (This does not
     336                 :  * guarantee that new I/O hasn't been started before we return, though.
     337                 :  * In fact the slot might not even contain the same page anymore.)
     338                 :  *
     339                 :  * Control lock must be held at entry, and will be held at exit.
     340                 :  */
     341                 : static void
     342 UBC           0 : SimpleLruWaitIO(SlruCtl ctl, int slotno)
     343                 : {
     344               0 :     SlruShared  shared = ctl->shared;
     345                 : 
     346                 :     /* See notes at top of file */
     347               0 :     LWLockRelease(shared->ControlLock);
     348               0 :     LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED);
     349               0 :     LWLockRelease(&shared->buffer_locks[slotno].lock);
     350               0 :     LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
     351                 : 
     352                 :     /*
     353                 :      * If the slot is still in an io-in-progress state, then either someone
     354                 :      * already started a new I/O on the slot, or a previous I/O failed and
     355                 :      * neglected to reset the page state.  That shouldn't happen, really, but
     356                 :      * it seems worth a few extra cycles to check and recover from it. We can
     357                 :      * cheaply test for failure by seeing if the buffer lock is still held (we
     358                 :      * assume that transaction abort would release the lock).
     359                 :      */
     360               0 :     if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
     361               0 :         shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS)
     362                 :     {
     363               0 :         if (LWLockConditionalAcquire(&shared->buffer_locks[slotno].lock, LW_SHARED))
     364                 :         {
     365                 :             /* indeed, the I/O must have failed */
     366               0 :             if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS)
     367               0 :                 shared->page_status[slotno] = SLRU_PAGE_EMPTY;
     368                 :             else                /* write_in_progress */
     369                 :             {
     370               0 :                 shared->page_status[slotno] = SLRU_PAGE_VALID;
     371               0 :                 shared->page_dirty[slotno] = true;
     372                 :             }
     373               0 :             LWLockRelease(&shared->buffer_locks[slotno].lock);
     374                 :         }
     375                 :     }
     376               0 : }
     377                 : 
     378                 : /*
     379                 :  * Find a page in a shared buffer, reading it in if necessary.
     380                 :  * The page number must correspond to an already-initialized page.
     381                 :  *
     382                 :  * If write_ok is true then it is OK to return a page that is in
     383                 :  * WRITE_IN_PROGRESS state; it is the caller's responsibility to be sure
     384                 :  * that modification of the page is safe.  If write_ok is false then we
     385                 :  * will not return the page until it is not undergoing active I/O.
     386                 :  *
     387                 :  * The passed-in xid is used only for error reporting, and may be
     388                 :  * InvalidTransactionId if no specific xid is associated with the action.
     389                 :  *
     390                 :  * Return value is the shared-buffer slot number now holding the page.
     391                 :  * The buffer's LRU access info is updated.
     392                 :  *
     393                 :  * Control lock must be held at entry, and will be held at exit.
     394                 :  */
     395                 : int
     396 CBC      327571 : SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
     397                 :                   TransactionId xid)
     398                 : {
     399          327571 :     SlruShared  shared = ctl->shared;
     400                 : 
     401                 :     /* Outer loop handles restart if we must wait for someone else's I/O */
     402                 :     for (;;)
     403 UBC           0 :     {
     404                 :         int         slotno;
     405                 :         bool        ok;
     406                 : 
     407                 :         /* See if page already is in memory; if not, pick victim slot */
     408 CBC      327571 :         slotno = SlruSelectLRUPage(ctl, pageno);
     409                 : 
     410                 :         /* Did we find the page in memory? */
     411          327571 :         if (shared->page_number[slotno] == pageno &&
     412          327569 :             shared->page_status[slotno] != SLRU_PAGE_EMPTY)
     413                 :         {
     414                 :             /*
     415                 :              * If page is still being read in, we must wait for I/O.  Likewise
     416                 :              * if the page is being written and the caller said that's not OK.
     417                 :              */
     418          325856 :             if (shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS ||
     419          325856 :                 (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
     420 UBC           0 :                  !write_ok))
     421                 :             {
     422               0 :                 SimpleLruWaitIO(ctl, slotno);
     423                 :                 /* Now we must recheck state from the top */
     424               0 :                 continue;
     425                 :             }
     426                 :             /* Otherwise, it's ready to use */
     427 CBC      325856 :             SlruRecentlyUsed(shared, slotno);
     428                 : 
     429                 :             /* update the stats counter of pages found in the SLRU */
     430          325856 :             pgstat_count_slru_page_hit(shared->slru_stats_idx);
     431                 : 
     432          325856 :             return slotno;
     433                 :         }
     434                 : 
     435                 :         /* We found no match; assert we selected a freeable slot */
     436            1715 :         Assert(shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
     437                 :                (shared->page_status[slotno] == SLRU_PAGE_VALID &&
     438                 :                 !shared->page_dirty[slotno]));
     439                 : 
     440                 :         /* Mark the slot read-busy */
     441            1715 :         shared->page_number[slotno] = pageno;
     442            1715 :         shared->page_status[slotno] = SLRU_PAGE_READ_IN_PROGRESS;
     443            1715 :         shared->page_dirty[slotno] = false;
     444                 : 
     445                 :         /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
     446            1715 :         LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
     447                 : 
     448                 :         /* Release control lock while doing I/O */
     449            1715 :         LWLockRelease(shared->ControlLock);
     450                 : 
     451                 :         /* Do the read */
     452            1715 :         ok = SlruPhysicalReadPage(ctl, pageno, slotno);
     453                 : 
     454                 :         /* Set the LSNs for this newly read-in page to zero */
     455            1715 :         SimpleLruZeroLSNs(ctl, slotno);
     456                 : 
     457                 :         /* Re-acquire control lock and update page state */
     458            1715 :         LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
     459                 : 
     460            1715 :         Assert(shared->page_number[slotno] == pageno &&
     461                 :                shared->page_status[slotno] == SLRU_PAGE_READ_IN_PROGRESS &&
     462                 :                !shared->page_dirty[slotno]);
     463                 : 
     464            1715 :         shared->page_status[slotno] = ok ? SLRU_PAGE_VALID : SLRU_PAGE_EMPTY;
     465                 : 
     466            1715 :         LWLockRelease(&shared->buffer_locks[slotno].lock);
     467                 : 
     468                 :         /* Now it's okay to ereport if we failed */
     469            1715 :         if (!ok)
     470 UBC           0 :             SlruReportIOError(ctl, pageno, xid);
     471                 : 
     472 CBC        1715 :         SlruRecentlyUsed(shared, slotno);
     473                 : 
     474                 :         /* update the stats counter of pages not found in SLRU */
     475            1715 :         pgstat_count_slru_page_read(shared->slru_stats_idx);
     476                 : 
     477            1715 :         return slotno;
     478                 :     }
     479                 : }
     480                 : 
     481                 : /*
     482                 :  * Find a page in a shared buffer, reading it in if necessary.
     483                 :  * The page number must correspond to an already-initialized page.
     484                 :  * The caller must intend only read-only access to the page.
     485                 :  *
     486                 :  * The passed-in xid is used only for error reporting, and may be
     487                 :  * InvalidTransactionId if no specific xid is associated with the action.
     488                 :  *
     489                 :  * Return value is the shared-buffer slot number now holding the page.
     490                 :  * The buffer's LRU access info is updated.
     491                 :  *
     492                 :  * Control lock must NOT be held at entry, but will be held at exit.
     493                 :  * It is unspecified whether the lock will be shared or exclusive.
     494                 :  */
     495                 : int
     496         1803179 : SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
     497                 : {
     498         1803179 :     SlruShared  shared = ctl->shared;
     499                 :     int         slotno;
     500                 : 
     501                 :     /* Try to find the page while holding only shared lock */
     502         1803179 :     LWLockAcquire(shared->ControlLock, LW_SHARED);
     503                 : 
     504                 :     /* See if page is already in a buffer */
     505         1805655 :     for (slotno = 0; slotno < shared->num_slots; slotno++)
     506                 :     {
     507         1805610 :         if (shared->page_number[slotno] == pageno &&
     508         1803394 :             shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
     509         1803134 :             shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
     510                 :         {
     511                 :             /* See comments for SlruRecentlyUsed macro */
     512         1803134 :             SlruRecentlyUsed(shared, slotno);
     513                 : 
     514                 :             /* update the stats counter of pages found in the SLRU */
     515         1803134 :             pgstat_count_slru_page_hit(shared->slru_stats_idx);
     516                 : 
     517         1803134 :             return slotno;
     518                 :         }
     519                 :     }
     520                 : 
     521                 :     /* No luck, so switch to normal exclusive lock and do regular read */
     522              45 :     LWLockRelease(shared->ControlLock);
     523              45 :     LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
     524                 : 
     525              45 :     return SimpleLruReadPage(ctl, pageno, true, xid);
     526                 : }
     527                 : 
     528                 : /*
     529                 :  * Write a page from a shared buffer, if necessary.
     530                 :  * Does nothing if the specified slot is not dirty.
     531                 :  *
     532                 :  * NOTE: only one write attempt is made here.  Hence, it is possible that
     533                 :  * the page is still dirty at exit (if someone else re-dirtied it during
     534                 :  * the write).  However, we *do* attempt a fresh write even if the page
     535                 :  * is already being written; this is for checkpoints.
     536                 :  *
     537                 :  * Control lock must be held at entry, and will be held at exit.
     538                 :  */
     539                 : static void
     540          336032 : SlruInternalWritePage(SlruCtl ctl, int slotno, SlruWriteAll fdata)
     541                 : {
     542          336032 :     SlruShared  shared = ctl->shared;
     543          336032 :     int         pageno = shared->page_number[slotno];
     544                 :     bool        ok;
     545                 : 
     546                 :     /* If a write is in progress, wait for it to finish */
     547          336032 :     while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS &&
     548 UBC           0 :            shared->page_number[slotno] == pageno)
     549                 :     {
     550               0 :         SimpleLruWaitIO(ctl, slotno);
     551                 :     }
     552                 : 
     553                 :     /*
     554                 :      * Do nothing if page is not dirty, or if buffer no longer contains the
     555                 :      * same page we were called for.
     556                 :      */
     557 CBC      336032 :     if (!shared->page_dirty[slotno] ||
     558            4983 :         shared->page_status[slotno] != SLRU_PAGE_VALID ||
     559            4983 :         shared->page_number[slotno] != pageno)
     560          331049 :         return;
     561                 : 
     562                 :     /*
     563                 :      * Mark the slot write-busy, and clear the dirtybit.  After this point, a
     564                 :      * transaction status update on this page will mark it dirty again.
     565                 :      */
     566            4983 :     shared->page_status[slotno] = SLRU_PAGE_WRITE_IN_PROGRESS;
     567            4983 :     shared->page_dirty[slotno] = false;
     568                 : 
     569                 :     /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
     570            4983 :     LWLockAcquire(&shared->buffer_locks[slotno].lock, LW_EXCLUSIVE);
     571                 : 
     572                 :     /* Release control lock while doing I/O */
     573            4983 :     LWLockRelease(shared->ControlLock);
     574                 : 
     575                 :     /* Do the write */
     576            4983 :     ok = SlruPhysicalWritePage(ctl, pageno, slotno, fdata);
     577                 : 
     578                 :     /* If we failed, and we're in a flush, better close the files */
     579            4983 :     if (!ok && fdata)
     580                 :     {
     581                 :         int         i;
     582                 : 
     583 UBC           0 :         for (i = 0; i < fdata->num_files; i++)
     584               0 :             CloseTransientFile(fdata->fd[i]);
     585                 :     }
     586                 : 
     587                 :     /* Re-acquire control lock and update page state */
     588 CBC        4983 :     LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
     589                 : 
     590            4983 :     Assert(shared->page_number[slotno] == pageno &&
     591                 :            shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS);
     592                 : 
     593                 :     /* If we failed to write, mark the page dirty again */
     594            4983 :     if (!ok)
     595 UBC           0 :         shared->page_dirty[slotno] = true;
     596                 : 
     597 CBC        4983 :     shared->page_status[slotno] = SLRU_PAGE_VALID;
     598                 : 
     599            4983 :     LWLockRelease(&shared->buffer_locks[slotno].lock);
     600                 : 
     601                 :     /* Now it's okay to ereport if we failed */
     602            4983 :     if (!ok)
     603 UBC           0 :         SlruReportIOError(ctl, pageno, InvalidTransactionId);
     604                 : 
     605                 :     /* If part of a checkpoint, count this as a buffer written. */
     606 CBC        4983 :     if (fdata)
     607            3671 :         CheckpointStats.ckpt_bufs_written++;
     608                 : }
     609                 : 
     610                 : /*
     611                 :  * Wrapper of SlruInternalWritePage, for external callers.
     612                 :  * fdata is always passed as NULL here.
     613                 :  */
     614                 : void
     615            1278 : SimpleLruWritePage(SlruCtl ctl, int slotno)
     616                 : {
     617            1278 :     SlruInternalWritePage(ctl, slotno, NULL);
     618            1278 : }
     619                 : 
     620                 : /*
     621                 :  * Return whether the given page exists on disk.
     622                 :  *
     623                 :  * A false return means that either the file does not exist, or that it's not
     624                 :  * large enough to contain the given page.
     625                 :  */
     626                 : bool
     627              38 : SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
     628                 : {
     629              38 :     int         segno = pageno / SLRU_PAGES_PER_SEGMENT;
     630              38 :     int         rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
     631              38 :     int         offset = rpageno * BLCKSZ;
     632                 :     char        path[MAXPGPATH];
     633                 :     int         fd;
     634                 :     bool        result;
     635                 :     off_t       endpos;
     636                 : 
     637                 :     /* update the stats counter of checked pages */
     638              38 :     pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);
     639                 : 
     640              38 :     SlruFileName(ctl, path, segno);
     641                 : 
     642              38 :     fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
     643              38 :     if (fd < 0)
     644                 :     {
     645                 :         /* expected: file doesn't exist */
     646              12 :         if (errno == ENOENT)
     647              12 :             return false;
     648                 : 
     649                 :         /* report error normally */
     650 UBC           0 :         slru_errcause = SLRU_OPEN_FAILED;
     651               0 :         slru_errno = errno;
     652               0 :         SlruReportIOError(ctl, pageno, 0);
     653                 :     }
     654                 : 
     655 CBC          26 :     if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
     656                 :     {
     657 UBC           0 :         slru_errcause = SLRU_SEEK_FAILED;
     658               0 :         slru_errno = errno;
     659               0 :         SlruReportIOError(ctl, pageno, 0);
     660                 :     }
     661                 : 
     662 CBC          26 :     result = endpos >= (off_t) (offset + BLCKSZ);
     663                 : 
     664              26 :     if (CloseTransientFile(fd) != 0)
     665                 :     {
     666 UBC           0 :         slru_errcause = SLRU_CLOSE_FAILED;
     667               0 :         slru_errno = errno;
     668               0 :         return false;
     669                 :     }
     670                 : 
     671 CBC          26 :     return result;
     672                 : }
     673                 : 
     674                 : /*
     675                 :  * Physical read of a (previously existing) page into a buffer slot
     676                 :  *
     677                 :  * On failure, we cannot just ereport(ERROR) since caller has put state in
     678                 :  * shared memory that must be undone.  So, we return false and save enough
     679                 :  * info in static variables to let SlruReportIOError make the report.
     680                 :  *
     681                 :  * For now, assume it's not worth keeping a file pointer open across
     682                 :  * read/write operations.  We could cache one virtual file pointer ...
     683                 :  */
     684                 : static bool
     685            1715 : SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
     686                 : {
     687            1715 :     SlruShared  shared = ctl->shared;
     688            1715 :     int         segno = pageno / SLRU_PAGES_PER_SEGMENT;
     689            1715 :     int         rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
     690            1715 :     off_t       offset = rpageno * BLCKSZ;
     691                 :     char        path[MAXPGPATH];
     692                 :     int         fd;
     693                 : 
     694            1715 :     SlruFileName(ctl, path, segno);
     695                 : 
     696                 :     /*
     697                 :      * In a crash-and-restart situation, it's possible for us to receive
     698                 :      * commands to set the commit status of transactions whose bits are in
     699                 :      * already-truncated segments of the commit log (see notes in
     700                 :      * SlruPhysicalWritePage).  Hence, if we are InRecovery, allow the case
     701                 :      * where the file doesn't exist, and return zeroes instead.
     702                 :      */
     703            1715 :     fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
     704            1715 :     if (fd < 0)
     705                 :     {
     706 UBC           0 :         if (errno != ENOENT || !InRecovery)
     707                 :         {
     708               0 :             slru_errcause = SLRU_OPEN_FAILED;
     709               0 :             slru_errno = errno;
     710               0 :             return false;
     711                 :         }
     712                 : 
     713               0 :         ereport(LOG,
     714                 :                 (errmsg("file \"%s\" doesn't exist, reading as zeroes",
     715                 :                         path)));
     716               0 :         MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
     717               0 :         return true;
     718                 :     }
     719                 : 
     720 CBC        1715 :     errno = 0;
     721            1715 :     pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
     722            1715 :     if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
     723                 :     {
     724 UBC           0 :         pgstat_report_wait_end();
     725               0 :         slru_errcause = SLRU_READ_FAILED;
     726               0 :         slru_errno = errno;
     727               0 :         CloseTransientFile(fd);
     728               0 :         return false;
     729                 :     }
     730 CBC        1715 :     pgstat_report_wait_end();
     731                 : 
     732            1715 :     if (CloseTransientFile(fd) != 0)
     733                 :     {
     734 UBC           0 :         slru_errcause = SLRU_CLOSE_FAILED;
     735               0 :         slru_errno = errno;
     736               0 :         return false;
     737                 :     }
     738                 : 
     739 CBC        1715 :     return true;
     740                 : }
     741                 : 
     742                 : /*
     743                 :  * Physical write of a page from a buffer slot
     744                 :  *
     745                 :  * On failure, we cannot just ereport(ERROR) since caller has put state in
     746                 :  * shared memory that must be undone.  So, we return false and save enough
     747                 :  * info in static variables to let SlruReportIOError make the report.
     748                 :  *
     749                 :  * For now, assume it's not worth keeping a file pointer open across
     750                 :  * independent read/write operations.  We do batch operations during
     751                 :  * SimpleLruWriteAll, though.
     752                 :  *
     753                 :  * fdata is NULL for a standalone write, pointer to open-file info during
     754                 :  * SimpleLruWriteAll.
     755                 :  */
     756                 : static bool
     757            4983 : SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata)
     758                 : {
     759            4983 :     SlruShared  shared = ctl->shared;
     760            4983 :     int         segno = pageno / SLRU_PAGES_PER_SEGMENT;
     761            4983 :     int         rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
     762            4983 :     off_t       offset = rpageno * BLCKSZ;
     763                 :     char        path[MAXPGPATH];
     764            4983 :     int         fd = -1;
     765                 : 
     766                 :     /* update the stats counter of written pages */
     767            4983 :     pgstat_count_slru_page_written(shared->slru_stats_idx);
     768                 : 
     769                 :     /*
     770                 :      * Honor the write-WAL-before-data rule, if appropriate, so that we do not
     771                 :      * write out data before associated WAL records.  This is the same action
     772                 :      * performed during FlushBuffer() in the main buffer manager.
     773                 :      */
     774            4983 :     if (shared->group_lsn != NULL)
     775                 :     {
     776                 :         /*
     777                 :          * We must determine the largest async-commit LSN for the page. This
     778                 :          * is a bit tedious, but since this entire function is a slow path
     779                 :          * anyway, it seems better to do this here than to maintain a per-page
     780                 :          * LSN variable (which'd need an extra comparison in the
     781                 :          * transaction-commit path).
     782                 :          */
     783                 :         XLogRecPtr  max_lsn;
     784                 :         int         lsnindex,
     785                 :                     lsnoff;
     786                 : 
     787            1990 :         lsnindex = slotno * shared->lsn_groups_per_page;
     788            1990 :         max_lsn = shared->group_lsn[lsnindex++];
     789         2037760 :         for (lsnoff = 1; lsnoff < shared->lsn_groups_per_page; lsnoff++)
     790                 :         {
     791         2035770 :             XLogRecPtr  this_lsn = shared->group_lsn[lsnindex++];
     792                 : 
     793         2035770 :             if (max_lsn < this_lsn)
     794            4787 :                 max_lsn = this_lsn;
     795                 :         }
     796                 : 
     797            1990 :         if (!XLogRecPtrIsInvalid(max_lsn))
     798                 :         {
     799                 :             /*
     800                 :              * As noted above, elog(ERROR) is not acceptable here, so if
     801                 :              * XLogFlush were to fail, we must PANIC.  This isn't much of a
     802                 :              * restriction because XLogFlush is just about all critical
     803                 :              * section anyway, but let's make sure.
     804                 :              */
     805             148 :             START_CRIT_SECTION();
     806             148 :             XLogFlush(max_lsn);
     807             148 :             END_CRIT_SECTION();
     808                 :         }
     809                 :     }
     810                 : 
     811                 :     /*
     812                 :      * During a WriteAll, we may already have the desired file open.
     813                 :      */
     814            4983 :     if (fdata)
     815                 :     {
     816                 :         int         i;
     817                 : 
     818            3671 :         for (i = 0; i < fdata->num_files; i++)
     819                 :         {
     820              25 :             if (fdata->segno[i] == segno)
     821                 :             {
     822              25 :                 fd = fdata->fd[i];
     823              25 :                 break;
     824                 :             }
     825                 :         }
     826                 :     }
     827                 : 
     828            4983 :     if (fd < 0)
     829                 :     {
     830                 :         /*
     831                 :          * If the file doesn't already exist, we should create it.  It is
     832                 :          * possible for this to need to happen when writing a page that's not
     833                 :          * first in its segment; we assume the OS can cope with that. (Note:
     834                 :          * it might seem that it'd be okay to create files only when
     835                 :          * SimpleLruZeroPage is called for the first page of a segment.
     836                 :          * However, if after a crash and restart the REDO logic elects to
     837                 :          * replay the log from a checkpoint before the latest one, then it's
     838                 :          * possible that we will get commands to set transaction status of
     839                 :          * transactions that have already been truncated from the commit log.
     840                 :          * Easiest way to deal with that is to accept references to
     841                 :          * nonexistent files here and in SlruPhysicalReadPage.)
     842                 :          *
     843                 :          * Note: it is possible for more than one backend to be executing this
     844                 :          * code simultaneously for different pages of the same file. Hence,
     845                 :          * don't use O_EXCL or O_TRUNC or anything like that.
     846                 :          */
     847            4958 :         SlruFileName(ctl, path, segno);
     848            4958 :         fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
     849            4958 :         if (fd < 0)
     850                 :         {
     851 UBC           0 :             slru_errcause = SLRU_OPEN_FAILED;
     852               0 :             slru_errno = errno;
     853               0 :             return false;
     854                 :         }
     855                 : 
     856 CBC        4958 :         if (fdata)
     857                 :         {
     858            3646 :             if (fdata->num_files < MAX_WRITEALL_BUFFERS)
     859                 :             {
     860            3646 :                 fdata->fd[fdata->num_files] = fd;
     861            3646 :                 fdata->segno[fdata->num_files] = segno;
     862            3646 :                 fdata->num_files++;
     863                 :             }
     864                 :             else
     865                 :             {
     866                 :                 /*
     867                 :                  * In the unlikely event that we exceed MAX_FLUSH_BUFFERS,
     868                 :                  * fall back to treating it as a standalone write.
     869                 :                  */
     870 UBC           0 :                 fdata = NULL;
     871                 :             }
     872                 :         }
     873                 :     }
     874                 : 
     875 CBC        4983 :     errno = 0;
     876            4983 :     pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
     877            4983 :     if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
     878                 :     {
     879 UBC           0 :         pgstat_report_wait_end();
     880                 :         /* if write didn't set errno, assume problem is no disk space */
     881               0 :         if (errno == 0)
     882               0 :             errno = ENOSPC;
     883               0 :         slru_errcause = SLRU_WRITE_FAILED;
     884               0 :         slru_errno = errno;
     885               0 :         if (!fdata)
     886               0 :             CloseTransientFile(fd);
     887               0 :         return false;
     888                 :     }
     889 CBC        4983 :     pgstat_report_wait_end();
     890                 : 
     891                 :     /* Queue up a sync request for the checkpointer. */
     892            4983 :     if (ctl->sync_handler != SYNC_HANDLER_NONE)
     893                 :     {
     894                 :         FileTag     tag;
     895                 : 
     896            3580 :         INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
     897            3580 :         if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false))
     898                 :         {
     899                 :             /* No space to enqueue sync request.  Do it synchronously. */
     900               4 :             pgstat_report_wait_start(WAIT_EVENT_SLRU_SYNC);
     901               4 :             if (pg_fsync(fd) != 0)
     902                 :             {
     903 UBC           0 :                 pgstat_report_wait_end();
     904               0 :                 slru_errcause = SLRU_FSYNC_FAILED;
     905               0 :                 slru_errno = errno;
     906               0 :                 CloseTransientFile(fd);
     907               0 :                 return false;
     908                 :             }
     909 CBC           4 :             pgstat_report_wait_end();
     910                 :         }
     911                 :     }
     912                 : 
     913                 :     /* Close file, unless part of flush request. */
     914            4983 :     if (!fdata)
     915                 :     {
     916            1312 :         if (CloseTransientFile(fd) != 0)
     917                 :         {
     918 UBC           0 :             slru_errcause = SLRU_CLOSE_FAILED;
     919               0 :             slru_errno = errno;
     920               0 :             return false;
     921                 :         }
     922                 :     }
     923                 : 
     924 CBC        4983 :     return true;
     925                 : }
     926                 : 
     927                 : /*
     928                 :  * Issue the error message after failure of SlruPhysicalReadPage or
     929                 :  * SlruPhysicalWritePage.  Call this after cleaning up shared-memory state.
     930                 :  */
     931                 : static void
     932 UBC           0 : SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
     933                 : {
     934               0 :     int         segno = pageno / SLRU_PAGES_PER_SEGMENT;
     935               0 :     int         rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
     936               0 :     int         offset = rpageno * BLCKSZ;
     937                 :     char        path[MAXPGPATH];
     938                 : 
     939               0 :     SlruFileName(ctl, path, segno);
     940               0 :     errno = slru_errno;
     941               0 :     switch (slru_errcause)
     942                 :     {
     943               0 :         case SLRU_OPEN_FAILED:
     944               0 :             ereport(ERROR,
     945                 :                     (errcode_for_file_access(),
     946                 :                      errmsg("could not access status of transaction %u", xid),
     947                 :                      errdetail("Could not open file \"%s\": %m.", path)));
     948                 :             break;
     949               0 :         case SLRU_SEEK_FAILED:
     950               0 :             ereport(ERROR,
     951                 :                     (errcode_for_file_access(),
     952                 :                      errmsg("could not access status of transaction %u", xid),
     953                 :                      errdetail("Could not seek in file \"%s\" to offset %d: %m.",
     954                 :                                path, offset)));
     955                 :             break;
     956               0 :         case SLRU_READ_FAILED:
     957               0 :             if (errno)
     958               0 :                 ereport(ERROR,
     959                 :                         (errcode_for_file_access(),
     960                 :                          errmsg("could not access status of transaction %u", xid),
     961                 :                          errdetail("Could not read from file \"%s\" at offset %d: %m.",
     962                 :                                    path, offset)));
     963                 :             else
     964               0 :                 ereport(ERROR,
     965                 :                         (errmsg("could not access status of transaction %u", xid),
     966                 :                          errdetail("Could not read from file \"%s\" at offset %d: read too few bytes.", path, offset)));
     967                 :             break;
     968               0 :         case SLRU_WRITE_FAILED:
     969               0 :             if (errno)
     970               0 :                 ereport(ERROR,
     971                 :                         (errcode_for_file_access(),
     972                 :                          errmsg("could not access status of transaction %u", xid),
     973                 :                          errdetail("Could not write to file \"%s\" at offset %d: %m.",
     974                 :                                    path, offset)));
     975                 :             else
     976               0 :                 ereport(ERROR,
     977                 :                         (errmsg("could not access status of transaction %u", xid),
     978                 :                          errdetail("Could not write to file \"%s\" at offset %d: wrote too few bytes.",
     979                 :                                    path, offset)));
     980                 :             break;
     981               0 :         case SLRU_FSYNC_FAILED:
     982               0 :             ereport(data_sync_elevel(ERROR),
     983                 :                     (errcode_for_file_access(),
     984                 :                      errmsg("could not access status of transaction %u", xid),
     985                 :                      errdetail("Could not fsync file \"%s\": %m.",
     986                 :                                path)));
     987               0 :             break;
     988               0 :         case SLRU_CLOSE_FAILED:
     989               0 :             ereport(ERROR,
     990                 :                     (errcode_for_file_access(),
     991                 :                      errmsg("could not access status of transaction %u", xid),
     992                 :                      errdetail("Could not close file \"%s\": %m.",
     993                 :                                path)));
     994                 :             break;
     995               0 :         default:
     996                 :             /* can't get here, we trust */
     997               0 :             elog(ERROR, "unrecognized SimpleLru error cause: %d",
     998                 :                  (int) slru_errcause);
     999                 :             break;
    1000                 :     }
    1001               0 : }
    1002                 : 
    1003                 : /*
    1004                 :  * Select the slot to re-use when we need a free slot.
    1005                 :  *
    1006                 :  * The target page number is passed because we need to consider the
    1007                 :  * possibility that some other process reads in the target page while
    1008                 :  * we are doing I/O to free a slot.  Hence, check or recheck to see if
    1009                 :  * any slot already holds the target page, and return that slot if so.
    1010                 :  * Thus, the returned slot is *either* a slot already holding the pageno
    1011                 :  * (could be any state except EMPTY), *or* a freeable slot (state EMPTY
    1012                 :  * or CLEAN).
    1013                 :  *
    1014                 :  * Control lock must be held at entry, and will be held at exit.
    1015                 :  */
    1016                 : static int
    1017 CBC      330731 : SlruSelectLRUPage(SlruCtl ctl, int pageno)
    1018                 : {
    1019          330731 :     SlruShared  shared = ctl->shared;
    1020                 : 
    1021                 :     /* Outer loop handles restart after I/O */
    1022                 :     for (;;)
    1023              28 :     {
    1024                 :         int         slotno;
    1025                 :         int         cur_count;
    1026          330759 :         int         bestvalidslot = 0;  /* keep compiler quiet */
    1027          330759 :         int         best_valid_delta = -1;
    1028          330759 :         int         best_valid_page_number = 0; /* keep compiler quiet */
    1029          330759 :         int         bestinvalidslot = 0;    /* keep compiler quiet */
    1030          330759 :         int         best_invalid_delta = -1;
    1031          330759 :         int         best_invalid_page_number = 0;   /* keep compiler quiet */
    1032                 : 
    1033                 :         /* See if page already has a buffer assigned */
    1034          416486 :         for (slotno = 0; slotno < shared->num_slots; slotno++)
    1035                 :         {
    1036          412504 :             if (shared->page_number[slotno] == pageno &&
    1037          408341 :                 shared->page_status[slotno] != SLRU_PAGE_EMPTY)
    1038          326777 :                 return slotno;
    1039                 :         }
    1040                 : 
    1041                 :         /*
    1042                 :          * If we find any EMPTY slot, just select that one. Else choose a
    1043                 :          * victim page to replace.  We normally take the least recently used
    1044                 :          * valid page, but we will never take the slot containing
    1045                 :          * latest_page_number, even if it appears least recently used.  We
    1046                 :          * will select a slot that is already I/O busy only if there is no
    1047                 :          * other choice: a read-busy slot will not be least recently used once
    1048                 :          * the read finishes, and waiting for an I/O on a write-busy slot is
    1049                 :          * inferior to just picking some other slot.  Testing shows the slot
    1050                 :          * we pick instead will often be clean, allowing us to begin a read at
    1051                 :          * once.
    1052                 :          *
    1053                 :          * Normally the page_lru_count values will all be different and so
    1054                 :          * there will be a well-defined LRU page.  But since we allow
    1055                 :          * concurrent execution of SlruRecentlyUsed() within
    1056                 :          * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
    1057                 :          * acquire the same lru_count values.  In that case we break ties by
    1058                 :          * choosing the furthest-back page.
    1059                 :          *
    1060                 :          * Notice that this next line forcibly advances cur_lru_count to a
    1061                 :          * value that is certainly beyond any value that will be in the
    1062                 :          * page_lru_count array after the loop finishes.  This ensures that
    1063                 :          * the next execution of SlruRecentlyUsed will mark the page newly
    1064                 :          * used, even if it's for a page that has the current counter value.
    1065                 :          * That gets us back on the path to having good data when there are
    1066                 :          * multiple pages with the same lru_count.
    1067                 :          */
    1068            3982 :         cur_count = (shared->cur_lru_count)++;
    1069            5212 :         for (slotno = 0; slotno < shared->num_slots; slotno++)
    1070                 :         {
    1071                 :             int         this_delta;
    1072                 :             int         this_page_number;
    1073                 : 
    1074            5121 :             if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
    1075            3891 :                 return slotno;
    1076            1230 :             this_delta = cur_count - shared->page_lru_count[slotno];
    1077            1230 :             if (this_delta < 0)
    1078                 :             {
    1079                 :                 /*
    1080                 :                  * Clean up in case shared updates have caused cur_count
    1081                 :                  * increments to get "lost".  We back off the page counts,
    1082                 :                  * rather than trying to increase cur_count, to avoid any
    1083                 :                  * question of infinite loops or failure in the presence of
    1084                 :                  * wrapped-around counts.
    1085                 :                  */
    1086 UBC           0 :                 shared->page_lru_count[slotno] = cur_count;
    1087               0 :                 this_delta = 0;
    1088                 :             }
    1089 CBC        1230 :             this_page_number = shared->page_number[slotno];
    1090            1230 :             if (this_page_number == shared->latest_page_number)
    1091             144 :                 continue;
    1092            1086 :             if (shared->page_status[slotno] == SLRU_PAGE_VALID)
    1093                 :             {
    1094            1086 :                 if (this_delta > best_valid_delta ||
    1095 UBC           0 :                     (this_delta == best_valid_delta &&
    1096               0 :                      ctl->PagePrecedes(this_page_number,
    1097                 :                                        best_valid_page_number)))
    1098                 :                 {
    1099 CBC         200 :                     bestvalidslot = slotno;
    1100             200 :                     best_valid_delta = this_delta;
    1101             200 :                     best_valid_page_number = this_page_number;
    1102                 :                 }
    1103                 :             }
    1104                 :             else
    1105                 :             {
    1106 UBC           0 :                 if (this_delta > best_invalid_delta ||
    1107               0 :                     (this_delta == best_invalid_delta &&
    1108               0 :                      ctl->PagePrecedes(this_page_number,
    1109                 :                                        best_invalid_page_number)))
    1110                 :                 {
    1111               0 :                     bestinvalidslot = slotno;
    1112               0 :                     best_invalid_delta = this_delta;
    1113               0 :                     best_invalid_page_number = this_page_number;
    1114                 :                 }
    1115                 :             }
    1116                 :         }
    1117                 : 
    1118                 :         /*
    1119                 :          * If all pages (except possibly the latest one) are I/O busy, we'll
    1120                 :          * have to wait for an I/O to complete and then retry.  In that
    1121                 :          * unhappy case, we choose to wait for the I/O on the least recently
    1122                 :          * used slot, on the assumption that it was likely initiated first of
    1123                 :          * all the I/Os in progress and may therefore finish first.
    1124                 :          */
    1125 CBC          91 :         if (best_valid_delta < 0)
    1126                 :         {
    1127 UBC           0 :             SimpleLruWaitIO(ctl, bestinvalidslot);
    1128               0 :             continue;
    1129                 :         }
    1130                 : 
    1131                 :         /*
    1132                 :          * If the selected page is clean, we're set.
    1133                 :          */
    1134 CBC          91 :         if (!shared->page_dirty[bestvalidslot])
    1135              63 :             return bestvalidslot;
    1136                 : 
    1137                 :         /*
    1138                 :          * Write the page.
    1139                 :          */
    1140              28 :         SlruInternalWritePage(ctl, bestvalidslot, NULL);
    1141                 : 
    1142                 :         /*
    1143                 :          * Now loop back and try again.  This is the easiest way of dealing
    1144                 :          * with corner cases such as the victim page being re-dirtied while we
    1145                 :          * wrote it.
    1146                 :          */
    1147                 :     }
    1148                 : }
    1149                 : 
    1150                 : /*
    1151                 :  * Write dirty pages to disk during checkpoint or database shutdown.  Flushing
    1152                 :  * is deferred until the next call to ProcessSyncRequests(), though we do fsync
    1153                 :  * the containing directory here to make sure that newly created directory
    1154                 :  * entries are on disk.
    1155                 :  */
    1156                 : void
    1157           11830 : SimpleLruWriteAll(SlruCtl ctl, bool allow_redirtied)
    1158                 : {
    1159           11830 :     SlruShared  shared = ctl->shared;
    1160                 :     SlruWriteAllData fdata;
    1161                 :     int         slotno;
    1162           11830 :     int         pageno = 0;
    1163                 :     int         i;
    1164                 :     bool        ok;
    1165                 : 
    1166                 :     /* update the stats counter of flushes */
    1167           11830 :     pgstat_count_slru_flush(shared->slru_stats_idx);
    1168                 : 
    1169                 :     /*
    1170                 :      * Find and write dirty pages
    1171                 :      */
    1172           11830 :     fdata.num_files = 0;
    1173                 : 
    1174           11830 :     LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
    1175                 : 
    1176          346550 :     for (slotno = 0; slotno < shared->num_slots; slotno++)
    1177                 :     {
    1178          334720 :         SlruInternalWritePage(ctl, slotno, &fdata);
    1179                 : 
    1180                 :         /*
    1181                 :          * In some places (e.g. checkpoints), we cannot assert that the slot
    1182                 :          * is clean now, since another process might have re-dirtied it
    1183                 :          * already.  That's okay.
    1184                 :          */
    1185          334720 :         Assert(allow_redirtied ||
    1186                 :                shared->page_status[slotno] == SLRU_PAGE_EMPTY ||
    1187                 :                (shared->page_status[slotno] == SLRU_PAGE_VALID &&
    1188                 :                 !shared->page_dirty[slotno]));
    1189                 :     }
    1190                 : 
    1191           11830 :     LWLockRelease(shared->ControlLock);
    1192                 : 
    1193                 :     /*
    1194                 :      * Now close any files that were open
    1195                 :      */
    1196           11830 :     ok = true;
    1197           15476 :     for (i = 0; i < fdata.num_files; i++)
    1198                 :     {
    1199            3646 :         if (CloseTransientFile(fdata.fd[i]) != 0)
    1200                 :         {
    1201 UBC           0 :             slru_errcause = SLRU_CLOSE_FAILED;
    1202               0 :             slru_errno = errno;
    1203               0 :             pageno = fdata.segno[i] * SLRU_PAGES_PER_SEGMENT;
    1204               0 :             ok = false;
    1205                 :         }
    1206                 :     }
    1207 CBC       11830 :     if (!ok)
    1208 UBC           0 :         SlruReportIOError(ctl, pageno, InvalidTransactionId);
    1209                 : 
    1210                 :     /* Ensure that directory entries for new files are on disk. */
    1211 CBC       11830 :     if (ctl->sync_handler != SYNC_HANDLER_NONE)
    1212            9466 :         fsync_fname(ctl->Dir, true);
    1213           11830 : }
    1214                 : 
    1215                 : /*
    1216                 :  * Remove all segments before the one holding the passed page number
    1217                 :  *
    1218                 :  * All SLRUs prevent concurrent calls to this function, either with an LWLock
    1219                 :  * or by calling it only as part of a checkpoint.  Mutual exclusion must begin
    1220                 :  * before computing cutoffPage.  Mutual exclusion must end after any limit
    1221                 :  * update that would permit other backends to write fresh data into the
    1222                 :  * segment immediately preceding the one containing cutoffPage.  Otherwise,
    1223                 :  * when the SLRU is quite full, SimpleLruTruncate() might delete that segment
    1224                 :  * after it has accrued freshly-written data.
    1225                 :  */
    1226                 : void
    1227            2337 : SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
    1228                 : {
    1229            2337 :     SlruShared  shared = ctl->shared;
    1230                 :     int         slotno;
    1231                 : 
    1232                 :     /* update the stats counter of truncates */
    1233            2337 :     pgstat_count_slru_truncate(shared->slru_stats_idx);
    1234                 : 
    1235                 :     /*
    1236                 :      * Scan shared memory and remove any pages preceding the cutoff page, to
    1237                 :      * ensure we won't rewrite them later.  (Since this is normally called in
    1238                 :      * or just after a checkpoint, any dirty pages should have been flushed
    1239                 :      * already ... we're just being extra careful here.)
    1240                 :      */
    1241            2337 :     LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
    1242                 : 
    1243 GNC        2343 : restart:
    1244                 : 
    1245                 :     /*
    1246                 :      * While we are holding the lock, make an important safety check: the
    1247                 :      * current endpoint page must not be eligible for removal.
    1248                 :      */
    1249 CBC        2343 :     if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage))
    1250                 :     {
    1251 UBC           0 :         LWLockRelease(shared->ControlLock);
    1252               0 :         ereport(LOG,
    1253                 :                 (errmsg("could not truncate directory \"%s\": apparent wraparound",
    1254                 :                         ctl->Dir)));
    1255               0 :         return;
    1256                 :     }
    1257                 : 
    1258 CBC       77113 :     for (slotno = 0; slotno < shared->num_slots; slotno++)
    1259                 :     {
    1260           74776 :         if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
    1261           72386 :             continue;
    1262            2390 :         if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage))
    1263            2344 :             continue;
    1264                 : 
    1265                 :         /*
    1266                 :          * If page is clean, just change state to EMPTY (expected case).
    1267                 :          */
    1268              46 :         if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
    1269              46 :             !shared->page_dirty[slotno])
    1270                 :         {
    1271              40 :             shared->page_status[slotno] = SLRU_PAGE_EMPTY;
    1272              40 :             continue;
    1273                 :         }
    1274                 : 
    1275                 :         /*
    1276                 :          * Hmm, we have (or may have) I/O operations acting on the page, so
    1277                 :          * we've got to wait for them to finish and then start again. This is
    1278                 :          * the same logic as in SlruSelectLRUPage.  (XXX if page is dirty,
    1279                 :          * wouldn't it be OK to just discard it without writing it?
    1280                 :          * SlruMayDeleteSegment() uses a stricter qualification, so we might
    1281                 :          * not delete this page in the end; even if we don't delete it, we
    1282                 :          * won't have cause to read its data again.  For now, keep the logic
    1283                 :          * the same as it was.)
    1284                 :          */
    1285               6 :         if (shared->page_status[slotno] == SLRU_PAGE_VALID)
    1286               6 :             SlruInternalWritePage(ctl, slotno, NULL);
    1287                 :         else
    1288 UBC           0 :             SimpleLruWaitIO(ctl, slotno);
    1289 CBC           6 :         goto restart;
    1290                 :     }
    1291                 : 
    1292            2337 :     LWLockRelease(shared->ControlLock);
    1293                 : 
    1294                 :     /* Now we can remove the old segment(s) */
    1295            2337 :     (void) SlruScanDirectory(ctl, SlruScanDirCbDeleteCutoff, &cutoffPage);
    1296                 : }
    1297                 : 
    1298                 : /*
    1299                 :  * Delete an individual SLRU segment.
    1300                 :  *
    1301                 :  * NB: This does not touch the SLRU buffers themselves; callers have to ensure
    1302                 :  * they either can't yet contain anything, or have already been cleaned out.
    1303                 :  */
    1304                 : static void
    1305               8 : SlruInternalDeleteSegment(SlruCtl ctl, int segno)
    1306                 : {
    1307                 :     char        path[MAXPGPATH];
    1308                 : 
    1309                 :     /* Forget any fsync requests queued for this segment. */
    1310               8 :     if (ctl->sync_handler != SYNC_HANDLER_NONE)
    1311                 :     {
    1312                 :         FileTag     tag;
    1313                 : 
    1314               5 :         INIT_SLRUFILETAG(tag, ctl->sync_handler, segno);
    1315               5 :         RegisterSyncRequest(&tag, SYNC_FORGET_REQUEST, true);
    1316                 :     }
    1317                 : 
    1318                 :     /* Unlink the file. */
    1319               8 :     SlruFileName(ctl, path, segno);
    1320               8 :     ereport(DEBUG2, (errmsg_internal("removing file \"%s\"", path)));
    1321               8 :     unlink(path);
    1322               8 : }
    1323                 : 
    1324                 : /*
    1325                 :  * Delete an individual SLRU segment, identified by the segment number.
    1326                 :  */
    1327                 : void
    1328               1 : SlruDeleteSegment(SlruCtl ctl, int segno)
    1329                 : {
    1330               1 :     SlruShared  shared = ctl->shared;
    1331                 :     int         slotno;
    1332                 :     bool        did_write;
    1333                 : 
    1334                 :     /* Clean out any possibly existing references to the segment. */
    1335               1 :     LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
    1336               1 : restart:
    1337               1 :     did_write = false;
    1338              17 :     for (slotno = 0; slotno < shared->num_slots; slotno++)
    1339                 :     {
    1340              16 :         int         pagesegno = shared->page_number[slotno] / SLRU_PAGES_PER_SEGMENT;
    1341                 : 
    1342              16 :         if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
    1343 UBC           0 :             continue;
    1344                 : 
    1345                 :         /* not the segment we're looking for */
    1346 CBC          16 :         if (pagesegno != segno)
    1347               6 :             continue;
    1348                 : 
    1349                 :         /* If page is clean, just change state to EMPTY (expected case). */
    1350              10 :         if (shared->page_status[slotno] == SLRU_PAGE_VALID &&
    1351              10 :             !shared->page_dirty[slotno])
    1352                 :         {
    1353              10 :             shared->page_status[slotno] = SLRU_PAGE_EMPTY;
    1354              10 :             continue;
    1355                 :         }
    1356                 : 
    1357                 :         /* Same logic as SimpleLruTruncate() */
    1358 UBC           0 :         if (shared->page_status[slotno] == SLRU_PAGE_VALID)
    1359               0 :             SlruInternalWritePage(ctl, slotno, NULL);
    1360                 :         else
    1361               0 :             SimpleLruWaitIO(ctl, slotno);
    1362                 : 
    1363               0 :         did_write = true;
    1364                 :     }
    1365                 : 
    1366                 :     /*
    1367                 :      * Be extra careful and re-check. The IO functions release the control
    1368                 :      * lock, so new pages could have been read in.
    1369                 :      */
    1370 CBC           1 :     if (did_write)
    1371 UBC           0 :         goto restart;
    1372                 : 
    1373 CBC           1 :     SlruInternalDeleteSegment(ctl, segno);
    1374                 : 
    1375               1 :     LWLockRelease(shared->ControlLock);
    1376               1 : }
    1377                 : 
    1378                 : /*
    1379                 :  * Determine whether a segment is okay to delete.
    1380                 :  *
    1381                 :  * segpage is the first page of the segment, and cutoffPage is the oldest (in
    1382                 :  * PagePrecedes order) page in the SLRU containing still-useful data.  Since
    1383                 :  * every core PagePrecedes callback implements "wrap around", check the
    1384                 :  * segment's first and last pages:
    1385                 :  *
    1386                 :  * first<cutoff  && last<cutoff:  yes
    1387                 :  * first<cutoff  && last>=cutoff: no; cutoff falls inside this segment
    1388                 :  * first>=cutoff && last<cutoff:  no; wrap point falls inside this segment
    1389                 :  * first>=cutoff && last>=cutoff: no; every page of this segment is too young
    1390                 :  */
    1391                 : static bool
    1392           57435 : SlruMayDeleteSegment(SlruCtl ctl, int segpage, int cutoffPage)
    1393                 : {
    1394           57435 :     int         seg_last_page = segpage + SLRU_PAGES_PER_SEGMENT - 1;
    1395                 : 
    1396           57435 :     Assert(segpage % SLRU_PAGES_PER_SEGMENT == 0);
    1397                 : 
    1398           57483 :     return (ctl->PagePrecedes(segpage, cutoffPage) &&
    1399              48 :             ctl->PagePrecedes(seg_last_page, cutoffPage));
    1400                 : }
    1401                 : 
    1402                 : #ifdef USE_ASSERT_CHECKING
    1403                 : static void
    1404           27390 : SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
    1405                 : {
    1406                 :     TransactionId lhs,
    1407                 :                 rhs;
    1408                 :     int         newestPage,
    1409                 :                 oldestPage;
    1410                 :     TransactionId newestXact,
    1411                 :                 oldestXact;
    1412                 : 
    1413                 :     /*
    1414                 :      * Compare an XID pair having undefined order (see RFC 1982), a pair at
    1415                 :      * "opposite ends" of the XID space.  TransactionIdPrecedes() treats each
    1416                 :      * as preceding the other.  If RHS is oldestXact, LHS is the first XID we
    1417                 :      * must not assign.
    1418                 :      */
    1419           27390 :     lhs = per_page + offset;    /* skip first page to avoid non-normal XIDs */
    1420           27390 :     rhs = lhs + (1U << 31);
    1421           27390 :     Assert(TransactionIdPrecedes(lhs, rhs));
    1422           27390 :     Assert(TransactionIdPrecedes(rhs, lhs));
    1423           27390 :     Assert(!TransactionIdPrecedes(lhs - 1, rhs));
    1424           27390 :     Assert(TransactionIdPrecedes(rhs, lhs - 1));
    1425           27390 :     Assert(TransactionIdPrecedes(lhs + 1, rhs));
    1426           27390 :     Assert(!TransactionIdPrecedes(rhs, lhs + 1));
    1427           27390 :     Assert(!TransactionIdFollowsOrEquals(lhs, rhs));
    1428           27390 :     Assert(!TransactionIdFollowsOrEquals(rhs, lhs));
    1429           27390 :     Assert(!ctl->PagePrecedes(lhs / per_page, lhs / per_page));
    1430           27390 :     Assert(!ctl->PagePrecedes(lhs / per_page, rhs / per_page));
    1431           27390 :     Assert(!ctl->PagePrecedes(rhs / per_page, lhs / per_page));
    1432           27390 :     Assert(!ctl->PagePrecedes((lhs - per_page) / per_page, rhs / per_page));
    1433           27390 :     Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page));
    1434           27390 :     Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page));
    1435           27390 :     Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page)
    1436                 :            || (1U << 31) % per_page != 0);    /* See CommitTsPagePrecedes() */
    1437           27390 :     Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page)
    1438                 :            || (1U << 31) % per_page != 0);
    1439           27390 :     Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page));
    1440           27390 :     Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page));
    1441           27390 :     Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page));
    1442                 : 
    1443                 :     /*
    1444                 :      * GetNewTransactionId() has assigned the last XID it can safely use, and
    1445                 :      * that XID is in the *LAST* page of the second segment.  We must not
    1446                 :      * delete that segment.
    1447                 :      */
    1448           27390 :     newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1;
    1449           27390 :     newestXact = newestPage * per_page + offset;
    1450           27390 :     Assert(newestXact / per_page == newestPage);
    1451           27390 :     oldestXact = newestXact + 1;
    1452           27390 :     oldestXact -= 1U << 31;
    1453           27390 :     oldestPage = oldestXact / per_page;
    1454           27390 :     Assert(!SlruMayDeleteSegment(ctl,
    1455                 :                                  (newestPage -
    1456                 :                                   newestPage % SLRU_PAGES_PER_SEGMENT),
    1457                 :                                  oldestPage));
    1458                 : 
    1459                 :     /*
    1460                 :      * GetNewTransactionId() has assigned the last XID it can safely use, and
    1461                 :      * that XID is in the *FIRST* page of the second segment.  We must not
    1462                 :      * delete that segment.
    1463                 :      */
    1464           27390 :     newestPage = SLRU_PAGES_PER_SEGMENT;
    1465           27390 :     newestXact = newestPage * per_page + offset;
    1466           27390 :     Assert(newestXact / per_page == newestPage);
    1467           27390 :     oldestXact = newestXact + 1;
    1468           27390 :     oldestXact -= 1U << 31;
    1469           27390 :     oldestPage = oldestXact / per_page;
    1470           27390 :     Assert(!SlruMayDeleteSegment(ctl,
    1471                 :                                  (newestPage -
    1472                 :                                   newestPage % SLRU_PAGES_PER_SEGMENT),
    1473                 :                                  oldestPage));
    1474           27390 : }
    1475                 : 
    1476                 : /*
    1477                 :  * Unit-test a PagePrecedes function.
    1478                 :  *
    1479                 :  * This assumes every uint32 >= FirstNormalTransactionId is a valid key.  It
    1480                 :  * assumes each value occupies a contiguous, fixed-size region of SLRU bytes.
    1481                 :  * (MultiXactMemberCtl separates flags from XIDs.  AsyncCtl has
    1482                 :  * variable-length entries, no keys, and no random access.  These unit tests
    1483                 :  * do not apply to them.)
    1484                 :  */
    1485                 : void
    1486            9130 : SlruPagePrecedesUnitTests(SlruCtl ctl, int per_page)
    1487                 : {
    1488                 :     /* Test first, middle and last entries of a page. */
    1489            9130 :     SlruPagePrecedesTestOffset(ctl, per_page, 0);
    1490            9130 :     SlruPagePrecedesTestOffset(ctl, per_page, per_page / 2);
    1491            9130 :     SlruPagePrecedesTestOffset(ctl, per_page, per_page - 1);
    1492            9130 : }
    1493                 : #endif
    1494                 : 
    1495                 : /*
    1496                 :  * SlruScanDirectory callback
    1497                 :  *      This callback reports true if there's any segment wholly prior to the
    1498                 :  *      one containing the page passed as "data".
    1499                 :  */
    1500                 : bool
    1501             317 : SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data)
    1502                 : {
    1503             317 :     int         cutoffPage = *(int *) data;
    1504                 : 
    1505             317 :     if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
    1506 UBC           0 :         return true;            /* found one; don't iterate any more */
    1507                 : 
    1508 CBC         317 :     return false;               /* keep going */
    1509                 : }
    1510                 : 
    1511                 : /*
    1512                 :  * SlruScanDirectory callback.
    1513                 :  *      This callback deletes segments wholly prior to the cutoff page passed as "data".
    1514                 :  */
    1515                 : static bool
    1516            2338 : SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data)
    1517                 : {
    1518            2338 :     int         cutoffPage = *(int *) data;
    1519                 : 
    1520            2338 :     if (SlruMayDeleteSegment(ctl, segpage, cutoffPage))
    1521               1 :         SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
    1522                 : 
    1523            2338 :     return false;               /* keep going */
    1524                 : }
    1525                 : 
    1526                 : /*
    1527                 :  * SlruScanDirectory callback.
    1528                 :  *      This callback deletes all segments.
    1529                 :  */
    1530                 : bool
    1531               6 : SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data)
    1532                 : {
    1533               6 :     SlruInternalDeleteSegment(ctl, segpage / SLRU_PAGES_PER_SEGMENT);
    1534                 : 
    1535               6 :     return false;               /* keep going */
    1536                 : }
    1537                 : 
    1538                 : /*
    1539                 :  * Scan the SimpleLru directory and apply a callback to each file found in it.
    1540                 :  *
    1541                 :  * If the callback returns true, the scan is stopped.  The last return value
    1542                 :  * from the callback is returned.
    1543                 :  *
    1544                 :  * The callback receives the following arguments: 1. the SlruCtl struct for the
    1545                 :  * slru being truncated; 2. the filename being considered; 3. the page number
    1546                 :  * for the first page of that file; 4. a pointer to the opaque data given to us
    1547                 :  * by the caller.
    1548                 :  *
    1549                 :  * Note that the ordering in which the directory is scanned is not guaranteed.
    1550                 :  *
    1551                 :  * Note that no locking is applied.
    1552                 :  */
    1553                 : bool
    1554            5930 : SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
    1555                 : {
    1556            5930 :     bool        retval = false;
    1557                 :     DIR        *cldir;
    1558                 :     struct dirent *clde;
    1559                 :     int         segno;
    1560                 :     int         segpage;
    1561                 : 
    1562            5930 :     cldir = AllocateDir(ctl->Dir);
    1563           20451 :     while ((clde = ReadDir(cldir, ctl->Dir)) != NULL)
    1564                 :     {
    1565                 :         size_t      len;
    1566                 : 
    1567           14521 :         len = strlen(clde->d_name);
    1568                 : 
    1569           14521 :         if ((len == 4 || len == 5 || len == 6) &&
    1570            2661 :             strspn(clde->d_name, "0123456789ABCDEF") == len)
    1571                 :         {
    1572            2661 :             segno = (int) strtol(clde->d_name, NULL, 16);
    1573            2661 :             segpage = segno * SLRU_PAGES_PER_SEGMENT;
    1574                 : 
    1575            2661 :             elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
    1576                 :                  ctl->Dir, clde->d_name);
    1577            2661 :             retval = callback(ctl, clde->d_name, segpage, data);
    1578            2661 :             if (retval)
    1579 UBC           0 :                 break;
    1580                 :         }
    1581                 :     }
    1582 CBC        5930 :     FreeDir(cldir);
    1583                 : 
    1584            5930 :     return retval;
    1585                 : }
    1586                 : 
    1587                 : /*
    1588                 :  * Individual SLRUs (clog, ...) have to provide a sync.c handler function so
    1589                 :  * that they can provide the correct "SlruCtl" (otherwise we don't know how to
    1590                 :  * build the path), but they just forward to this common implementation that
    1591                 :  * performs the fsync.
    1592                 :  */
    1593                 : int
    1594               1 : SlruSyncFileTag(SlruCtl ctl, const FileTag *ftag, char *path)
    1595                 : {
    1596                 :     int         fd;
    1597                 :     int         save_errno;
    1598                 :     int         result;
    1599                 : 
    1600               1 :     SlruFileName(ctl, path, ftag->segno);
    1601                 : 
    1602               1 :     fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
    1603               1 :     if (fd < 0)
    1604 UBC           0 :         return -1;
    1605                 : 
    1606 CBC           1 :     result = pg_fsync(fd);
    1607               1 :     save_errno = errno;
    1608                 : 
    1609               1 :     CloseTransientFile(fd);
    1610                 : 
    1611               1 :     errno = save_errno;
    1612               1 :     return result;
    1613                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a