LCOV - Differential Code Coverage HEAD vs 15 - src/backend/storage/sync/sync.c

LCOV - differential code coverage report

Current view:	top level - src/backend/storage/sync - sync.c (source / functions)		Coverage	Total	Hit	UBC	GNC	CBC	DCB
Current:	Differential Code Coverage HEAD vs 15	Lines:	77.2 %	127	98	29	8	90	8
Current Date:	2023-04-08 15:15:32	Functions:	100.0 %	6	6		1	5
Baseline:	15
Baseline Date:	2023-04-08 15:09:40
Legend:	Lines: hit not hit

           TLA  Line data    Source code

       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * sync.c
       4                 :  *    File synchronization management code.
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/storage/sync/sync.c
      12                 :  *
      13                 :  *-------------------------------------------------------------------------
      14                 :  */
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include <unistd.h>
      18                 : #include <fcntl.h>
      19                 : #include <sys/file.h>
      20                 : 
      21                 : #include "access/commit_ts.h"
      22                 : #include "access/clog.h"
      23                 : #include "access/multixact.h"
      24                 : #include "access/xlog.h"
      25                 : #include "access/xlogutils.h"
      26                 : #include "commands/tablespace.h"
      27                 : #include "miscadmin.h"
      28                 : #include "pgstat.h"
      29                 : #include "portability/instr_time.h"
      30                 : #include "postmaster/bgwriter.h"
      31                 : #include "storage/bufmgr.h"
      32                 : #include "storage/fd.h"
      33                 : #include "storage/ipc.h"
      34                 : #include "storage/latch.h"
      35                 : #include "storage/md.h"
      36                 : #include "utils/hsearch.h"
      37                 : #include "utils/inval.h"
      38                 : #include "utils/memutils.h"
      39                 : 
      40                 : static MemoryContext pendingOpsCxt; /* context for the pending ops state; declared again next to pendingOps below */
      41                 : 
      42                 : /*
      43                 :  * In some contexts (currently, standalone backends and the checkpointer)
      44                 :  * we keep track of pending fsync operations: we need to remember all relation
      45                 :  * segments that have been written since the last checkpoint, so that we can
      46                 :  * fsync them down to disk before completing the next checkpoint.  This hash
      47                 :  * table remembers the pending operations.  We use a hash table mostly as
      48                 :  * a convenient way of merging duplicate requests.
      49                 :  *
      50                 :  * We use a similar mechanism to remember no-longer-needed files that can
      51                 :  * be deleted after the next checkpoint, but we use a linked list instead of
      52                 :  * a hash table, because we don't expect there to be any duplicate requests.
      53                 :  *
      54                 :  * These mechanisms are only used for non-temp relations; we never fsync
      55                 :  * temp rels, nor do we need to postpone their deletion (see comments in
      56                 :  * mdunlink).
      57                 :  *
      58                 :  * (Regular backends do not track pending operations locally, but forward
      59                 :  * them to the checkpointer.)
      60                 :  */
      61                 : typedef uint16 CycleCtr;        /* wrapping counter; can be any convenient integer size */
      62                 : 
      63                 : typedef struct
      64                 : {
      65                 :     FileTag     tag;            /* identifies handler and file */
      66                 :     CycleCtr    cycle_ctr;      /* sync_cycle_ctr of oldest request */
      67                 :     bool        canceled;       /* true if the request was canceled "recently" */
      68                 : } PendingFsyncEntry;
      69                 : 
      70                 : typedef struct
      71                 : {
      72                 :     FileTag     tag;            /* identifies handler and file */
      73                 :     CycleCtr    cycle_ctr;      /* checkpoint_cycle_ctr when request was made */
      74                 :     bool        canceled;       /* true if request was canceled or already processed */
      75                 : } PendingUnlinkEntry;
      76                 : 
      77                 : static HTAB *pendingOps = NULL; /* PendingFsyncEntry table keyed by FileTag */
      78                 : static List *pendingUnlinks = NIL;  /* list of PendingUnlinkEntry pointers */
      79                 : static MemoryContext pendingOpsCxt; /* context for the above  */
      80                 : 
      81                 : static CycleCtr sync_cycle_ctr = 0; /* advanced by ProcessSyncRequests() */
      82                 : static CycleCtr checkpoint_cycle_ctr = 0;   /* advanced by SyncPreCheckpoint() */
      83                 : 
      84                 : /* Number of fsyncs or unlinks processed between AbsorbSyncRequests calls */
      85                 : #define FSYNCS_PER_ABSORB       10
      86                 : #define UNLINKS_PER_ABSORB      10
      87                 : 
      88                 : /*
      89                 :  * Function pointers for handling sync and unlink requests.
      90                 :  */
      91                 : typedef struct SyncOps
      92                 : {
      93                 :     int         (*sync_syncfiletag) (const FileTag *ftag, char *path);  /* fsync the file; callers treat 0 as success */
      94                 :     int         (*sync_unlinkfiletag) (const FileTag *ftag, char *path);    /* unlink the file; callers treat < 0 as failure */
      95                 :     bool        (*sync_filetagmatches) (const FileTag *ftag,
      96                 :                                         const FileTag *candidate);  /* does candidate match filter tag ftag? */
      97                 : } SyncOps;
      98                 : 
      99                 : /*
     100                 :  * These indexes must correspond to the values of the SyncRequestHandler enum.
     101                 :  */
     102                 : static const SyncOps syncsw[] = {
     103                 :     /* magnetic disk: the only handler with unlink and filter callbacks */
     104                 :     [SYNC_HANDLER_MD] = {
     105                 :         .sync_syncfiletag = mdsyncfiletag,
     106                 :         .sync_unlinkfiletag = mdunlinkfiletag,
     107                 :         .sync_filetagmatches = mdfiletagmatches
     108                 :     },
     109                 :     /* pg_xact: sync callback only, other members are left NULL */
     110                 :     [SYNC_HANDLER_CLOG] = {
     111                 :         .sync_syncfiletag = clogsyncfiletag
     112                 :     },
     113                 :     /* pg_commit_ts: sync callback only */
     114                 :     [SYNC_HANDLER_COMMIT_TS] = {
     115                 :         .sync_syncfiletag = committssyncfiletag
     116                 :     },
     117                 :     /* pg_multixact/offsets: sync callback only */
     118                 :     [SYNC_HANDLER_MULTIXACT_OFFSET] = {
     119                 :         .sync_syncfiletag = multixactoffsetssyncfiletag
     120                 :     },
     121                 :     /* pg_multixact/members: sync callback only */
     122                 :     [SYNC_HANDLER_MULTIXACT_MEMBER] = {
     123                 :         .sync_syncfiletag = multixactmemberssyncfiletag
     124                 :     }
     125                 : };
     126                 : 
     127                 : /*
     128                 :  * Initialize data structures for the file sync tracking.
     129                 :  */
     130                 : void
     131 CBC       13296 : InitSync(void)
     132                 : {
     133                 :     /*
     134                 :      * Create the pending-operations hashtable if we need it: that is, if we
     135                 :      * are standalone (not under a postmaster) or the checkpointer.  In any
     136                 :      * other process pendingOps stays NULL and this function does nothing.
     137                 :      */
     138           13296 :     if (!IsUnderPostmaster || AmCheckpointerProcess())
     139                 :     {
     140                 :         HASHCTL     hash_ctl;
     141                 : 
     142                 :         /*
     143                 :          * XXX: The checkpointer needs to add entries to the pending ops table
     144                 :          * when absorbing fsync requests.  That is done within a critical
     145                 :          * section, which isn't usually allowed, but we make an exception. It
     146                 :          * means that there's a theoretical possibility that you run out of
     147                 :          * memory while absorbing fsync requests, which leads to a PANIC.
     148                 :          * Fortunately the hash table is small so that's unlikely to happen in
     149                 :          * practice.
     150                 :          */
     151             974 :         pendingOpsCxt = AllocSetContextCreate(TopMemoryContext,
     152                 :                                               "Pending ops context",
     153                 :                                               ALLOCSET_DEFAULT_SIZES);
     154             974 :         MemoryContextAllowInCriticalSection(pendingOpsCxt, true);
     155                 : 
     156             974 :         hash_ctl.keysize = sizeof(FileTag);
     157             974 :         hash_ctl.entrysize = sizeof(PendingFsyncEntry);
     158             974 :         hash_ctl.hcxt = pendingOpsCxt;
     159             974 :         pendingOps = hash_create("Pending Ops Table",
     160                 :                                  100L,
     161                 :                                  &hash_ctl,
     162                 :                                  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
     163             974 :         pendingUnlinks = NIL;
     164                 :     }
     165           13296 : }
     166                 : 
     167                 : /*
     168                 :  * SyncPreCheckpoint() -- Do pre-checkpoint work
     169                 :  *
     170                 :  * To distinguish unlink requests that arrived before this checkpoint
     171                 :  * started from those that arrived during the checkpoint, we use a cycle
     172                 :  * counter similar to the one we use for fsync requests. That cycle
     173                 :  * counter is incremented here.
     174                 :  *
     175                 :  * This must be called *before* the checkpoint REDO point is determined.
     176                 :  * That ensures that we won't delete files too soon.  Since this calls
     177                 :  * AbsorbSyncRequests(), which performs memory allocations, it cannot be
     178                 :  * called within a critical section.
     179                 :  *
     180                 :  * Note that we can't do anything here that depends on the assumption
     181                 :  * that the checkpoint will be completed.
     182                 :  */
     183                 : void
     184            2340 : SyncPreCheckpoint(void)
     185                 : {
     186                 :     /*
     187                 :      * Operations such as DROP TABLESPACE assume that the next checkpoint will
     188                 :      * process all recently forwarded unlink requests, but if they aren't
     189                 :      * absorbed prior to advancing the cycle counter, they won't be processed
     190                 :      * until a future checkpoint.  The following absorb ensures that any
     191                 :      * unlink requests forwarded before the checkpoint began will be processed
     192                 :      * in the current checkpoint.
     193                 :      */
     194            2340 :     AbsorbSyncRequests();
     195                 : 
     196                 :     /*
     197                 :      * Unlink requests arriving after this point get the incremented counter
     198                 :      * value, and so won't be unlinked until the next checkpoint.
     199                 :      */
     200            2340 :     checkpoint_cycle_ctr++;
     201            2340 : }
     202                 : 
     203                 : /*
     204                 :  * SyncPostCheckpoint() -- Do post-checkpoint work
     205                 :  *
     206                 :  * Remove any lingering files that can now be safely removed.
     207                 :  */
     208                 : void
     209            2335 : SyncPostCheckpoint(void)
     210                 : {
     211                 :     int         absorb_counter;
     212                 :     ListCell   *lc;
     213                 : 
     214            2335 :     absorb_counter = UNLINKS_PER_ABSORB;
     215           29764 :     foreach(lc, pendingUnlinks)
     216                 :     {
     217           27448 :         PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(lc);
     218                 :         char        path[MAXPGPATH];
     219                 : 
     220                 :         /* Skip over any canceled entries */
     221           27448 :         if (entry->canceled)
     222               1 :             continue;
     223                 : 
     224                 :         /*
     225                 :          * New entries are appended to the end, so if the entry is new we've
     226                 :          * reached the end of old entries.
     227                 :          *
     228                 :          * Note: if just the right number of consecutive checkpoints fail, we
     229                 :          * could be fooled here by cycle_ctr wraparound.  However, the only
     230                 :          * consequence is that we'd delay unlinking for one more checkpoint,
     231                 :          * which is perfectly tolerable.
     232                 :          */
     233           27447 :         if (entry->cycle_ctr == checkpoint_cycle_ctr)
     234              19 :             break;              /* "lc" now marks the first entry to keep */
     235                 : 
     236                 :         /* Unlink the file */
     237           27428 :         if (syncsw[entry->tag.handler].sync_unlinkfiletag(&entry->tag,
     238                 :                                                           path) < 0)
     239                 :         {
     240                 :             /*
     241                 :              * There's a race condition, when the database is dropped at the
     242                 :              * same time that we process the pending unlink requests. If the
     243                 :              * DROP DATABASE deletes the file before we do, we will get ENOENT
     244                 :              * here. rmtree() also has to ignore ENOENT errors, to deal with
     245                 :              * the possibility that we delete the file first.
     246                 :              */
     247               3 :             if (errno != ENOENT)
     248 UBC           0 :                 ereport(WARNING,
     249                 :                         (errcode_for_file_access(),
     250                 :                          errmsg("could not remove file \"%s\": %m", path)));
     251                 :         }
     252                 : 
     253                 :         /* Mark the list entry as canceled, just in case */
     254 CBC       27428 :         entry->canceled = true;
     255                 : 
     256                 :         /*
     257                 :          * As in ProcessSyncRequests, we don't want to stop absorbing fsync
     258                 :          * requests for a long time when there are many deletions to be done.
     259                 :          * We can safely call AbsorbSyncRequests() at this point in the loop.
     260                 :          */
     261           27428 :         if (--absorb_counter <= 0)
     262                 :         {
     263            2694 :             AbsorbSyncRequests();
     264            2694 :             absorb_counter = UNLINKS_PER_ABSORB;
     265                 :         }
     266                 :     }
     267                 : 
     268                 :     /*
     269                 :      * If we reached the end of the list, we can just remove the whole list
     270                 :      * (remembering to pfree all the PendingUnlinkEntry objects).  Otherwise,
     271                 :      * we must keep the entries at or after "lc".
     272                 :      */
     273            2335 :     if (lc == NULL)
     274                 :     {
     275            2316 :         list_free_deep(pendingUnlinks);
     276            2316 :         pendingUnlinks = NIL;
     277                 :     }
     278                 :     else
     279                 :     {
     280              19 :         int         ntodelete = list_cell_number(pendingUnlinks, lc);
     281                 : 
     282           15330 :         for (int i = 0; i < ntodelete; i++)
     283           15311 :             pfree(list_nth(pendingUnlinks, i)); /* free entries before "lc" */
     284                 : 
     285              19 :         pendingUnlinks = list_delete_first_n(pendingUnlinks, ntodelete);
     286                 :     }
     287            2335 : }
     288                 : 
     289                 : /*
     290                 :  *  ProcessSyncRequests() -- Process queued fsync requests.
     291                 :  */
     292                 : void
     293            2363 : ProcessSyncRequests(void)
     294                 : {
     295                 :     static bool sync_in_progress = false;
     296                 : 
     297                 :     HASH_SEQ_STATUS hstat;
     298                 :     PendingFsyncEntry *entry;
     299                 :     int         absorb_counter;
     300                 : 
     301                 :     /* Statistics on sync times */
     302            2363 :     int         processed = 0;
     303                 :     instr_time  sync_start,
     304                 :                 sync_end,
     305                 :                 sync_diff;
     306                 :     uint64      elapsed;
     307            2363 :     uint64      longest = 0;
     308            2363 :     uint64      total_elapsed = 0;
     309                 : 
     310                 :     /*
     311                 :      * This is only called during checkpoints, and checkpoints should only
     312                 :      * occur in processes that have created a pendingOps.
     313                 :      */
     314            2363 :     if (!pendingOps)
     315 UBC           0 :         elog(ERROR, "cannot sync without a pendingOps table");
     316                 : 
     317                 :     /*
     318                 :      * If we are in the checkpointer, the sync had better include all fsync
     319                 :      * requests that were queued by backends up to this point.  The tightest
     320                 :      * race condition that could occur is that a buffer that must be written
     321                 :      * and fsync'd for the checkpoint could have been dumped by a backend just
     322                 :      * before it was visited by BufferSync().  We know the backend will have
     323                 :      * queued an fsync request before clearing the buffer's dirtybit, so we
     324                 :      * are safe as long as we do an Absorb after completing BufferSync().
     325                 :      */
     326 CBC        2363 :     AbsorbSyncRequests();
     327                 : 
     328                 :     /*
     329                 :      * To avoid excess fsync'ing (in the worst case, maybe a never-terminating
     330                 :      * checkpoint), we want to ignore fsync requests that are entered into the
     331                 :      * hashtable after this point --- they should be processed next time,
     332                 :      * instead.  We use sync_cycle_ctr to tell old entries apart from new
     333                 :      * ones: new ones will have cycle_ctr equal to the incremented value of
     334                 :      * sync_cycle_ctr.
     335                 :      *
     336                 :      * In normal circumstances, all entries present in the table at this point
     337                 :      * will have cycle_ctr exactly equal to the current (about to be old)
     338                 :      * value of sync_cycle_ctr.  However, if we fail partway through the
     339                 :      * fsync'ing loop, then older values of cycle_ctr might remain when we
     340                 :      * come back here to try again.  Repeated checkpoint failures would
     341                 :      * eventually wrap the counter around to the point where an old entry
     342                 :      * might appear new, causing us to skip it, possibly allowing a checkpoint
     343                 :      * to succeed that should not have.  To forestall wraparound, any time the
     344                 :      * previous ProcessSyncRequests() failed to complete, run through the
     345                 :      * table and forcibly set cycle_ctr = sync_cycle_ctr.
     346                 :      *
     347                 :      * Do not be tempted to merge this loop with the main loop: the problem is
     348                 :      * exactly that the main loop may fail before having visited all entries.
     349                 :      * From a performance point of view it doesn't matter anyway, as this path
     350                 :      * will never be taken in a system that's functioning normally.
     351                 :      */
     352            2363 :     if (sync_in_progress)
     353                 :     {
     354                 :         /* prior try failed, so update any stale cycle_ctr values */
     355 UBC           0 :         hash_seq_init(&hstat, pendingOps);
     356               0 :         while ((entry = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
     357                 :         {
     358               0 :             entry->cycle_ctr = sync_cycle_ctr;
     359                 :         }
     360                 :     }
     361                 : 
     362                 :     /* Advance counter so that new hashtable entries are distinguishable */
     363 CBC        2363 :     sync_cycle_ctr++;
     364                 : 
     365                 :     /* Set flag to detect failure if we don't reach the end of the loop */
     366            2363 :     sync_in_progress = true;
     367                 : 
     368                 :     /* Now scan the hashtable for fsync requests to process */
     369            2363 :     absorb_counter = FSYNCS_PER_ABSORB;
     370            2363 :     hash_seq_init(&hstat, pendingOps);
     371          141438 :     while ((entry = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
     372                 :     {
     373                 :         int         failures;
     374                 : 
     375                 :         /*
     376                 :          * If the entry is new then don't process it this time; leave it for
     377                 :          * the next cycle.  Note "continue" bypasses the hash-remove call at
     378                 :          * the bottom of the loop.
     379                 :          */
     380          139075 :         if (entry->cycle_ctr == sync_cycle_ctr)
     381 UBC           0 :             continue;
     382                 : 
     383                 :         /* Else assert we haven't missed it */
     384 CBC      139075 :         Assert((CycleCtr) (entry->cycle_ctr + 1) == sync_cycle_ctr);
     385                 : 
     386                 :         /*
     387                 :          * If fsync is off then we don't have to bother opening the file at
     388                 :          * all.  (We delay checking until this point so that changing fsync on
     389                 :          * the fly behaves sensibly.)
     390                 :          */
     391          139075 :         if (enableFsync)
     392                 :         {
     393                 :             /*
     394                 :              * If in checkpointer, we want to absorb pending requests every so
     395                 :              * often to prevent overflow of the fsync request queue.  It is
     396                 :              * unspecified whether newly-added entries will be visited by
     397                 :              * hash_seq_search, but we don't care since we don't need to
     398                 :              * process them anyway.
     399                 :              */
     400 UBC           0 :             if (--absorb_counter <= 0)
     401                 :             {
     402               0 :                 AbsorbSyncRequests();
     403               0 :                 absorb_counter = FSYNCS_PER_ABSORB;
     404                 :             }
     405                 : 
     406                 :             /*
     407                 :              * The fsync table could contain requests to fsync segments that
     408                 :              * have been deleted (unlinked) by the time we get to them. Rather
     409                 :              * than just hoping an ENOENT (or EACCES on Windows) error can be
     410                 :              * ignored, what we do on error is absorb pending requests and
     411                 :              * then retry. Since mdunlink() queues a "cancel" message before
     412                 :              * actually unlinking, the fsync request is guaranteed to be
     413                 :              * marked canceled after the absorb if it really was this case.
     414                 :              * DROP DATABASE likewise has to tell us to forget fsync requests
     415                 :              * before it starts deletions.
     416                 :              */
     417               0 :             for (failures = 0; !entry->canceled; failures++)
     418                 :             {
     419                 :                 char        path[MAXPGPATH];
     420                 : 
     421               0 :                 INSTR_TIME_SET_CURRENT(sync_start);
     422               0 :                 if (syncsw[entry->tag.handler].sync_syncfiletag(&entry->tag,
     423                 :                                                                 path) == 0)
     424                 :                 {
     425                 :                     /* Success; update statistics about sync timing */
     426               0 :                     INSTR_TIME_SET_CURRENT(sync_end);
     427               0 :                     sync_diff = sync_end;
     428               0 :                     INSTR_TIME_SUBTRACT(sync_diff, sync_start);
     429               0 :                     elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
     430               0 :                     if (elapsed > longest)
     431               0 :                         longest = elapsed;
     432               0 :                     total_elapsed += elapsed;
     433               0 :                     processed++;
     434                 : 
     435               0 :                     if (log_checkpoints)
     436               0 :                         elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f ms",
     437                 :                              processed,
     438                 :                              path,
     439                 :                              (double) elapsed / 1000);
     440                 : 
     441               0 :                     break;      /* out of retry loop */
     442                 :                 }
     443                 : 
     444                 :                 /*
     445                 :                  * It is possible that the relation has been dropped or
     446                 :                  * truncated since the fsync request was entered. Therefore,
     447                 :                  * allow ENOENT, but only if we didn't fail already on this
     448                 :                  * file.
     449                 :                  */
     450               0 :                 if (!FILE_POSSIBLY_DELETED(errno) || failures > 0)
     451               0 :                     ereport(data_sync_elevel(ERROR),
     452                 :                             (errcode_for_file_access(),
     453                 :                              errmsg("could not fsync file \"%s\": %m",
     454                 :                                     path)));
     455                 :                 else
     456               0 :                     ereport(DEBUG1,
     457                 :                             (errcode_for_file_access(),
     458                 :                              errmsg_internal("could not fsync file \"%s\" but retrying: %m",
     459                 :                                              path)));
     460                 : 
     461                 :                 /*
     462                 :                  * Absorb incoming requests and check to see if a cancel
     463                 :                  * arrived for this relation fork.
     464                 :                  */
     465               0 :                 AbsorbSyncRequests();
     466               0 :                 absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
     467                 :             }                   /* end retry loop */
     468                 :         }
     469                 : 
     470                 :         /* We are done with this entry, remove it */
     471 CBC      139075 :         if (hash_search(pendingOps, &entry->tag, HASH_REMOVE, NULL) == NULL)
     472 UBC           0 :             elog(ERROR, "pendingOps corrupted");
     473                 :     }                           /* end loop over hashtable entries */
     474                 : 
     475                 :     /* Return sync performance metrics for report at checkpoint end */
     476 CBC        2363 :     CheckpointStats.ckpt_sync_rels = processed;
     477            2363 :     CheckpointStats.ckpt_longest_sync = longest;
     478            2363 :     CheckpointStats.ckpt_agg_sync_time = total_elapsed;
     479                 : 
     480                 :     /* Flag successful completion of ProcessSyncRequests */
     481            2363 :     sync_in_progress = false;
     482            2363 : }
     483                 : 
     484                 : /*
     485                 :  * RememberSyncRequest() -- callback from checkpointer side of sync request
     486                 :  *
     487                 :  * We stuff fsync requests into the local hash table for execution
     488                 :  * during the checkpointer's next checkpoint.  UNLINK requests go into a
     489                 :  * separate linked list, however, because they get processed separately.
     490                 :  *
                         :  * ftag identifies the file the request applies to; type selects the action:
                         :  *
                         :  *  SYNC_REQUEST         enter ftag into pendingOps, or revive an entry
                         :  *                       that was previously marked canceled
                         :  *  SYNC_UNLINK_REQUEST  append a new entry for ftag to pendingUnlinks
                         :  *  SYNC_FORGET_REQUEST  mark the pendingOps entry for exactly ftag, if
                         :  *                       one exists, as canceled
                         :  *  SYNC_FILTER_REQUEST  mark as canceled every pendingOps and
                         :  *                       pendingUnlinks entry that ftag's handler reports
                         :  *                       as matching ftag
                         :  *
                         :  * Cancellation only sets the entry's "canceled" flag; this function never
                         :  * removes an entry from either structure.  pendingOps must already exist
                         :  * when this is called (that is asserted below).
                         :  *
     491                 :  * See sync.h for more information on the types of sync requests supported.
     492                 :  */
     493                 : void
     494         1130751 : RememberSyncRequest(const FileTag *ftag, SyncRequestType type)
     495                 : {
     496         1130751 :     Assert(pendingOps);
     497                 : 
     498         1130751 :     if (type == SYNC_FORGET_REQUEST)
     499                 :     {
     500                 :         PendingFsyncEntry *entry;
     501                 : 
     502                 :         /*
                         :          * Cancel previously entered request, if there is one.  The entry is
                         :          * only flagged as canceled; it is not removed from the hash table
                         :          * here.
                         :          */
     503          106566 :         entry = (PendingFsyncEntry *) hash_search(pendingOps,
     504                 :                                                   ftag,
     505                 :                                                   HASH_FIND,
     506                 :                                                   NULL);
     507          106566 :         if (entry != NULL)
     508            4783 :             entry->canceled = true;
     509                 :     }
     510         1024185 :     else if (type == SYNC_FILTER_REQUEST)
     511                 :     {
     512                 :         HASH_SEQ_STATUS hstat;
     513                 :         PendingFsyncEntry *pfe;
     514                 :         ListCell   *cell;
     515                 : 
     516                 :         /*
                         :          * Cancel matching fsync requests.  The handler comparison comes
                         :          * first, so the sync_filetagmatches callback of ftag's handler is
                         :          * only consulted for tags that belong to that same handler.
                         :          */
     517              13 :         hash_seq_init(&hstat, pendingOps);
     518 GNC        4356 :         while ((pfe = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
     519                 :         {
     520            8658 :             if (pfe->tag.handler == ftag->handler &&
     521            4328 :                 syncsw[ftag->handler].sync_filetagmatches(ftag, &pfe->tag))
     522            2524 :                 pfe->canceled = true;
     523                 :         }
     524                 : 
     525                 :         /* Cancel matching unlink requests, applying the same two-part test */
     526 CBC          14 :         foreach(cell, pendingUnlinks)
     527                 :         {
     528 GNC           1 :             PendingUnlinkEntry *pue = (PendingUnlinkEntry *) lfirst(cell);
     529                 : 
     530               2 :             if (pue->tag.handler == ftag->handler &&
     531               1 :                 syncsw[ftag->handler].sync_filetagmatches(ftag, &pue->tag))
     532               1 :                 pue->canceled = true;
     533                 :         }
     534                 :     }
     535 CBC     1024172 :     else if (type == SYNC_UNLINK_REQUEST)
     536                 :     {
     537                 :         /*
                         :          * Unlink request: put it in the linked list.  pendingOpsCxt is made
                         :          * the current memory context around the palloc() and lappend()
                         :          * calls, and the previous context is restored afterwards.  The new
                         :          * entry is stamped with checkpoint_cycle_ctr (fsync entries below
                         :          * are stamped with sync_cycle_ctr instead).
                         :          */
     538           27429 :         MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
     539                 :         PendingUnlinkEntry *entry;
     540                 : 
     541           27429 :         entry = palloc(sizeof(PendingUnlinkEntry));
     542           27429 :         entry->tag = *ftag;
     543           27429 :         entry->cycle_ctr = checkpoint_cycle_ctr;
     544           27429 :         entry->canceled = false;
     545                 : 
     546           27429 :         pendingUnlinks = lappend(pendingUnlinks, entry);
     547                 : 
     548           27429 :         MemoryContextSwitchTo(oldcxt);
     549                 :     }
     550                 :     else
     551                 :     {
     552                 :         /* Normal case: enter a request to fsync this segment */
     553          996743 :         MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
     554                 :         PendingFsyncEntry *entry;
     555                 :         bool        found;
     556                 : 
     557          996743 :         Assert(type == SYNC_REQUEST);
     558                 : 
     559          996743 :         entry = (PendingFsyncEntry *) hash_search(pendingOps,
     560                 :                                                   ftag,
     561                 :                                                   HASH_ENTER,
     562                 :                                                   &found);
     563                 :         /*
                         :          * If new entry, or was previously canceled, initialize it.  !found
                         :          * is tested first, so entry->canceled is examined only when the
                         :          * entry already existed.
                         :          */
     564          996743 :         if (!found || entry->canceled)
     565                 :         {
     566          140378 :             entry->cycle_ctr = sync_cycle_ctr;
     567          140378 :             entry->canceled = false;
     568                 :         }
     569                 : 
     570                 :         /*
     571                 :          * NB: it's intentional that we don't change cycle_ctr if the entry
     572                 :          * already exists.  The cycle_ctr must represent the oldest fsync
     573                 :          * request that could be in the entry.
     574                 :          */
     575                 : 
     576          996743 :         MemoryContextSwitchTo(oldcxt);
     577                 :     }
     578         1130751 : }
     579                 : 
     580                 : /*
     581                 :  * Register the sync request locally, or forward it to the checkpointer.
     582                 :  *
                         :  * When this process has a pendingOps table, the request is handed straight
                         :  * to RememberSyncRequest() and the result is always true.  Otherwise the
                         :  * request is passed to ForwardSyncRequest().
                         :  *
     583                 :  * If retryOnError is true, we'll keep trying if there is no space in the
     584                 :  * queue.  Return true if we succeeded, or false if there wasn't space.
                         :  *
                         :  * With retryOnError set, this function does not return until the request
                         :  * has been accepted, so false is only possible when retryOnError is false.
                         :  * Between attempts it waits in WaitLatch() with a timeout of 10.
     585                 :  */
     586                 : bool
     587         1200898 : RegisterSyncRequest(const FileTag *ftag, SyncRequestType type,
     588                 :                     bool retryOnError)
     589                 : {
     590                 :     bool        ret;
     591                 : 
     592         1200898 :     if (pendingOps != NULL)
     593                 :     {
     594                 :         /* standalone backend or startup process: fsync state is local */
     595          664352 :         RememberSyncRequest(ftag, type);
     596          664352 :         return true;
     597                 :     }
     598                 : 
     599                 :     for (;;)
     600                 :     {
     601                 :         /*
     602                 :          * Notify the checkpointer about it.  If we fail to queue a message in
     603                 :          * retryOnError mode, we have to sleep and try again ... ugly, but
     604                 :          * hopefully won't happen often.
     605                 :          *
     606                 :          * XXX should we CHECK_FOR_INTERRUPTS in this loop?  Escaping with an
     607                 :          * error in the case of SYNC_UNLINK_REQUEST would leave the
     608                 :          * no-longer-used file still present on disk, which would be bad, so
     609                 :          * I'm inclined to assume that the checkpointer will always empty the
     610                 :          * queue soon.
     611                 :          */
     612          536554 :         ret = ForwardSyncRequest(ftag, type);
     613                 : 
     614                 :         /*
     615                 :          * If we are successful in queueing the request, or we failed and were
     616                 :          * instructed not to retry on error, break.  The right-hand operand
                         :          * is evaluated only when ret is false, so it need not test ret
                         :          * again.
     617                 :          */
     618          536554 :         if (ret || !retryOnError)
     619                 :             break;
     620                 : 
     621               8 :         WaitLatch(NULL, WL_EXIT_ON_PM_DEATH | WL_TIMEOUT, 10,
     622                 :                   WAIT_EVENT_REGISTER_SYNC_REQUEST);
     623                 :     }
     624                 : 
     625          536546 :     return ret;
     626                 : }

Generated by: LCOV version v1.16-55-g56c0a2a