LCOV - differential code coverage report
Current view: top level - src/backend/storage/sync - sync.c (source / functions) Coverage Total Hit UBC GNC CBC DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 77.2 % 127 98 29 8 90 8
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 6 6 1 5
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 (240..) days: 77.2 % 127 98 29 8 90
Legend: Lines: hit not hit Function coverage date bins:
(240..) days: 100.0 % 6 6 1 5

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * sync.c
                                  4                 :  *    File synchronization management code.
                                  5                 :  *
                                  6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                  7                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                  8                 :  *
                                  9                 :  *
                                 10                 :  * IDENTIFICATION
                                 11                 :  *    src/backend/storage/sync/sync.c
                                 12                 :  *
                                 13                 :  *-------------------------------------------------------------------------
                                 14                 :  */
                                 15                 : #include "postgres.h"
                                 16                 : 
                                 17                 : #include <unistd.h>
                                 18                 : #include <fcntl.h>
                                 19                 : #include <sys/file.h>
                                 20                 : 
                                 21                 : #include "access/commit_ts.h"
                                 22                 : #include "access/clog.h"
                                 23                 : #include "access/multixact.h"
                                 24                 : #include "access/xlog.h"
                                 25                 : #include "access/xlogutils.h"
                                 26                 : #include "commands/tablespace.h"
                                 27                 : #include "miscadmin.h"
                                 28                 : #include "pgstat.h"
                                 29                 : #include "portability/instr_time.h"
                                 30                 : #include "postmaster/bgwriter.h"
                                 31                 : #include "storage/bufmgr.h"
                                 32                 : #include "storage/fd.h"
                                 33                 : #include "storage/ipc.h"
                                 34                 : #include "storage/latch.h"
                                 35                 : #include "storage/md.h"
                                 36                 : #include "utils/hsearch.h"
                                 37                 : #include "utils/inval.h"
                                 38                 : #include "utils/memutils.h"
                                 39                 : 
                                 40                 : static MemoryContext pendingOpsCxt; /* context for the pending ops state  */
                                 41                 : 
                                 42                 : /*
                                 43                 :  * In some contexts (currently, standalone backends and the checkpointer)
                                 44                 :  * we keep track of pending fsync operations: we need to remember all relation
                                 45                 :  * segments that have been written since the last checkpoint, so that we can
                                 46                 :  * fsync them down to disk before completing the next checkpoint.  This hash
                                 47                 :  * table remembers the pending operations.  We use a hash table mostly as
                                 48                 :  * a convenient way of merging duplicate requests.
                                 49                 :  *
                                 50                 :  * We use a similar mechanism to remember no-longer-needed files that can
                                 51                 :  * be deleted after the next checkpoint, but we use a linked list instead of
                                 52                 :  * a hash table, because we don't expect there to be any duplicate requests.
                                 53                 :  *
                                 54                 :  * These mechanisms are only used for non-temp relations; we never fsync
                                 55                 :  * temp rels, nor do we need to postpone their deletion (see comments in
                                 56                 :  * mdunlink).
                                 57                 :  *
                                 58                 :  * (Regular backends do not track pending operations locally, but forward
                                 59                 :  * them to the checkpointer.)
                                 60                 :  */
                                 61                 : typedef uint16 CycleCtr;        /* can be any convenient integer size */
                                 62                 : 
                                 63                 : typedef struct
                                 64                 : {
                                 65                 :     FileTag     tag;            /* identifies handler and file */
                                 66                 :     CycleCtr    cycle_ctr;      /* sync_cycle_ctr of oldest request */
                                 67                 :     bool        canceled;       /* canceled is true if we canceled "recently" */
                                 68                 : } PendingFsyncEntry;
                                 69                 : 
                                 70                 : typedef struct
                                 71                 : {
                                 72                 :     FileTag     tag;            /* identifies handler and file */
                                 73                 :     CycleCtr    cycle_ctr;      /* checkpoint_cycle_ctr when request was made */
                                 74                 :     bool        canceled;       /* true if request has been canceled */
                                 75                 : } PendingUnlinkEntry;
                                 76                 : 
                                 77                 : static HTAB *pendingOps = NULL;
                                 78                 : static List *pendingUnlinks = NIL;
                                 79                 : static MemoryContext pendingOpsCxt; /* context for the above  */
                                 80                 : 
                                 81                 : static CycleCtr sync_cycle_ctr = 0;
                                 82                 : static CycleCtr checkpoint_cycle_ctr = 0;
                                 83                 : 
                                 84                 : /* Intervals for calling AbsorbSyncRequests */
                                 85                 : #define FSYNCS_PER_ABSORB       10
                                 86                 : #define UNLINKS_PER_ABSORB      10
                                 87                 : 
                                 88                 : /*
                                 89                 :  * Function pointers for handling sync and unlink requests.
                                 90                 :  */
                                 91                 : typedef struct SyncOps
                                 92                 : {
                                 93                 :     int         (*sync_syncfiletag) (const FileTag *ftag, char *path);
                                 94                 :     int         (*sync_unlinkfiletag) (const FileTag *ftag, char *path);
                                 95                 :     bool        (*sync_filetagmatches) (const FileTag *ftag,
                                 96                 :                                         const FileTag *candidate);
                                 97                 : } SyncOps;
                                 98                 : 
                                 99                 : /*
                                100                 :  * These indexes must correspond to the values of the SyncRequestHandler enum.
                                101                 :  */
                                102                 : static const SyncOps syncsw[] = {
                                103                 :     /* magnetic disk */
                                104                 :     [SYNC_HANDLER_MD] = {
                                105                 :         .sync_syncfiletag = mdsyncfiletag,
                                106                 :         .sync_unlinkfiletag = mdunlinkfiletag,
                                107                 :         .sync_filetagmatches = mdfiletagmatches
                                108                 :     },
                                109                 :     /* pg_xact */
                                110                 :     [SYNC_HANDLER_CLOG] = {
                                111                 :         .sync_syncfiletag = clogsyncfiletag
                                112                 :     },
                                113                 :     /* pg_commit_ts */
                                114                 :     [SYNC_HANDLER_COMMIT_TS] = {
                                115                 :         .sync_syncfiletag = committssyncfiletag
                                116                 :     },
                                117                 :     /* pg_multixact/offsets */
                                118                 :     [SYNC_HANDLER_MULTIXACT_OFFSET] = {
                                119                 :         .sync_syncfiletag = multixactoffsetssyncfiletag
                                120                 :     },
                                121                 :     /* pg_multixact/members */
                                122                 :     [SYNC_HANDLER_MULTIXACT_MEMBER] = {
                                123                 :         .sync_syncfiletag = multixactmemberssyncfiletag
                                124                 :     }
                                125                 : };
                                126                 : 
                                127                 : /*
                                128                 :  * Initialize data structures for the file sync tracking.
                                129                 :  */
                                130                 : void
 1466 tmunro                    131 CBC       13296 : InitSync(void)
                                132                 : {
                                133                 :     /*
                                134                 :      * Create pending-operations hashtable if we need it.  Currently, we need
                                135                 :      * it if we are standalone (not under a postmaster) or if we are a
                                136                 :      * checkpointer auxiliary process.
                                137                 :      */
  615                           138           13296 :     if (!IsUnderPostmaster || AmCheckpointerProcess())
                                139                 :     {
                                140                 :         HASHCTL     hash_ctl;
                                141                 : 
                                142                 :         /*
                                143                 :          * XXX: The checkpointer needs to add entries to the pending ops table
                                144                 :          * when absorbing fsync requests.  That is done within a critical
                                145                 :          * section, which isn't usually allowed, but we make an exception. It
                                146                 :          * means that there's a theoretical possibility that you run out of
                                147                 :          * memory while absorbing fsync requests, which leads to a PANIC.
                                148                 :          * Fortunately the hash table is small so that's unlikely to happen in
                                149                 :          * practice.
                                150                 :          */
 1466                           151             974 :         pendingOpsCxt = AllocSetContextCreate(TopMemoryContext,
                                152                 :                                               "Pending ops context",
                                153                 :                                               ALLOCSET_DEFAULT_SIZES);
                                154             974 :         MemoryContextAllowInCriticalSection(pendingOpsCxt, true);
                                155                 : 
                                156             974 :         hash_ctl.keysize = sizeof(FileTag);
                                157             974 :         hash_ctl.entrysize = sizeof(PendingFsyncEntry);
                                158             974 :         hash_ctl.hcxt = pendingOpsCxt;
                                159             974 :         pendingOps = hash_create("Pending Ops Table",
                                160                 :                                  100L,
                                161                 :                                  &hash_ctl,
                                162                 :                                  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
                                163             974 :         pendingUnlinks = NIL;
                                164                 :     }
                                165           13296 : }
                                166                 : 
                                167                 : /*
                                168                 :  * SyncPreCheckpoint() -- Do pre-checkpoint work
                                169                 :  *
                                170                 :  * To distinguish unlink requests that arrived before this checkpoint
                                171                 :  * started from those that arrived during the checkpoint, we use a cycle
                                172                 :  * counter similar to the one we use for fsync requests. That cycle
                                173                 :  * counter is incremented here.
                                174                 :  *
                                175                 :  * This must be called *before* the checkpoint REDO point is determined.
                                176                 :  * That ensures that we won't delete files too soon.  Since this calls
                                177                 :  * AbsorbSyncRequests(), which performs memory allocations, it cannot be
                                178                 :  * called within a critical section.
                                179                 :  *
                                180                 :  * Note that we can't do anything here that depends on the assumption
                                181                 :  * that the checkpoint will be completed.
                                182                 :  */
                                183                 : void
                                184            2340 : SyncPreCheckpoint(void)
                                185                 : {
                                186                 :     /*
                                187                 :      * Operations such as DROP TABLESPACE assume that the next checkpoint will
                                188                 :      * process all recently forwarded unlink requests, but if they aren't
                                189                 :      * absorbed prior to advancing the cycle counter, they won't be processed
                                190                 :      * until a future checkpoint.  The following absorb ensures that any
                                191                 :      * unlink requests forwarded before the checkpoint began will be processed
                                192                 :      * in the current checkpoint.
                                193                 :      */
  389                           194            2340 :     AbsorbSyncRequests();
                                195                 : 
                                196                 :     /*
                                197                 :      * Any unlink requests arriving after this point will be assigned the next
                                198                 :      * cycle counter, and won't be unlinked until next checkpoint.
                                199                 :      */
 1466                           200            2340 :     checkpoint_cycle_ctr++;
                                201            2340 : }
                                202                 : 
                                203                 : /*
                                204                 :  * SyncPostCheckpoint() -- Do post-checkpoint work
                                205                 :  *
                                206                 :  * Remove any lingering files that can now be safely removed.
                                207                 :  */
                                208                 : void
                                209            2335 : SyncPostCheckpoint(void)
                                210                 : {
                                211                 :     int         absorb_counter;
                                212                 :     ListCell   *lc;
                                213                 : 
                                214            2335 :     absorb_counter = UNLINKS_PER_ABSORB;
  523 tgl                       215           29764 :     foreach(lc, pendingUnlinks)
                                216                 :     {
                                217           27448 :         PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(lc);
                                218                 :         char        path[MAXPGPATH];
                                219                 : 
                                220                 :         /* Skip over any canceled entries */
                                221           27448 :         if (entry->canceled)
                                222               1 :             continue;
                                223                 : 
                                224                 :         /*
                                225                 :          * New entries are appended to the end, so if the entry is new we've
                                226                 :          * reached the end of old entries.
                                227                 :          *
                                228                 :          * Note: if just the right number of consecutive checkpoints fail, we
                                229                 :          * could be fooled here by cycle_ctr wraparound.  However, the only
                                230                 :          * consequence is that we'd delay unlinking for one more checkpoint,
                                231                 :          * which is perfectly tolerable.
                                232                 :          */
 1466 tmunro                    233           27447 :         if (entry->cycle_ctr == checkpoint_cycle_ctr)
                                234              19 :             break;
                                235                 : 
                                236                 :         /* Unlink the file */
                                237           27428 :         if (syncsw[entry->tag.handler].sync_unlinkfiletag(&entry->tag,
                                238                 :                                                           path) < 0)
                                239                 :         {
                                240                 :             /*
                                241                 :              * There's a race condition when the database is dropped at the
                                242                 :              * same time that we process the pending unlink requests. If the
                                243                 :              * DROP DATABASE deletes the file before we do, we will get ENOENT
                                244                 :              * here. rmtree() also has to ignore ENOENT errors, to deal with
                                245                 :              * the possibility that we delete the file first.
                                246                 :              */
                                247               3 :             if (errno != ENOENT)
 1466 tmunro                    248 UBC           0 :                 ereport(WARNING,
                                249                 :                         (errcode_for_file_access(),
                                250                 :                          errmsg("could not remove file \"%s\": %m", path)));
                                251                 :         }
                                252                 : 
                                253                 :         /* Mark the list entry as canceled, just in case */
  523 tgl                       254 CBC       27428 :         entry->canceled = true;
                                255                 : 
                                256                 :         /*
                                257                 :          * As in ProcessSyncRequests, we don't want to stop absorbing fsync
                                258                 :          * requests for a long time when there are many deletions to be done.
                                259                 :          * We can safely call AbsorbSyncRequests() at this point in the loop.
                                260                 :          */
 1466 tmunro                    261           27428 :         if (--absorb_counter <= 0)
                                262                 :         {
                                263            2694 :             AbsorbSyncRequests();
                                264            2694 :             absorb_counter = UNLINKS_PER_ABSORB;
                                265                 :         }
                                266                 :     }
                                267                 : 
                                268                 :     /*
                                269                 :      * If we reached the end of the list, we can just remove the whole list
                                270                 :      * (remembering to pfree all the PendingUnlinkEntry objects).  Otherwise,
                                271                 :      * we must keep the entries at or after "lc".
                                272                 :      */
  523 tgl                       273            2335 :     if (lc == NULL)
                                274                 :     {
                                275            2316 :         list_free_deep(pendingUnlinks);
                                276            2316 :         pendingUnlinks = NIL;
                                277                 :     }
                                278                 :     else
                                279                 :     {
                                280              19 :         int         ntodelete = list_cell_number(pendingUnlinks, lc);
                                281                 : 
                                282           15330 :         for (int i = 0; i < ntodelete; i++)
                                283           15311 :             pfree(list_nth(pendingUnlinks, i));
                                284                 : 
                                285              19 :         pendingUnlinks = list_delete_first_n(pendingUnlinks, ntodelete);
                                286                 :     }
 1466 tmunro                    287            2335 : }
                                288                 : 
                                289                 : /*
                                290                 :  *  ProcessSyncRequests() -- Process queued fsync requests.
                                291                 :  */
                                292                 : void
                                293            2363 : ProcessSyncRequests(void)
                                294                 : {
                                295                 :     static bool sync_in_progress = false;
                                296                 : 
                                297                 :     HASH_SEQ_STATUS hstat;
                                298                 :     PendingFsyncEntry *entry;
                                299                 :     int         absorb_counter;
                                300                 : 
                                301                 :     /* Statistics on sync times */
                                302            2363 :     int         processed = 0;
                                303                 :     instr_time  sync_start,
                                304                 :                 sync_end,
                                305                 :                 sync_diff;
                                306                 :     uint64      elapsed;
                                307            2363 :     uint64      longest = 0;
                                308            2363 :     uint64      total_elapsed = 0;
                                309                 : 
                                310                 :     /*
                                311                 :      * This is only called during checkpoints, and checkpoints should only
                                312                 :      * occur in processes that have created a pendingOps.
                                313                 :      */
                                314            2363 :     if (!pendingOps)
 1466 tmunro                    315 UBC           0 :         elog(ERROR, "cannot sync without a pendingOps table");
                                316                 : 
                                317                 :     /*
                                318                 :      * If we are in the checkpointer, the sync had better include all fsync
                                319                 :      * requests that were queued by backends up to this point.  The tightest
                                320                 :      * race condition that could occur is that a buffer that must be written
                                321                 :      * and fsync'd for the checkpoint could have been dumped by a backend just
                                322                 :      * before it was visited by BufferSync().  We know the backend will have
                                323                 :      * queued an fsync request before clearing the buffer's dirtybit, so we
                                324                 :      * are safe as long as we do an Absorb after completing BufferSync().
                                325                 :      */
 1466 tmunro                    326 CBC        2363 :     AbsorbSyncRequests();
                                327                 : 
                                328                 :     /*
                                329                 :      * To avoid excess fsync'ing (in the worst case, maybe a never-terminating
                                330                 :      * checkpoint), we want to ignore fsync requests that are entered into the
                                331                 :      * hashtable after this point --- they should be processed next time,
                                332                 :      * instead.  We use sync_cycle_ctr to tell old entries apart from new
                                333                 :      * ones: new ones will have cycle_ctr equal to the incremented value of
                                334                 :      * sync_cycle_ctr.
                                335                 :      *
                                336                 :      * In normal circumstances, all entries present in the table at this point
                                337                 :      * will have cycle_ctr exactly equal to the current (about to be old)
                                338                 :      * value of sync_cycle_ctr.  However, if we fail partway through the
                                339                 :      * fsync'ing loop, then older values of cycle_ctr might remain when we
                                340                 :      * come back here to try again.  Repeated checkpoint failures would
                                341                 :      * eventually wrap the counter around to the point where an old entry
                                342                 :      * might appear new, causing us to skip it, possibly allowing a checkpoint
                                343                 :      * to succeed that should not have.  To forestall wraparound, any time the
                                344                 :      * previous ProcessSyncRequests() failed to complete, run through the
                                345                 :      * table and forcibly set cycle_ctr = sync_cycle_ctr.
                                346                 :      *
                                347                 :      * Do not merge this loop with the main loop: the problem is exactly
                                348                 :      * that the main loop may fail before having visited all the entries.
                                349                 :      * From a performance point of view it doesn't matter anyway, as this path
                                350                 :      * will never be taken in a system that's functioning normally.
                                351                 :      */
                                352            2363 :     if (sync_in_progress)
                                353                 :     {
                                354                 :         /* prior try failed, so update any stale cycle_ctr values */
 1466 tmunro                    355 UBC           0 :         hash_seq_init(&hstat, pendingOps);
                                356               0 :         while ((entry = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
                                357                 :         {
                                358               0 :             entry->cycle_ctr = sync_cycle_ctr;
                                359                 :         }
                                360                 :     }
                                361                 : 
                                362                 :     /* Advance counter so that new hashtable entries are distinguishable */
 1466 tmunro                    363 CBC        2363 :     sync_cycle_ctr++;
                                364                 : 
                                365                 :     /* Set flag to detect failure if we don't reach the end of the loop */
                                366            2363 :     sync_in_progress = true;
                                367                 : 
                                368                 :     /* Now scan the hashtable for fsync requests to process */
                                369            2363 :     absorb_counter = FSYNCS_PER_ABSORB;
                                370            2363 :     hash_seq_init(&hstat, pendingOps);
                                371          141438 :     while ((entry = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
                                372                 :     {
                                373                 :         int         failures;
                                374                 : 
                                375                 :         /*
                                376                 :          * If the entry is new (its cycle_ctr already equals the incremented
                                377                 :          * sync_cycle_ctr) then don't process it this time.  Note "continue"
                                378                 :          * bypasses the hash-remove call at the bottom of the loop.
                                379                 :          */
                                380          139075 :         if (entry->cycle_ctr == sync_cycle_ctr)
 1466 tmunro                    381 UBC           0 :             continue;
                                382                 : 
                                383                 :         /* Else assert we haven't missed it */
 1466 tmunro                    384 CBC      139075 :         Assert((CycleCtr) (entry->cycle_ctr + 1) == sync_cycle_ctr);
                                385                 : 
                                386                 :         /*
                                387                 :          * If fsync is off then we don't have to bother opening the file at
                                388                 :          * all.  (We delay checking until this point so that changing fsync on
                                389                 :          * the fly behaves sensibly.)
                                390                 :          */
 1060 heikki.linnakangas        391          139075 :         if (enableFsync)
                                392                 :         {
                                393                 :             /*
                                394                 :              * If in checkpointer, we want to absorb pending requests every so
                                395                 :              * often to prevent overflow of the fsync request queue.  It is
                                396                 :              * unspecified whether newly-added entries will be visited by
                                397                 :              * hash_seq_search, but we don't care since we don't need to
                                398                 :              * process them anyway.
                                399                 :              */
 1060 heikki.linnakangas        400 UBC           0 :             if (--absorb_counter <= 0)
                                401                 :             {
                                402               0 :                 AbsorbSyncRequests();
                                403               0 :                 absorb_counter = FSYNCS_PER_ABSORB;
                                404                 :             }
                                405                 : 
                                406                 :             /*
                                407                 :              * The fsync table could contain requests to fsync segments that
                                408                 :              * have been deleted (unlinked) by the time we get to them. Rather
                                409                 :              * than just hoping an ENOENT (or EACCES on Windows) error can be
                                410                 :              * ignored, what we do on error is absorb pending requests and
                                411                 :              * then retry. Since mdunlink() queues a "cancel" message before
                                412                 :              * actually unlinking, the fsync request is guaranteed to be
                                413                 :              * marked canceled after the absorb if it really was this case.
                                414                 :              * DROP DATABASE likewise has to tell us to forget fsync requests
                                415                 :              * before it starts deletions.
                                416                 :              */
                                417               0 :             for (failures = 0; !entry->canceled; failures++)
                                418                 :             {
                                419                 :                 char        path[MAXPGPATH];
                                420                 : 
                                421               0 :                 INSTR_TIME_SET_CURRENT(sync_start);
                                422               0 :                 if (syncsw[entry->tag.handler].sync_syncfiletag(&entry->tag,
                                423                 :                                                                 path) == 0)
                                424                 :                 {
                                425                 :                     /* Success; update statistics about sync timing */
                                426               0 :                     INSTR_TIME_SET_CURRENT(sync_end);
                                427               0 :                     sync_diff = sync_end;
                                428               0 :                     INSTR_TIME_SUBTRACT(sync_diff, sync_start);
                                429               0 :                     elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
                                430               0 :                     if (elapsed > longest)
                                431               0 :                         longest = elapsed;
                                432               0 :                     total_elapsed += elapsed;
                                433               0 :                     processed++;
                                434                 : 
                                435               0 :                     if (log_checkpoints)
 1042 peter                     436               0 :                         elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f ms",
                                437                 :                              processed,
                                438                 :                              path,
                                439                 :                              (double) elapsed / 1000);
                                440                 : 
 1060 heikki.linnakangas        441               0 :                     break;      /* out of retry loop */
                                442                 :                 }
                                443                 : 
                                444                 :                 /*
                                445                 :                  * It is possible that the relation has been dropped or
                                446                 :                  * truncated since the fsync request was entered. Therefore,
                                447                 :                  * allow ENOENT, but only if we didn't fail already on this
                                448                 :                  * file.
                                449                 :                  */
                                450               0 :                 if (!FILE_POSSIBLY_DELETED(errno) || failures > 0)
                                451               0 :                     ereport(data_sync_elevel(ERROR),
                                452                 :                             (errcode_for_file_access(),
                                453                 :                              errmsg("could not fsync file \"%s\": %m",
                                454                 :                                     path)));
                                455                 :                 else
                                456               0 :                     ereport(DEBUG1,
                                457                 :                             (errcode_for_file_access(),
                                458                 :                              errmsg_internal("could not fsync file \"%s\" but retrying: %m",
                                459                 :                                              path)));
                                460                 : 
                                461                 :                 /*
                                462                 :                  * Absorb incoming requests and check to see if a cancel
                                463                 :                  * arrived for this relation fork.
                                464                 :                  */
                                465               0 :                 AbsorbSyncRequests();
                                466               0 :                 absorb_counter = FSYNCS_PER_ABSORB; /* might as well... */
                                467                 :             }                   /* end retry loop */
                                468                 :         }
                                469                 : 
                                470                 :         /* We are done with this entry, remove it */
 1466 tmunro                    471 CBC      139075 :         if (hash_search(pendingOps, &entry->tag, HASH_REMOVE, NULL) == NULL)
 1466 tmunro                    472 UBC           0 :             elog(ERROR, "pendingOps corrupted");
                                473                 :     }                           /* end loop over hashtable entries */
                                474                 : 
                                475                 :     /* Return sync performance metrics for report at checkpoint end */
 1466 tmunro                    476 CBC        2363 :     CheckpointStats.ckpt_sync_rels = processed;
                                477            2363 :     CheckpointStats.ckpt_longest_sync = longest;
                                478            2363 :     CheckpointStats.ckpt_agg_sync_time = total_elapsed;
                                479                 : 
                                480                 :     /* Flag successful completion of ProcessSyncRequests */
                                481            2363 :     sync_in_progress = false;
                                482            2363 : }
                                483                 : 
                                484                 : /*
                                485                 :  * RememberSyncRequest() -- callback from checkpointer side of sync request
                                486                 :  *
                                487                 :  * We stuff fsync requests into the local hash table for execution
                                488                 :  * during the checkpointer's next checkpoint.  UNLINK requests go into a
                                489                 :  * separate linked list, however, because they get processed separately.
                                490                 :  *
                                491                 :  * See sync.h for more information on the types of sync requests supported.
                                492                 :  */
                                493                 : void
                                494         1130751 : RememberSyncRequest(const FileTag *ftag, SyncRequestType type)
                                495                 : {
                                496         1130751 :     Assert(pendingOps);
                                497                 : 
                                498         1130751 :     if (type == SYNC_FORGET_REQUEST)
                                499                 :     {
                                500                 :         PendingFsyncEntry *entry;
                                501                 : 
                                502                 :         /* Cancel previously entered request */
                                503          106566 :         entry = (PendingFsyncEntry *) hash_search(pendingOps,
                                504                 :                                                   ftag,
                                505                 :                                                   HASH_FIND,
                                506                 :                                                   NULL);
                                507          106566 :         if (entry != NULL)
                                508            4783 :             entry->canceled = true;
                                509                 :     }
                                510         1024185 :     else if (type == SYNC_FILTER_REQUEST)
                                511                 :     {
                                512                 :         HASH_SEQ_STATUS hstat;
                                513                 :         PendingFsyncEntry *pfe;
                                514                 :         ListCell   *cell;
                                515                 : 
                                516                 :         /* Cancel matching fsync requests */
                                517              13 :         hash_seq_init(&hstat, pendingOps);
  271 tmunro                    518 GNC        4356 :         while ((pfe = (PendingFsyncEntry *) hash_seq_search(&hstat)) != NULL)
                                519                 :         {
                                520            8658 :             if (pfe->tag.handler == ftag->handler &&
                                521            4328 :                 syncsw[ftag->handler].sync_filetagmatches(ftag, &pfe->tag))
                                522            2524 :                 pfe->canceled = true;
                                523                 :         }
                                524                 : 
                                525                 :         /* Cancel matching unlink requests */
 1364 tgl                       526 CBC          14 :         foreach(cell, pendingUnlinks)
                                527                 :         {
  271 tmunro                    528 GNC           1 :             PendingUnlinkEntry *pue = (PendingUnlinkEntry *) lfirst(cell);
                                529                 : 
                                530               2 :             if (pue->tag.handler == ftag->handler &&
                                531               1 :                 syncsw[ftag->handler].sync_filetagmatches(ftag, &pue->tag))
                                532               1 :                 pue->canceled = true;
                                533                 :         }
                                534                 :     }
 1466 tmunro                    535 CBC     1024172 :     else if (type == SYNC_UNLINK_REQUEST)
                                536                 :     {
                                537                 :         /* Unlink request: put it in the linked list */
                                538           27429 :         MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
                                539                 :         PendingUnlinkEntry *entry;
                                540                 : 
                                541           27429 :         entry = palloc(sizeof(PendingUnlinkEntry));
                                542           27429 :         entry->tag = *ftag;
                                543           27429 :         entry->cycle_ctr = checkpoint_cycle_ctr;
  523 tgl                       544           27429 :         entry->canceled = false;
                                545                 : 
 1466 tmunro                    546           27429 :         pendingUnlinks = lappend(pendingUnlinks, entry);
                                547                 : 
                                548           27429 :         MemoryContextSwitchTo(oldcxt);
                                549                 :     }
                                550                 :     else
                                551                 :     {
                                552                 :         /* Normal case: enter a request to fsync this segment */
                                553          996743 :         MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
                                554                 :         PendingFsyncEntry *entry;
                                555                 :         bool        found;
                                556                 : 
                                557          996743 :         Assert(type == SYNC_REQUEST);
                                558                 : 
                                559          996743 :         entry = (PendingFsyncEntry *) hash_search(pendingOps,
                                560                 :                                                   ftag,
                                561                 :                                                   HASH_ENTER,
                                562                 :                                                   &found);
                                563                 :         /* if new entry, or was previously canceled, initialize it */
  926                           564          996743 :         if (!found || entry->canceled)
                                565                 :         {
 1466                           566          140378 :             entry->cycle_ctr = sync_cycle_ctr;
                                567          140378 :             entry->canceled = false;
                                568                 :         }
                                569                 : 
                                570                 :         /*
                                571                 :          * NB: it's intentional that we don't change cycle_ctr if the entry
                                572                 :          * already exists and is not canceled.  The cycle_ctr must represent
                                573                 :          * the oldest fsync request that could be in the entry.
                                574                 :          */
                                575                 : 
                                576          996743 :         MemoryContextSwitchTo(oldcxt);
                                577                 :     }
                                578         1130751 : }
                                579                 : 
                                580                 : /*
                                581                 :  * Register the sync request locally, or forward it to the checkpointer.
                                582                 :  *
                                583                 :  * If retryOnError is true, we'll keep trying if there is no space in the
                                584                 :  * queue.  Return true if we succeeded, or false if there wasn't space.
                                585                 :  */
                                586                 : bool
                                587         1200898 : RegisterSyncRequest(const FileTag *ftag, SyncRequestType type,
                                588                 :                     bool retryOnError)
                                589                 : {
                                590                 :     bool        ret;
                                591                 : 
                                592         1200898 :     if (pendingOps != NULL)
                                593                 :     {
                                594                 :         /* standalone backend or startup process: fsync state is local */
                                595          664352 :         RememberSyncRequest(ftag, type);
                                596          664352 :         return true;
                                597                 :     }
                                598                 : 
                                599                 :     for (;;)
                                600                 :     {
                                601                 :         /*
                                602                 :          * Notify the checkpointer about it.  If we fail to queue a message in
                                603                 :          * retryOnError mode, we have to sleep and try again ... ugly, but
                                604                 :          * hopefully won't happen often.
                                605                 :          *
                                606                 :          * XXX should we CHECK_FOR_INTERRUPTS in this loop?  Escaping with an
                                607                 :          * error in the case of SYNC_UNLINK_REQUEST would leave the
                                608                 :          * no-longer-used file still present on disk, which would be bad, so
                                609                 :          * I'm inclined to assume that the checkpointer will always empty the
                                610                 :          * queue soon.
                                611                 :          */
                                612          536554 :         ret = ForwardSyncRequest(ftag, type);
                                613                 : 
                                614                 :         /*
                                615                 :          * If we are successful in queueing the request, or we failed and were
                                616                 :          * instructed not to retry on error, break.
                                617                 :          */
                                618          536554 :         if (ret || (!ret && !retryOnError))
                                619                 :             break;
                                620                 : 
  389                           621               8 :         WaitLatch(NULL, WL_EXIT_ON_PM_DEATH | WL_TIMEOUT, 10,
                                622                 :                   WAIT_EVENT_REGISTER_SYNC_REQUEST);
                                623                 :     }
                                624                 : 
 1466                           625          536546 :     return ret;
                                626                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a