LCOV - differential code coverage report
Current view: top level - src/backend/access/common - syncscan.c (source / functions) Coverage Total Hit LBC UBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 94.6 % 56 53 2 1 16 8 29 2 16 6
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 5 5 2 2 1 2 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * syncscan.c
       4                 :  *    scan synchronization support
       5                 :  *
       6                 :  * When multiple backends run a sequential scan on the same table, we try
       7                 :  * to keep them synchronized to reduce the overall I/O needed.  The goal is
       8                 :  * to read each page into shared buffer cache only once, and let all backends
       9                 :  * that take part in the shared scan process the page before it falls out of
      10                 :  * the cache.
      11                 :  *
      12                 :  * Since the "leader" in a pack of backends doing a seqscan will have to wait
      13                 :  * for I/O, while the "followers" don't, there is a strong self-synchronizing
      14                 :  * effect once we can get the backends examining approximately the same part
      15                 :  * of the table at the same time.  Hence all that is really needed is to get
      16                 :  * a new backend beginning a seqscan to begin it close to where other backends
      17                 :  * are reading.  We can scan the table circularly, from block X up to the
      18                 :  * end and then from block 0 to X-1, to ensure we visit all rows while still
      19                 :  * participating in the common scan.
      20                 :  *
      21                 :  * To accomplish that, we keep track of the scan position of each table, and
      22                 :  * start new scans close to where the previous scan(s) are.  We don't try to
      23                 :  * do any extra synchronization to keep the scans together afterwards; some
      24                 :  * scans might progress much more slowly than others, for example if the
      25                 :  * results need to be transferred to the client over a slow network, and we
      26                 :  * don't want such queries to slow down others.
      27                 :  *
      28                 :  * There can realistically only be a few large sequential scans on different
      29                 :  * tables in progress at any time.  Therefore we just keep the scan positions
      30                 :  * in a small LRU list which we scan every time we need to look up or update a
      31                 :  * scan position.  The whole mechanism is only applied for tables exceeding
      32                 :  * a threshold size (but that is not the concern of this module).
      33                 :  *
      34                 :  * INTERFACE ROUTINES
      35                 :  *      ss_get_location     - return current scan location of a relation
      36                 :  *      ss_report_location  - update current scan location
      37                 :  *
      38                 :  *
      39                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      40                 :  * Portions Copyright (c) 1994, Regents of the University of California
      41                 :  *
      42                 :  * IDENTIFICATION
      43                 :  *    src/backend/access/common/syncscan.c
      44                 :  *
      45                 :  *-------------------------------------------------------------------------
      46                 :  */
      47                 : #include "postgres.h"
      48                 : 
      49                 : #include "access/syncscan.h"
      50                 : #include "miscadmin.h"
      51                 : #include "storage/lwlock.h"
      52                 : #include "storage/shmem.h"
      53                 : #include "utils/rel.h"
      54                 : 
      55                 : 
      56                 : /* GUC variables */
      57                 : #ifdef TRACE_SYNCSCAN
      58                 : bool        trace_syncscan = false;
      59                 : #endif
      60                 : 
      61                 : 
      62                 : /*
      63                 :  * Size of the LRU list.
      64                 :  *
      65                 :  * Note: the code assumes that SYNC_SCAN_NELEM > 1.
      66                 :  *
      67                 :  * XXX: What's a good value? It should be large enough to hold the
      68                 :  * maximum number of large tables scanned simultaneously.  But a larger value
      69                 :  * means more traversing of the LRU list when starting a new scan.
      70                 :  */
      71                 : #define SYNC_SCAN_NELEM 20
      72                 : 
      73                 : /*
      74                 :  * Interval between reports of the location of the current scan, in pages.
      75                 :  *
      76                 :  * Note: This should be smaller than the ring size (see buffer/freelist.c)
      77                 :  * we use for bulk reads.  Otherwise a scan joining other scans might start
      78                 :  * from a page that's no longer in the buffer cache.  This is a bit fuzzy;
      79                 :  * there's no guarantee that the new scan will read the page before it leaves
      80                 :  * the buffer cache anyway, and on the other hand the page is most likely
      81                 :  * still in the OS cache.
      82                 :  */
      83                 : #define SYNC_SCAN_REPORT_INTERVAL (128 * 1024 / BLCKSZ)
      84                 : 
      85                 : 
      86                 : /*
      87                 :  * The scan locations structure is essentially a doubly-linked LRU with head
      88                 :  * and tail pointer, but designed to hold a fixed maximum number of elements in
      89                 :  * fixed-size shared memory.
      90                 :  */
      91                 : typedef struct ss_scan_location_t
      92                 : {
      93                 :     RelFileLocator relfilelocator;  /* identity of a relation */
      94                 :     BlockNumber location;       /* last-reported location in the relation */
      95                 : } ss_scan_location_t;
      96                 : 
      97                 : typedef struct ss_lru_item_t
      98                 : {
      99                 :     struct ss_lru_item_t *prev;
     100                 :     struct ss_lru_item_t *next;
     101                 :     ss_scan_location_t location;
     102                 : } ss_lru_item_t;
     103                 : 
     104                 : typedef struct ss_scan_locations_t
     105                 : {
     106                 :     ss_lru_item_t *head;
     107                 :     ss_lru_item_t *tail;
     108                 :     ss_lru_item_t items[FLEXIBLE_ARRAY_MEMBER]; /* SYNC_SCAN_NELEM items */
     109                 : } ss_scan_locations_t;
     110                 : 
     111                 : #define SizeOfScanLocations(N) \
     112                 :     (offsetof(ss_scan_locations_t, items) + (N) * sizeof(ss_lru_item_t))
     113                 : 
     114                 : /* Pointer to struct in shared memory */
     115                 : static ss_scan_locations_t *scan_locations;
     116                 : 
     117                 : /* prototypes for internal functions */
     118                 : static BlockNumber ss_search(RelFileLocator relfilelocator,
     119                 :                              BlockNumber location, bool set);
     120                 : 
     121                 : 
     122                 : /*
     123                 :  * SyncScanShmemSize --- report amount of shared memory space needed
     124                 :  */
     125                 : Size
     126 CBC        2738 : SyncScanShmemSize(void)
     127                 : {
     128            2738 :     return SizeOfScanLocations(SYNC_SCAN_NELEM);
     129                 : }
     130                 : 
     131                 : /*
     132                 :  * SyncScanShmemInit --- initialize this module's shared memory
     133                 :  */
     134                 : void
     135            1826 : SyncScanShmemInit(void)
     136                 : {
     137                 :     int         i;
     138                 :     bool        found;
     139                 : 
     140            1826 :     scan_locations = (ss_scan_locations_t *)
     141            1826 :         ShmemInitStruct("Sync Scan Locations List",
     142                 :                         SizeOfScanLocations(SYNC_SCAN_NELEM),
     143                 :                         &found);
     144                 : 
     145            1826 :     if (!IsUnderPostmaster)
     146                 :     {
     147                 :         /* Initialize shared memory area */
     148            1826 :         Assert(!found);
     149                 : 
     150            1826 :         scan_locations->head = &scan_locations->items[0];
     151            1826 :         scan_locations->tail = &scan_locations->items[SYNC_SCAN_NELEM - 1];
     152                 : 
     153           38346 :         for (i = 0; i < SYNC_SCAN_NELEM; i++)
     154                 :         {
     155           36520 :             ss_lru_item_t *item = &scan_locations->items[i];
     156                 : 
     157                 :             /*
     158                 :              * Initialize all slots with invalid values. As scans are started,
     159                 :              * these invalid entries will fall off the LRU list and get
     160                 :              * replaced with real entries.
     161                 :              */
     162 GNC       36520 :             item->location.relfilelocator.spcOid = InvalidOid;
     163           36520 :             item->location.relfilelocator.dbOid = InvalidOid;
     164           36520 :             item->location.relfilelocator.relNumber = InvalidRelFileNumber;
     165 CBC       36520 :             item->location.location = InvalidBlockNumber;
     166                 : 
     167           36520 :             item->prev = (i > 0) ?
     168           36520 :                 (&scan_locations->items[i - 1]) : NULL;
     169           36520 :             item->next = (i < SYNC_SCAN_NELEM - 1) ?
     170           36520 :                 (&scan_locations->items[i + 1]) : NULL;
     171                 :         }
     172                 :     }
     173                 :     else
     174 UBC           0 :         Assert(found);
     175 CBC        1826 : }
     176                 : 
     177                 : /*
     178                 :  * ss_search --- search the scan_locations structure for an entry with the
     179                 :  *      given relfilelocator.
     180                 :  *
     181                 :  * If "set" is true, the location is updated to the given location.  If no
     182                 :  * entry for the given relfilelocator is found, it will be created at the head
     183                 :  * of the list with the given location, even if "set" is false.
     184                 :  *
     185                 :  * In any case, the location after possible update is returned.
     186                 :  *
     187                 :  * Caller is responsible for having acquired suitable lock on the shared
     188                 :  * data structure.
     189                 :  */
     190                 : static BlockNumber
     191 GNC        1408 : ss_search(RelFileLocator relfilelocator, BlockNumber location, bool set)
     192                 : {
     193                 :     ss_lru_item_t *item;
     194                 : 
     195 CBC        1408 :     item = scan_locations->head;
     196                 :     for (;;)
     197             437 :     {
     198                 :         bool        match;
     199                 : 
     200 GNC        1845 :         match = RelFileLocatorEquals(item->location.relfilelocator,
     201                 :                                      relfilelocator);
     202                 : 
     203 GIC        1845 :         if (match || item->next == NULL)
     204 ECB             :         {
     205                 :             /*
     206                 :              * If we reached the end of list and no match was found, take over
     207                 :              * the last entry
     208                 :              */
     209 GIC        1408 :             if (!match)
     210 ECB             :             {
     211 GNC          23 :                 item->location.relfilelocator = relfilelocator;
     212 CBC          23 :                 item->location.location = location;
     213 ECB             :             }
     214 GIC        1385 :             else if (set)
     215 CBC        1344 :                 item->location.location = location;
     216 ECB             : 
     217                 :             /* Move the entry to the front of the LRU list */
     218 GIC        1408 :             if (item != scan_locations->head)
     219 ECB             :             {
     220                 :                 /* unlink */
     221 GIC          23 :                 if (item == scan_locations->tail)
     222 CBC          23 :                     scan_locations->tail = item->prev;
     223              23 :                 item->prev->next = item->next;
     224              23 :                 if (item->next)
     225 LBC           0 :                     item->next->prev = item->prev;
     226 EUB             : 
     227                 :                 /* link */
     228 GIC          23 :                 item->prev = NULL;
     229 CBC          23 :                 item->next = scan_locations->head;
     230              23 :                 scan_locations->head->prev = item;
     231              23 :                 scan_locations->head = item;
     232 ECB             :             }
     233                 : 
     234 GIC        1408 :             return item->location.location;
     235 ECB             :         }
     236                 : 
     237 GIC         437 :         item = item->next;
     238 ECB             :     }
     239                 : 
     240                 :     /* not reached */
     241                 : }
     242                 : 
     243                 : /*
     244                 :  * ss_get_location --- get the optimal starting location for scan
     245                 :  *
     246                 :  * Returns the last-reported location of a sequential scan on the
     247                 :  * relation, or 0 if no valid location is found.
     248                 :  *
     249                 :  * We expect the caller has just done RelationGetNumberOfBlocks(), and
     250                 :  * so that number is passed in rather than computing it again.  The result
     251                 :  * is guaranteed less than relnblocks (assuming that's > 0).
     252                 :  */
     253                 : BlockNumber
     254 GIC          64 : ss_get_location(Relation rel, BlockNumber relnblocks)
     255 ECB             : {
     256                 :     BlockNumber startloc;
     257                 : 
     258 GIC          64 :     LWLockAcquire(SyncScanLock, LW_EXCLUSIVE);
     259 GNC          64 :     startloc = ss_search(rel->rd_locator, 0, false);
     260 CBC          64 :     LWLockRelease(SyncScanLock);
     261 ECB             : 
     262                 :     /*
     263                 :      * If the location is not a valid block number for this scan, start at 0.
     264                 :      *
     265                 :      * This can happen if for instance a VACUUM truncated the table since the
     266                 :      * location was saved.
     267                 :      */
     268 GIC          64 :     if (startloc >= relnblocks)
     269 LBC           0 :         startloc = 0;
     270 EUB             : 
     271                 : #ifdef TRACE_SYNCSCAN
     272                 :     if (trace_syncscan)
     273                 :         elog(LOG,
     274                 :              "SYNC_SCAN: start \"%s\" (size %u) at %u",
     275                 :              RelationGetRelationName(rel), relnblocks, startloc);
     276                 : #endif
     277                 : 
     278 GIC          64 :     return startloc;
     279 ECB             : }
     280                 : 
     281                 : /*
     282                 :  * ss_report_location --- update the current scan location
     283                 :  *
     284                 :  * Writes an entry into the shared Sync Scan state of the form
     285                 :  * (relfilelocator, blocknumber), overwriting any existing entry for the
     286                 :  * same relfilelocator.
     287                 :  */
     288                 : void
     289 GIC       22013 : ss_report_location(Relation rel, BlockNumber location)
     290 ECB             : {
     291                 : #ifdef TRACE_SYNCSCAN
     292                 :     if (trace_syncscan)
     293                 :     {
     294                 :         if ((location % 1024) == 0)
     295                 :             elog(LOG,
     296                 :                  "SYNC_SCAN: scanning \"%s\" at %u",
     297                 :                  RelationGetRelationName(rel), location);
     298                 :     }
     299                 : #endif
     300                 : 
     301                 :     /*
     302                 :      * To reduce lock contention, only report scan progress every N pages. For
     303                 :      * the same reason, don't block if the lock isn't immediately available.
     304                 :      * Missing a few updates isn't critical, it just means that a new scan
     305                 :      * that wants to join the pack will start a little bit behind the head of
     306                 :      * the scan.  Hopefully the pages are still in OS cache and the scan
     307                 :      * catches up quickly.
     308                 :      */
     309 GIC       22013 :     if ((location % SYNC_SCAN_REPORT_INTERVAL) == 0)
     310 ECB             :     {
     311 GIC        1344 :         if (LWLockConditionalAcquire(SyncScanLock, LW_EXCLUSIVE))
     312 ECB             :         {
     313 GNC        1344 :             (void) ss_search(rel->rd_locator, location, true);
     314 CBC        1344 :             LWLockRelease(SyncScanLock);
     315 ECB             :         }
     316                 : #ifdef TRACE_SYNCSCAN
     317                 :         else if (trace_syncscan)
     318                 :             elog(LOG,
     319                 :                  "SYNC_SCAN: missed update for \"%s\" at %u",
     320                 :                  RelationGetRelationName(rel), location);
     321                 : #endif
     322                 :     }
     323 GIC       22013 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a