LCOV - differential code coverage report
Current view: top level - src/backend/storage/freespace - freespace.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 93.9 % 213 200 1 4 8 2 61 5 132 3 47 20
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 21 21 4 2 15 2 3
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * freespace.c
       4                 :  *    POSTGRES free space map for quickly finding free space in relations
       5                 :  *
       6                 :  *
       7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       8                 :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/storage/freespace/freespace.c
      12                 :  *
      13                 :  *
      14                 :  * NOTES:
      15                 :  *
      16                 :  *  Free Space Map keeps track of the amount of free space on pages, and
      17                 :  *  allows quickly searching for a page with enough free space. The FSM is
      18                 :  *  stored in a dedicated relation fork of all heap relations, and those
      19                 :  *  index access methods that need it (see also indexfsm.c). See README for
      20                 :  *  more information.
      21                 :  *
      22                 :  *-------------------------------------------------------------------------
      23                 :  */
      24                 : #include "postgres.h"
      25                 : 
      26                 : #include "access/htup_details.h"
      27                 : #include "access/xloginsert.h"
      28                 : #include "access/xlogutils.h"
      29                 : #include "miscadmin.h"
      30                 : #include "storage/freespace.h"
      31                 : #include "storage/fsm_internals.h"
      32                 : #include "storage/lmgr.h"
      33                 : #include "storage/smgr.h"
      34                 : 
      35                 : 
      36                 : /*
      37                 :  * We use just one byte to store the amount of free space on a page, so we
      38                 :  * divide the amount of free space a page can have into 256 different
      39                 :  * categories. The highest category, 255, represents a page with at least
      40                 :  * MaxFSMRequestSize bytes of free space, and the second highest category
      41                 :  * represents the range from 254 * FSM_CAT_STEP, inclusive, to
      42                 :  * MaxFSMRequestSize, exclusive.
      43                 :  *
      44                 :  * MaxFSMRequestSize depends on the architecture and BLCKSZ, but assuming
      45                 :  * default 8k BLCKSZ, and that MaxFSMRequestSize is 8164 bytes, the
      46                 :  * categories look like this:
      47                 :  *
      48                 :  *
      49                 :  * Range     Category
      50                 :  * 0    - 31   0
      51                 :  * 32   - 63   1
      52                 :  * ...    ...  ...
      53                 :  * 8096 - 8127 253
      54                 :  * 8128 - 8163 254
      55                 :  * 8164 - 8192 255
      56                 :  *
      57                 :  * The reason that MaxFSMRequestSize is special is that if MaxFSMRequestSize
      58                 :  * isn't equal to a range boundary, a page with exactly MaxFSMRequestSize
      59                 :  * bytes of free space wouldn't satisfy a request for MaxFSMRequestSize
      60                 :  * bytes. If there isn't more than MaxFSMRequestSize bytes of free space on a
      61                 :  * completely empty page, that would mean that we could never satisfy a
      62                 :  * request of exactly MaxFSMRequestSize bytes.
      63                 :  */
      64                 : #define FSM_CATEGORIES  256
      65                 : #define FSM_CAT_STEP    (BLCKSZ / FSM_CATEGORIES)
      66                 : #define MaxFSMRequestSize   MaxHeapTupleSize
      67                 : 
      68                 : /*
      69                 :  * Depth of the on-disk tree. We need to be able to address 2^32-1 blocks,
      70                 :  * and 1626 is the smallest number that satisfies X^3 >= 2^32-1. Likewise,
      71                 :  * 256 is the smallest number that satisfies X^4 >= 2^32-1. In practice,
      72                 :  * this means that 4096 bytes is the smallest BLCKSZ that we can get away
      73                 :  * with a 3-level tree, and 512 is the smallest we support.
      74                 :  */
      75                 : #define FSM_TREE_DEPTH  ((SlotsPerFSMPage >= 1626) ? 3 : 4)
      76                 : 
      77                 : #define FSM_ROOT_LEVEL  (FSM_TREE_DEPTH - 1)
      78                 : #define FSM_BOTTOM_LEVEL 0
      79                 : 
      80                 : /*
      81                 :  * The internal FSM routines work on a logical addressing scheme. Each
      82                 :  * level of the tree can be thought of as a separately addressable file.
      83                 :  */
      84                 : typedef struct
      85                 : {
      86                 :     int         level;          /* level */
      87                 :     int         logpageno;      /* page number within the level */
      88                 : } FSMAddress;
      89                 : 
      90                 : /* Address of the root page. */
      91                 : static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0};
      92                 : 
      93                 : /* functions to navigate the tree */
      94                 : static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
      95                 : static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
      96                 : static FSMAddress fsm_get_location(BlockNumber heapblk, uint16 *slot);
      97                 : static BlockNumber fsm_get_heap_blk(FSMAddress addr, uint16 slot);
      98                 : static BlockNumber fsm_logical_to_physical(FSMAddress addr);
      99                 : 
     100                 : static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend);
     101                 : static Buffer fsm_extend(Relation rel, BlockNumber fsm_nblocks);
     102                 : 
     103                 : /* functions to convert amount of free space to a FSM category */
     104                 : static uint8 fsm_space_avail_to_cat(Size avail);
     105                 : static uint8 fsm_space_needed_to_cat(Size needed);
     106                 : static Size fsm_space_cat_to_avail(uint8 cat);
     107                 : 
     108                 : /* workhorse functions for various operations */
     109                 : static int  fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
     110                 :                                uint8 newValue, uint8 minValue);
     111                 : static BlockNumber fsm_search(Relation rel, uint8 min_cat);
     112                 : static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr,
     113                 :                              BlockNumber start, BlockNumber end,
     114                 :                              bool *eof_p);
     115                 : 
     116                 : 
     117                 : /******** Public API ********/
     118                 : 
     119                 : /*
     120                 :  * GetPageWithFreeSpace - try to find a page in the given relation with
     121                 :  *      at least the specified amount of free space.
     122                 :  *
     123                 :  * If successful, return the block number; if not, return InvalidBlockNumber.
     124                 :  *
     125                 :  * The caller must be prepared for the possibility that the returned page
     126                 :  * will turn out to have too little space available by the time the caller
     127                 :  * gets a lock on it.  In that case, the caller should report the actual
     128                 :  * amount of free space available on that page and then try again (see
     129                 :  * RecordAndGetPageWithFreeSpace).  If InvalidBlockNumber is returned,
     130                 :  * extend the relation.
     131                 :  */
     132                 : BlockNumber
     133 CBC      177585 : GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
     134                 : {
     135          177585 :     uint8       min_cat = fsm_space_needed_to_cat(spaceNeeded);
     136                 : 
     137          177585 :     return fsm_search(rel, min_cat);
     138                 : }
     139                 : 
     140                 : /*
     141                 :  * RecordAndGetPageWithFreeSpace - update info about a page and try again.
     142                 :  *
     143                 :  * We provide this combo form to save some locking overhead, compared to
     144                 :  * separate RecordPageWithFreeSpace + GetPageWithFreeSpace calls. There's
     145                 :  * also some effort to return a page close to the old page; if there's a
     146                 :  * page with enough free space on the same FSM page where the old one page
     147                 :  * is located, it is preferred.
     148                 :  */
     149                 : BlockNumber
     150          209060 : RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
     151                 :                               Size oldSpaceAvail, Size spaceNeeded)
     152                 : {
     153          209060 :     int         old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
     154          209060 :     int         search_cat = fsm_space_needed_to_cat(spaceNeeded);
     155                 :     FSMAddress  addr;
     156                 :     uint16      slot;
     157                 :     int         search_slot;
     158                 : 
     159                 :     /* Get the location of the FSM byte representing the heap block */
     160          209060 :     addr = fsm_get_location(oldPage, &slot);
     161                 : 
     162          209060 :     search_slot = fsm_set_and_search(rel, addr, slot, old_cat, search_cat);
     163                 : 
     164                 :     /*
     165                 :      * If fsm_set_and_search found a suitable new block, return that.
     166                 :      * Otherwise, search as usual.
     167                 :      */
     168          209060 :     if (search_slot != -1)
     169           26430 :         return fsm_get_heap_blk(addr, search_slot);
     170                 :     else
     171          182630 :         return fsm_search(rel, search_cat);
     172                 : }
     173                 : 
     174                 : /*
     175                 :  * RecordPageWithFreeSpace - update info about a page.
     176                 :  *
     177                 :  * Note that if the new spaceAvail value is higher than the old value stored
     178                 :  * in the FSM, the space might not become visible to searchers until the next
     179                 :  * FreeSpaceMapVacuum call, which updates the upper level pages.
     180                 :  */
     181                 : void
     182          177963 : RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
     183                 : {
     184          177963 :     int         new_cat = fsm_space_avail_to_cat(spaceAvail);
     185                 :     FSMAddress  addr;
     186                 :     uint16      slot;
     187                 : 
     188                 :     /* Get the location of the FSM byte representing the heap block */
     189          177963 :     addr = fsm_get_location(heapBlk, &slot);
     190                 : 
     191          177963 :     fsm_set_and_search(rel, addr, slot, new_cat, 0);
     192          177963 : }
     193                 : 
     194                 : /*
     195                 :  * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
     196                 :  *      WAL replay
     197                 :  */
     198                 : void
     199 GNC      271448 : XLogRecordPageWithFreeSpace(RelFileLocator rlocator, BlockNumber heapBlk,
     200                 :                             Size spaceAvail)
     201                 : {
     202 CBC      271448 :     int         new_cat = fsm_space_avail_to_cat(spaceAvail);
     203                 :     FSMAddress  addr;
     204                 :     uint16      slot;
     205                 :     BlockNumber blkno;
     206                 :     Buffer      buf;
     207                 :     Page        page;
     208                 : 
     209                 :     /* Get the location of the FSM byte representing the heap block */
     210          271448 :     addr = fsm_get_location(heapBlk, &slot);
     211          271448 :     blkno = fsm_logical_to_physical(addr);
     212                 : 
     213                 :     /* If the page doesn't exist already, extend */
     214 GNC      271448 :     buf = XLogReadBufferExtended(rlocator, FSM_FORKNUM, blkno,
     215                 :                                  RBM_ZERO_ON_ERROR, InvalidBuffer);
     216 CBC      271448 :     LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     217                 : 
     218          271448 :     page = BufferGetPage(buf);
     219          271448 :     if (PageIsNew(page))
     220             408 :         PageInit(page, BLCKSZ, 0);
     221                 : 
     222          271448 :     if (fsm_set_avail(page, slot, new_cat))
     223          267733 :         MarkBufferDirtyHint(buf, false);
     224          271448 :     UnlockReleaseBuffer(buf);
     225          271448 : }
     226                 : 
     227                 : /*
     228                 :  * GetRecordedFreeSpace - return the amount of free space on a particular page,
     229                 :  *      according to the FSM.
     230                 :  */
     231                 : Size
     232              45 : GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
     233                 : {
     234                 :     FSMAddress  addr;
     235                 :     uint16      slot;
     236                 :     Buffer      buf;
     237                 :     uint8       cat;
     238                 : 
     239                 :     /* Get the location of the FSM byte representing the heap block */
     240              45 :     addr = fsm_get_location(heapBlk, &slot);
     241                 : 
     242              45 :     buf = fsm_readbuf(rel, addr, false);
     243              45 :     if (!BufferIsValid(buf))
     244              35 :         return 0;
     245              10 :     cat = fsm_get_avail(BufferGetPage(buf), slot);
     246              10 :     ReleaseBuffer(buf);
     247                 : 
     248              10 :     return fsm_space_cat_to_avail(cat);
     249                 : }
     250                 : 
     251                 : /*
     252                 :  * FreeSpaceMapPrepareTruncateRel - prepare for truncation of a relation.
     253                 :  *
     254                 :  * nblocks is the new size of the heap.
     255                 :  *
     256                 :  * Return the number of blocks of new FSM.
     257                 :  * If it's InvalidBlockNumber, there is nothing to truncate;
     258                 :  * otherwise the caller is responsible for calling smgrtruncate()
     259                 :  * to truncate the FSM pages, and FreeSpaceMapVacuumRange()
     260                 :  * to update upper-level pages in the FSM.
     261                 :  */
     262                 : BlockNumber
     263             116 : FreeSpaceMapPrepareTruncateRel(Relation rel, BlockNumber nblocks)
     264                 : {
     265                 :     BlockNumber new_nfsmblocks;
     266                 :     FSMAddress  first_removed_address;
     267                 :     uint16      first_removed_slot;
     268                 :     Buffer      buf;
     269                 : 
     270                 :     /*
     271                 :      * If no FSM has been created yet for this relation, there's nothing to
     272                 :      * truncate.
     273                 :      */
     274             116 :     if (!smgrexists(RelationGetSmgr(rel), FSM_FORKNUM))
     275 UBC           0 :         return InvalidBlockNumber;
     276                 : 
     277                 :     /* Get the location in the FSM of the first removed heap block */
     278 CBC         116 :     first_removed_address = fsm_get_location(nblocks, &first_removed_slot);
     279                 : 
     280                 :     /*
     281                 :      * Zero out the tail of the last remaining FSM page. If the slot
     282                 :      * representing the first removed heap block is at a page boundary, as the
     283                 :      * first slot on the FSM page that first_removed_address points to, we can
     284                 :      * just truncate that page altogether.
     285                 :      */
     286             116 :     if (first_removed_slot > 0)
     287                 :     {
     288              58 :         buf = fsm_readbuf(rel, first_removed_address, false);
     289              58 :         if (!BufferIsValid(buf))
     290 UBC           0 :             return InvalidBlockNumber;  /* nothing to do; the FSM was already
     291                 :                                          * smaller */
     292 CBC          58 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     293                 : 
     294                 :         /* NO EREPORT(ERROR) from here till changes are logged */
     295              58 :         START_CRIT_SECTION();
     296                 : 
     297              58 :         fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
     298                 : 
     299                 :         /*
     300                 :          * Truncation of a relation is WAL-logged at a higher-level, and we
     301                 :          * will be called at WAL replay. But if checksums are enabled, we need
     302                 :          * to still write a WAL record to protect against a torn page, if the
     303                 :          * page is flushed to disk before the truncation WAL record. We cannot
     304                 :          * use MarkBufferDirtyHint here, because that will not dirty the page
     305                 :          * during recovery.
     306                 :          */
     307              58 :         MarkBufferDirty(buf);
     308              58 :         if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
     309              15 :             log_newpage_buffer(buf, false);
     310                 : 
     311              58 :         END_CRIT_SECTION();
     312                 : 
     313              58 :         UnlockReleaseBuffer(buf);
     314                 : 
     315              58 :         new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
     316                 :     }
     317                 :     else
     318                 :     {
     319              58 :         new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
     320              58 :         if (smgrnblocks(RelationGetSmgr(rel), FSM_FORKNUM) <= new_nfsmblocks)
     321 UBC           0 :             return InvalidBlockNumber;  /* nothing to do; the FSM was already
     322                 :                                          * smaller */
     323                 :     }
     324                 : 
     325 CBC         116 :     return new_nfsmblocks;
     326                 : }
     327                 : 
     328                 : /*
     329                 :  * FreeSpaceMapVacuum - update upper-level pages in the rel's FSM
     330                 :  *
     331                 :  * We assume that the bottom-level pages have already been updated with
     332                 :  * new free-space information.
     333                 :  */
     334                 : void
     335             132 : FreeSpaceMapVacuum(Relation rel)
     336                 : {
     337                 :     bool        dummy;
     338                 : 
     339                 :     /* Recursively scan the tree, starting at the root */
     340             132 :     (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS,
     341                 :                            (BlockNumber) 0, InvalidBlockNumber,
     342                 :                            &dummy);
     343             132 : }
     344                 : 
     345                 : /*
     346                 :  * FreeSpaceMapVacuumRange - update upper-level pages in the rel's FSM
     347                 :  *
     348                 :  * As above, but assume that only heap pages between start and end-1 inclusive
     349                 :  * have new free-space information, so update only the upper-level slots
     350                 :  * covering that block range.  end == InvalidBlockNumber is equivalent to
     351                 :  * "all the rest of the relation".
     352                 :  */
     353                 : void
     354           15454 : FreeSpaceMapVacuumRange(Relation rel, BlockNumber start, BlockNumber end)
     355                 : {
     356                 :     bool        dummy;
     357                 : 
     358                 :     /* Recursively scan the tree, starting at the root */
     359           15454 :     if (end > start)
     360           15343 :         (void) fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, start, end, &dummy);
     361           15454 : }
     362                 : 
     363                 : /******** Internal routines ********/
     364                 : 
     365                 : /*
     366                 :  * Return category corresponding x bytes of free space
     367                 :  */
     368                 : static uint8
     369          658471 : fsm_space_avail_to_cat(Size avail)
     370                 : {
     371                 :     int         cat;
     372                 : 
     373          658471 :     Assert(avail < BLCKSZ);
     374                 : 
     375          658471 :     if (avail >= MaxFSMRequestSize)
     376           13013 :         return 255;
     377                 : 
     378          645458 :     cat = avail / FSM_CAT_STEP;
     379                 : 
     380                 :     /*
     381                 :      * The highest category, 255, is reserved for MaxFSMRequestSize bytes or
     382                 :      * more.
     383                 :      */
     384          645458 :     if (cat > 254)
     385 UBC           0 :         cat = 254;
     386                 : 
     387 CBC      645458 :     return (uint8) cat;
     388                 : }
     389                 : 
     390                 : /*
     391                 :  * Return the lower bound of the range of free space represented by given
     392                 :  * category.
     393                 :  */
     394                 : static Size
     395              10 : fsm_space_cat_to_avail(uint8 cat)
     396                 : {
     397                 :     /* The highest category represents exactly MaxFSMRequestSize bytes. */
     398              10 :     if (cat == 255)
     399 UBC           0 :         return MaxFSMRequestSize;
     400                 :     else
     401 CBC          10 :         return cat * FSM_CAT_STEP;
     402                 : }
     403                 : 
     404                 : /*
     405                 :  * Which category does a page need to have, to accommodate x bytes of data?
     406                 :  * While fsm_space_avail_to_cat() rounds down, this needs to round up.
     407                 :  */
     408                 : static uint8
     409          386645 : fsm_space_needed_to_cat(Size needed)
     410                 : {
     411                 :     int         cat;
     412                 : 
     413                 :     /* Can't ask for more space than the highest category represents */
     414          386645 :     if (needed > MaxFSMRequestSize)
     415 UBC           0 :         elog(ERROR, "invalid FSM request size %zu", needed);
     416                 : 
     417 CBC      386645 :     if (needed == 0)
     418 UBC           0 :         return 1;
     419                 : 
     420 CBC      386645 :     cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP;
     421                 : 
     422          386645 :     if (cat > 255)
     423 UBC           0 :         cat = 255;
     424                 : 
     425 CBC      386645 :     return (uint8) cat;
     426                 : }
     427                 : 
     428                 : /*
     429                 :  * Returns the physical block number of a FSM page
     430                 :  */
     431                 : static BlockNumber
     432         1091767 : fsm_logical_to_physical(FSMAddress addr)
     433                 : {
     434                 :     BlockNumber pages;
     435                 :     int         leafno;
     436                 :     int         l;
     437                 : 
     438                 :     /*
     439                 :      * Calculate the logical page number of the first leaf page below the
     440                 :      * given page.
     441                 :      */
     442         1091767 :     leafno = addr.logpageno;
     443         1876099 :     for (l = 0; l < addr.level; l++)
     444          784332 :         leafno *= SlotsPerFSMPage;
     445                 : 
     446                 :     /* Count upper level nodes required to address the leaf page */
     447         1091767 :     pages = 0;
     448         4367068 :     for (l = 0; l < FSM_TREE_DEPTH; l++)
     449                 :     {
     450         3275301 :         pages += leafno + 1;
     451         3275301 :         leafno /= SlotsPerFSMPage;
     452                 :     }
     453                 : 
     454                 :     /*
     455                 :      * If the page we were asked for wasn't at the bottom level, subtract the
     456                 :      * additional lower level pages we counted above.
     457                 :      */
     458         1091767 :     pages -= addr.level;
     459                 : 
     460                 :     /* Turn the page count into 0-based block number */
     461         1091767 :     return pages - 1;
     462                 : }
     463                 : 
     464                 : /*
     465                 :  * Return the FSM location corresponding to given heap block.
     466                 :  */
     467                 : static FSMAddress
     468          720456 : fsm_get_location(BlockNumber heapblk, uint16 *slot)
     469                 : {
     470                 :     FSMAddress  addr;
     471                 : 
     472          720456 :     addr.level = FSM_BOTTOM_LEVEL;
     473          720456 :     addr.logpageno = heapblk / SlotsPerFSMPage;
     474          720456 :     *slot = heapblk % SlotsPerFSMPage;
     475                 : 
     476          720456 :     return addr;
     477                 : }
     478                 : 
     479                 : /*
     480                 :  * Return the heap block number corresponding to given location in the FSM.
     481                 :  */
     482                 : static BlockNumber
     483           36776 : fsm_get_heap_blk(FSMAddress addr, uint16 slot)
     484                 : {
     485           36776 :     Assert(addr.level == FSM_BOTTOM_LEVEL);
     486           36776 :     return ((unsigned int) addr.logpageno) * SlotsPerFSMPage + slot;
     487                 : }
     488                 : 
     489                 : /*
     490                 :  * Given a logical address of a child page, get the logical page number of
     491                 :  * the parent, and the slot within the parent corresponding to the child.
     492                 :  */
     493                 : static FSMAddress
     494           94274 : fsm_get_parent(FSMAddress child, uint16 *slot)
     495                 : {
     496                 :     FSMAddress  parent;
     497                 : 
     498           94274 :     Assert(child.level < FSM_ROOT_LEVEL);
     499                 : 
     500           94274 :     parent.level = child.level + 1;
     501           94274 :     parent.logpageno = child.logpageno / SlotsPerFSMPage;
     502           94274 :     *slot = child.logpageno % SlotsPerFSMPage;
     503                 : 
     504           94274 :     return parent;
     505                 : }
     506                 : 
     507                 : /*
     508                 :  * Given a logical address of a parent page and a slot number, get the
     509                 :  * logical address of the corresponding child page.
     510                 :  */
     511                 : static FSMAddress
     512           54311 : fsm_get_child(FSMAddress parent, uint16 slot)
     513                 : {
     514                 :     FSMAddress  child;
     515                 : 
     516           54311 :     Assert(parent.level > FSM_BOTTOM_LEVEL);
     517                 : 
     518           54311 :     child.level = parent.level - 1;
     519           54311 :     child.logpageno = parent.logpageno * SlotsPerFSMPage + slot;
     520                 : 
     521           54311 :     return child;
     522                 : }
     523                 : 
     524                 : /*
     525                 :  * Read a FSM page.
     526                 :  *
     527                 :  * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
     528                 :  * true, the FSM file is extended.
     529                 :  */
     530                 : static Buffer
     531          820203 : fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
     532                 : {
     533          820203 :     BlockNumber blkno = fsm_logical_to_physical(addr);
     534                 :     Buffer      buf;
     535                 :     SMgrRelation reln;
     536                 : 
     537                 :     /*
     538                 :      * Caution: re-using this smgr pointer could fail if the relcache entry
     539                 :      * gets closed.  It's safe as long as we only do smgr-level operations
     540                 :      * between here and the last use of the pointer.
     541                 :      */
     542          820203 :     reln = RelationGetSmgr(rel);
     543                 : 
     544                 :     /*
     545                 :      * If we haven't cached the size of the FSM yet, check it first.  Also
     546                 :      * recheck if the requested block seems to be past end, since our cached
     547                 :      * value might be stale.  (We send smgr inval messages on truncation, but
     548                 :      * not on extension.)
     549                 :      */
     550          820203 :     if (reln->smgr_cached_nblocks[FSM_FORKNUM] == InvalidBlockNumber ||
     551          669137 :         blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
     552                 :     {
     553                 :         /* Invalidate the cache so smgrnblocks asks the kernel. */
     554          199913 :         reln->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber;
     555          199913 :         if (smgrexists(reln, FSM_FORKNUM))
     556          101414 :             smgrnblocks(reln, FSM_FORKNUM);
     557                 :         else
     558           98499 :             reln->smgr_cached_nblocks[FSM_FORKNUM] = 0;
     559                 :     }
     560                 : 
     561                 :     /*
     562                 :      * For reading we use ZERO_ON_ERROR mode, and initialize the page if
     563                 :      * necessary.  The FSM information is not accurate anyway, so it's better
     564                 :      * to clear corrupt pages than error out. Since the FSM changes are not
     565                 :      * WAL-logged, the so-called torn page problem on crash can lead to pages
     566                 :      * with corrupt headers, for example.
     567                 :      *
     568                 :      * We use the same path below to initialize pages when extending the
     569                 :      * relation, as a concurrent extension can end up with vm_extend()
     570                 :      * returning an already-initialized page.
     571                 :      */
     572 GIC      820203 :     if (blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
     573                 :     {
     574           98967 :         if (extend)
     575 GNC       14421 :             buf = fsm_extend(rel, blkno + 1);
     576                 :         else
     577 GIC       84546 :             return InvalidBuffer;
     578                 :     }
     579                 :     else
     580 GNC      721236 :         buf = ReadBufferExtended(rel, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, NULL);
     581                 : 
     582                 :     /*
     583                 :      * Initializing the page when needed is trickier than it looks, because of
     584                 :      * the possibility of multiple backends doing this concurrently, and our
     585                 :      * desire to not uselessly take the buffer lock in the normal path where
     586 ECB             :      * the page is OK.  We must take the lock to initialize the page, so
     587                 :      * recheck page newness after we have the lock, in case someone else
     588                 :      * already did it.  Also, because we initially check PageIsNew with no
     589                 :      * lock, it's possible to fall through and return the buffer while someone
     590                 :      * else is still initializing the page (i.e., we might see pd_upper as set
     591                 :      * but other page header fields are still zeroes).  This is harmless for
     592                 :      * callers that will take a buffer lock themselves, but some callers
     593                 :      * inspect the page without any lock at all.  The latter is OK only so
     594                 :      * long as it doesn't depend on the page header having correct contents.
     595                 :      * Current usage is safe because PageGetContents() does not require that.
     596                 :      */
     597 GIC      735657 :     if (PageIsNew(BufferGetPage(buf)))
     598                 :     {
     599           49080 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     600           49080 :         if (PageIsNew(BufferGetPage(buf)))
     601           49080 :             PageInit(BufferGetPage(buf), BLCKSZ, 0);
     602 CBC       49080 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     603                 :     }
     604          735657 :     return buf;
     605 ECB             : }
     606                 : 
     607                 : /*
     608                 :  * Ensure that the FSM fork is at least fsm_nblocks long, extending
     609                 :  * it if necessary with empty pages. And by empty, I mean pages filled
     610                 :  * with zeros, meaning there's no free space.
     611                 :  */
     612                 : static Buffer
     613 GIC       14421 : fsm_extend(Relation rel, BlockNumber fsm_nblocks)
     614                 : {
     615 GNC       14421 :     return ExtendBufferedRelTo(EB_REL(rel), FSM_FORKNUM, NULL,
     616                 :                                EB_CREATE_FORK_IF_NEEDED |
     617                 :                                EB_CLEAR_SIZE_CACHE,
     618                 :                                fsm_nblocks,
     619                 :                                RBM_ZERO_ON_ERROR);
     620                 : }
     621                 : 
     622 ECB             : /*
     623                 :  * Set value in given FSM page and slot.
     624                 :  *
     625                 :  * If minValue > 0, the updated page is also searched for a page with at
     626                 :  * least minValue of free space. If one is found, its slot number is
     627                 :  * returned, -1 otherwise.
     628                 :  */
     629                 : static int
     630 GIC      388561 : fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
     631 ECB             :                    uint8 newValue, uint8 minValue)
     632                 : {
     633                 :     Buffer      buf;
     634                 :     Page        page;
     635 GIC      388561 :     int         newslot = -1;
     636                 : 
     637 CBC      388561 :     buf = fsm_readbuf(rel, addr, true);
     638 GIC      388561 :     LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     639 ECB             : 
     640 CBC      388561 :     page = BufferGetPage(buf);
     641 ECB             : 
     642 GIC      388561 :     if (fsm_set_avail(page, slot, newValue))
     643 CBC      184486 :         MarkBufferDirtyHint(buf, false);
     644 ECB             : 
     645 CBC      388561 :     if (minValue != 0)
     646                 :     {
     647                 :         /* Search while we still hold the lock */
     648          209060 :         newslot = fsm_search_avail(buf, minValue,
     649 GIC      209060 :                                    addr.level == FSM_BOTTOM_LEVEL,
     650 ECB             :                                    true);
     651                 :     }
     652                 : 
     653 GIC      388561 :     UnlockReleaseBuffer(buf);
     654                 : 
     655          388561 :     return newslot;
     656 ECB             : }
     657                 : 
     658                 : /*
     659                 :  * Search the tree for a heap page with at least min_cat of free space
     660                 :  */
     661                 : static BlockNumber
     662 GIC      360215 : fsm_search(Relation rel, uint8 min_cat)
     663                 : {
     664          360215 :     int         restarts = 0;
     665          360215 :     FSMAddress  addr = FSM_ROOT_ADDRESS;
     666                 : 
     667 ECB             :     for (;;)
     668 GIC       24537 :     {
     669                 :         int         slot;
     670                 :         Buffer      buf;
     671          384752 :         uint8       max_avail = 0;
     672                 : 
     673                 :         /* Read the FSM page. */
     674          384752 :         buf = fsm_readbuf(rel, addr, false);
     675                 : 
     676                 :         /* Search within the page */
     677          384752 :         if (BufferIsValid(buf))
     678                 :         {
     679          300718 :             LockBuffer(buf, BUFFER_LOCK_SHARE);
     680          300718 :             slot = fsm_search_avail(buf, min_cat,
     681          300718 :                                     (addr.level == FSM_BOTTOM_LEVEL),
     682                 :                                     false);
     683          300718 :             if (slot == -1)
     684          267373 :                 max_avail = fsm_get_max_avail(BufferGetPage(buf));
     685          300718 :             UnlockReleaseBuffer(buf);
     686 ECB             :         }
     687                 :         else
     688 GIC       84034 :             slot = -1;
     689                 : 
     690          384752 :         if (slot != -1)
     691                 :         {
     692                 :             /*
     693                 :              * Descend the tree, or return the found block if we're at the
     694                 :              * bottom.
     695                 :              */
     696 CBC       33345 :             if (addr.level == FSM_BOTTOM_LEVEL)
     697 GBC       10346 :                 return fsm_get_heap_blk(addr, slot);
     698                 : 
     699 GIC       22999 :             addr = fsm_get_child(addr, slot);
     700 ECB             :         }
     701 GIC      351407 :         else if (addr.level == FSM_ROOT_LEVEL)
     702                 :         {
     703                 :             /*
     704                 :              * At the root, failure means there's no page with enough free
     705                 :              * space in the FSM. Give up.
     706                 :              */
     707          349869 :             return InvalidBlockNumber;
     708                 :         }
     709                 :         else
     710                 :         {
     711                 :             uint16      parentslot;
     712                 :             FSMAddress  parent;
     713                 : 
     714                 :             /*
     715                 :              * At lower level, failure can happen if the value in the upper-
     716                 :              * level node didn't reflect the value on the lower page. Update
     717                 :              * the upper node, to avoid falling into the same trap again, and
     718                 :              * start over.
     719                 :              *
     720 ECB             :              * There's a race condition here, if another backend updates this
     721                 :              * page right after we release it, and gets the lock on the parent
     722                 :              * page before us. We'll then update the parent page with the now
     723                 :              * stale information we had. It's OK, because it should happen
     724                 :              * rarely, and will be fixed by the next vacuum.
     725                 :              */
     726 GIC        1538 :             parent = fsm_get_parent(addr, &parentslot);
     727            1538 :             fsm_set_and_search(rel, parent, parentslot, max_avail, 0);
     728                 : 
     729 ECB             :             /*
     730                 :              * If the upper pages are badly out of date, we might need to loop
     731                 :              * quite a few times, updating them as we go. Any inconsistencies
     732                 :              * should eventually be corrected and the loop should end. Looping
     733                 :              * indefinitely is nevertheless scary, so provide an emergency
     734                 :              * valve.
     735                 :              */
     736 CBC        1538 :             if (restarts++ > 10000)
     737 UIC           0 :                 return InvalidBlockNumber;
     738 ECB             : 
     739                 :             /* Start search all over from the root */
     740 GIC        1538 :             addr = FSM_ROOT_ADDRESS;
     741                 :         }
     742                 :     }
     743                 : }
     744 ECB             : 
     745                 : 
     746                 : /*
     747                 :  * Recursive guts of FreeSpaceMapVacuum
     748                 :  *
     749                 :  * Examine the FSM page indicated by addr, as well as its children, updating
     750                 :  * upper-level nodes that cover the heap block range from start to end-1.
     751                 :  * (It's okay if end is beyond the actual end of the map.)
     752                 :  * Return the maximum freespace value on this page.
     753                 :  *
     754                 :  * If addr is past the end of the FSM, set *eof_p to true and return 0.
     755                 :  *
     756                 :  * This traverses the tree in depth-first order.  The tree is stored
     757                 :  * physically in depth-first order, so this should be pretty I/O efficient.
     758                 :  */
     759                 : static uint8
     760 GIC       46787 : fsm_vacuum_page(Relation rel, FSMAddress addr,
     761                 :                 BlockNumber start, BlockNumber end,
     762 ECB             :                 bool *eof_p)
     763                 : {
     764                 :     Buffer      buf;
     765                 :     Page        page;
     766                 :     uint8       max_avail;
     767                 : 
     768                 :     /* Read the page if it exists, or return EOF */
     769 GIC       46787 :     buf = fsm_readbuf(rel, addr, false);
     770 CBC       46787 :     if (!BufferIsValid(buf))
     771                 :     {
     772             477 :         *eof_p = true;
     773             477 :         return 0;
     774 EUB             :     }
     775                 :     else
     776 GIC       46310 :         *eof_p = false;
     777 EUB             : 
     778 GIC       46310 :     page = BufferGetPage(buf);
     779 ECB             : 
     780                 :     /*
     781                 :      * If we're above the bottom level, recurse into children, and fix the
     782                 :      * information stored about them at this level.
     783                 :      */
     784 GBC       46310 :     if (addr.level > FSM_BOTTOM_LEVEL)
     785                 :     {
     786 ECB             :         FSMAddress  fsm_start,
     787                 :                     fsm_end;
     788                 :         uint16      fsm_start_slot,
     789                 :                     fsm_end_slot;
     790                 :         int         slot,
     791                 :                     start_slot,
     792                 :                     end_slot;
     793 CBC       30912 :         bool        eof = false;
     794 ECB             : 
     795                 :         /*
     796                 :          * Compute the range of slots we need to update on this page, given
     797                 :          * the requested range of heap blocks to consider.  The first slot to
     798                 :          * update is the one covering the "start" block, and the last slot is
     799                 :          * the one covering "end - 1".  (Some of this work will be duplicated
     800                 :          * in each recursive call, but it's cheap enough to not worry about.)
     801                 :          */
     802 GIC       30912 :         fsm_start = fsm_get_location(start, &fsm_start_slot);
     803 CBC       30912 :         fsm_end = fsm_get_location(end - 1, &fsm_end_slot);
     804 ECB             : 
     805 CBC       77280 :         while (fsm_start.level < addr.level)
     806 ECB             :         {
     807 GIC       46368 :             fsm_start = fsm_get_parent(fsm_start, &fsm_start_slot);
     808           46368 :             fsm_end = fsm_get_parent(fsm_end, &fsm_end_slot);
     809                 :         }
     810           30912 :         Assert(fsm_start.level == addr.level);
     811                 : 
     812 CBC       30912 :         if (fsm_start.logpageno == addr.logpageno)
     813 GIC       30912 :             start_slot = fsm_start_slot;
     814 UIC           0 :         else if (fsm_start.logpageno > addr.logpageno)
     815               0 :             start_slot = SlotsPerFSMPage;   /* shouldn't get here... */
     816                 :         else
     817               0 :             start_slot = 0;
     818                 : 
     819 GIC       30912 :         if (fsm_end.logpageno == addr.logpageno)
     820 CBC       30683 :             end_slot = fsm_end_slot;
     821 GIC         229 :         else if (fsm_end.logpageno > addr.logpageno)
     822 CBC         229 :             end_slot = SlotsPerFSMPage - 1;
     823                 :         else
     824 LBC           0 :             end_slot = -1;      /* shouldn't get here... */
     825                 : 
     826 GIC     1052707 :         for (slot = start_slot; slot <= end_slot; slot++)
     827                 :         {
     828                 :             int         child_avail;
     829                 : 
     830         1021795 :             CHECK_FOR_INTERRUPTS();
     831                 : 
     832                 :             /* After we hit end-of-file, just clear the rest of the slots */
     833         1021795 :             if (!eof)
     834           31312 :                 child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot),
     835                 :                                               start, end,
     836                 :                                               &eof);
     837                 :             else
     838          990483 :                 child_avail = 0;
     839                 : 
     840                 :             /* Update information about the child */
     841         1021795 :             if (fsm_get_avail(page, slot) != child_avail)
     842                 :             {
     843           28648 :                 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     844           28648 :                 fsm_set_avail(page, slot, child_avail);
     845           28648 :                 MarkBufferDirtyHint(buf, false);
     846           28648 :                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     847                 :             }
     848                 :         }
     849                 :     }
     850                 : 
     851                 :     /* Now get the maximum value on the page, to return to caller */
     852           46310 :     max_avail = fsm_get_max_avail(page);
     853                 : 
     854                 :     /*
     855                 :      * Reset the next slot pointer. This encourages the use of low-numbered
     856                 :      * pages, increasing the chances that a later vacuum can truncate the
     857                 :      * relation.  We don't bother with a lock here, nor with marking the page
     858                 :      * dirty if it wasn't already, since this is just a hint.
     859                 :      */
     860           46310 :     ((FSMPage) PageGetContents(page))->fp_next_slot = 0;
     861                 : 
     862           46310 :     ReleaseBuffer(buf);
     863                 : 
     864           46310 :     return max_avail;
     865                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a