Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * fsmpage.c
4 : : * routines to search and manipulate one FSM page.
5 : : *
6 : : *
7 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/storage/freespace/fsmpage.c
12 : : *
13 : : * NOTES:
14 : : *
15 : : * The public functions in this file form an API that hides the internal
16 : : * structure of a FSM page. This allows freespace.c to treat each FSM page
17 : : * as a black box with SlotsPerPage "slots". fsm_set_avail() and
18 : : * fsm_get_avail() let you get/set the value of a slot, and
19 : : * fsm_search_avail() lets you search for a slot with value >= X.
20 : : *
21 : : *-------------------------------------------------------------------------
22 : : */
23 : : #include "postgres.h"
24 : :
25 : : #include "storage/bufmgr.h"
26 : : #include "storage/fsm_internals.h"
27 : :
28 : : /* Macros to navigate the tree within a page. Root has index zero. */
29 : : #define leftchild(x) (2 * (x) + 1)
30 : : #define rightchild(x) (2 * (x) + 2)
31 : : #define parentof(x) (((x) - 1) / 2)
32 : :
33 : : /*
34 : : * Find right neighbor of x, wrapping around within the level
35 : : */
36 : : static int
5668 tgl@sss.pgh.pa.us 37 :CBC 64259 : rightneighbor(int x)
38 : : {
39 : : /*
40 : : * Move right. This might wrap around, stepping to the leftmost node at
41 : : * the next level.
42 : : */
5675 heikki.linnakangas@i 43 : 64259 : x++;
44 : :
45 : : /*
46 : : * Check if we stepped to the leftmost node at next level, and correct if
47 : : * so. The leftmost nodes at each level are numbered x = 2^level - 1, so
48 : : * check if (x + 1) is a power of two, using a standard
49 : : * twos-complement-arithmetic trick.
50 : : */
51 [ + + ]: 64259 : if (((x + 1) & x) == 0)
52 : 3530 : x = parentof(x);
53 : :
54 : 64259 : return x;
55 : : }
56 : :
57 : : /*
58 : : * Sets the value of a slot on page. Returns true if the page was modified.
59 : : *
60 : : * The caller must hold an exclusive lock on the page.
61 : : */
62 : : bool
63 : 802747 : fsm_set_avail(Page page, int slot, uint8 value)
64 : : {
5421 bruce@momjian.us 65 : 802747 : int nodeno = NonLeafNodesPerPage + slot;
66 : 802747 : FSMPage fsmpage = (FSMPage) PageGetContents(page);
67 : : uint8 oldvalue;
68 : :
5675 heikki.linnakangas@i 69 [ - + ]: 802747 : Assert(slot < LeafNodesPerPage);
70 : :
71 : 802747 : oldvalue = fsmpage->fp_nodes[nodeno];
72 : :
73 : : /* If the value hasn't changed, we don't need to do anything */
74 [ + + + - ]: 802747 : if (oldvalue == value && value <= fsmpage->fp_nodes[0])
75 : 350418 : return false;
76 : :
77 : 452329 : fsmpage->fp_nodes[nodeno] = value;
78 : :
79 : : /*
80 : : * Propagate up, until we hit the root or a node that doesn't need to be
81 : : * updated.
82 : : */
83 : : do
84 : : {
5421 bruce@momjian.us 85 : 4264058 : uint8 newvalue = 0;
86 : : int lchild;
87 : : int rchild;
88 : :
5675 heikki.linnakangas@i 89 : 4264058 : nodeno = parentof(nodeno);
90 : 4264058 : lchild = leftchild(nodeno);
91 : 4264058 : rchild = lchild + 1;
92 : :
93 : 4264058 : newvalue = fsmpage->fp_nodes[lchild];
94 [ + + ]: 4264058 : if (rchild < NodesPerPage)
95 : 4264056 : newvalue = Max(newvalue,
96 : : fsmpage->fp_nodes[rchild]);
97 : :
98 : 4264058 : oldvalue = fsmpage->fp_nodes[nodeno];
99 [ + + ]: 4264058 : if (oldvalue == newvalue)
100 : 122872 : break;
101 : :
102 : 4141186 : fsmpage->fp_nodes[nodeno] = newvalue;
103 [ + + ]: 4141186 : } while (nodeno > 0);
104 : :
105 : : /*
106 : : * sanity check: if the new value is (still) higher than the value at the
107 : : * top, the tree is corrupt. If so, rebuild.
108 : : */
109 [ - + ]: 452329 : if (value > fsmpage->fp_nodes[0])
5675 heikki.linnakangas@i 110 :UBC 0 : fsm_rebuild_page(page);
111 : :
5675 heikki.linnakangas@i 112 :CBC 452329 : return true;
113 : : }
114 : :
115 : : /*
116 : : * Returns the value of given slot on page.
117 : : *
118 : : * Since this is just a read-only access of a single byte, the page doesn't
119 : : * need to be locked.
120 : : */
121 : : uint8
122 : 1571496 : fsm_get_avail(Page page, int slot)
123 : : {
5421 bruce@momjian.us 124 : 1571496 : FSMPage fsmpage = (FSMPage) PageGetContents(page);
125 : :
5668 tgl@sss.pgh.pa.us 126 [ - + ]: 1571496 : Assert(slot < LeafNodesPerPage);
127 : :
5675 heikki.linnakangas@i 128 : 1571496 : return fsmpage->fp_nodes[NonLeafNodesPerPage + slot];
129 : : }
130 : :
131 : : /*
132 : : * Returns the value at the root of a page.
133 : : *
134 : : * Since this is just a read-only access of a single byte, the page doesn't
135 : : * need to be locked.
136 : : */
137 : : uint8
138 : 199566 : fsm_get_max_avail(Page page)
139 : : {
5421 bruce@momjian.us 140 : 199566 : FSMPage fsmpage = (FSMPage) PageGetContents(page);
141 : :
5675 heikki.linnakangas@i 142 : 199566 : return fsmpage->fp_nodes[0];
143 : : }
144 : :
145 : : /*
146 : : * Searches for a slot with category at least minvalue.
147 : : * Returns slot number, or -1 if none found.
148 : : *
149 : : * The caller must hold at least a shared lock on the page, and this
150 : : * function can unlock and lock the page again in exclusive mode if it
151 : : * needs to be updated. exclusive_lock_held should be set to true if the
152 : : * caller is already holding an exclusive lock, to avoid extra work.
153 : : *
154 : : * If advancenext is false, fp_next_slot is set to point to the returned
155 : : * slot, and if it's true, to the slot after the returned slot.
156 : : */
157 : : int
158 : 225104 : fsm_search_avail(Buffer buf, uint8 minvalue, bool advancenext,
159 : : bool exclusive_lock_held)
160 : : {
2916 kgrittn@postgresql.o 161 : 225104 : Page page = BufferGetPage(buf);
5421 bruce@momjian.us 162 : 225104 : FSMPage fsmpage = (FSMPage) PageGetContents(page);
163 : : int nodeno;
164 : : int target;
165 : : uint16 slot;
166 : :
167 : 225104 : restart:
168 : :
169 : : /*
170 : : * Check the root first, and exit quickly if there's no leaf with enough
171 : : * free space
172 : : */
5675 heikki.linnakangas@i 173 [ + + ]: 225104 : if (fsmpage->fp_nodes[0] < minvalue)
174 : 176251 : return -1;
175 : :
176 : : /*
177 : : * Start search using fp_next_slot. It's just a hint, so check that it's
178 : : * sane. (This also handles wrapping around when the prior call returned
179 : : * the last slot on the page.)
180 : : */
181 : 48853 : target = fsmpage->fp_next_slot;
182 [ + - - + ]: 48853 : if (target < 0 || target >= LeafNodesPerPage)
5675 heikki.linnakangas@i 183 :UBC 0 : target = 0;
5675 heikki.linnakangas@i 184 :CBC 48853 : target += NonLeafNodesPerPage;
185 : :
186 : : /*----------
187 : : * Start the search from the target slot. At every step, move one
188 : : * node to the right, then climb up to the parent. Stop when we reach
189 : : * a node with enough free space (as we must, since the root has enough
190 : : * space).
191 : : *
192 : : * The idea is to gradually expand our "search triangle", that is, all
193 : : * nodes covered by the current node, and to be sure we search to the
194 : : * right from the start point. At the first step, only the target slot
195 : : * is examined. When we move up from a left child to its parent, we are
196 : : * adding the right-hand subtree of that parent to the search triangle.
197 : : * When we move right then up from a right child, we are dropping the
198 : : * current search triangle (which we know doesn't contain any suitable
199 : : * page) and instead looking at the next-larger-size triangle to its
200 : : * right. So we never look left from our original start point, and at
201 : : * each step the size of the search triangle doubles, ensuring it takes
202 : : * only log2(N) work to search N pages.
203 : : *
204 : : * The "move right" operation will wrap around if it hits the right edge
205 : : * of the tree, so the behavior is still good if we start near the right.
206 : : * Note also that the move-and-climb behavior ensures that we can't end
207 : : * up on one of the missing nodes at the right of the leaf level.
208 : : *
209 : : * For example, consider this tree:
210 : : *
211 : : * 7
212 : : * 7 6
213 : : * 5 7 6 5
214 : : * 4 5 5 7 2 6 5 2
215 : : * T
216 : : *
217 : : * Assume that the target node is the node indicated by the letter T,
218 : : * and we're searching for a node with value of 6 or higher. The search
219 : : * begins at T. At the first iteration, we move to the right, then to the
220 : : * parent, arriving at the rightmost 5. At the second iteration, we move
221 : : * to the right, wrapping around, then climb up, arriving at the 7 on the
222 : : * third level. 7 satisfies our search, so we descend down to the bottom,
223 : : * following the path of sevens. This is in fact the first suitable page
224 : : * to the right of (allowing for wraparound) our start point.
225 : : *----------
226 : : */
227 : 48853 : nodeno = target;
228 [ + + ]: 113112 : while (nodeno > 0)
229 : : {
230 [ + + ]: 109582 : if (fsmpage->fp_nodes[nodeno] >= minvalue)
231 : 45323 : break;
232 : :
233 : : /*
234 : : * Move to the right, wrapping around on same level if necessary, then
235 : : * climb up.
236 : : */
5668 tgl@sss.pgh.pa.us 237 : 64259 : nodeno = parentof(rightneighbor(nodeno));
238 : : }
239 : :
240 : : /*
241 : : * We're now at a node with enough free space, somewhere in the middle of
242 : : * the tree. Descend to the bottom, following a path with enough free
243 : : * space, preferring to move left if there's a choice.
244 : : */
5675 heikki.linnakangas@i 245 [ + + ]: 113112 : while (nodeno < NonLeafNodesPerPage)
246 : : {
5421 bruce@momjian.us 247 : 64259 : int childnodeno = leftchild(nodeno);
248 : :
5604 tgl@sss.pgh.pa.us 249 [ + - ]: 64259 : if (childnodeno < NodesPerPage &&
250 [ + + ]: 64259 : fsmpage->fp_nodes[childnodeno] >= minvalue)
251 : : {
252 : 46424 : nodeno = childnodeno;
253 : 46424 : continue;
254 : : }
255 : 17835 : childnodeno++; /* point to right child */
256 [ + - ]: 17835 : if (childnodeno < NodesPerPage &&
257 [ + - ]: 17835 : fsmpage->fp_nodes[childnodeno] >= minvalue)
258 : : {
259 : 17835 : nodeno = childnodeno;
260 : : }
261 : : else
262 : : {
263 : : /*
264 : : * Oops. The parent node promised that either left or right child
265 : : * has enough space, but neither actually did. This can happen in
266 : : * case of a "torn page", IOW if we crashed earlier while writing
267 : : * the page to disk, and only part of the page made it to disk.
268 : : *
269 : : * Fix the corruption and restart.
270 : : */
271 : : RelFileLocator rlocator;
272 : : ForkNumber forknum;
273 : : BlockNumber blknum;
274 : :
648 rhaas@postgresql.org 275 :UBC 0 : BufferGetTag(buf, &rlocator, &forknum, &blknum);
564 276 [ # # ]: 0 : elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u",
277 : : blknum, rlocator.spcOid, rlocator.dbOid, rlocator.relNumber);
278 : :
279 : : /* make sure we hold an exclusive lock */
5675 heikki.linnakangas@i 280 [ # # ]: 0 : if (!exclusive_lock_held)
281 : : {
282 : 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
283 : 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
284 : 0 : exclusive_lock_held = true;
285 : : }
286 : 0 : fsm_rebuild_page(page);
3954 jdavis@postgresql.or 287 : 0 : MarkBufferDirtyHint(buf, false);
5675 heikki.linnakangas@i 288 : 0 : goto restart;
289 : : }
290 : : }
291 : :
292 : : /* We're now at the bottom level, at a node with enough space. */
5675 heikki.linnakangas@i 293 :CBC 48853 : slot = nodeno - NonLeafNodesPerPage;
294 : :
295 : : /*
296 : : * Update the next-target pointer. Note that we do this even if we're only
297 : : * holding a shared lock, on the grounds that it's better to use a shared
298 : : * lock and get a garbled next pointer every now and then, than take the
299 : : * concurrency hit of an exclusive lock.
300 : : *
301 : : * Wrap-around is handled at the beginning of this function.
302 : : */
303 : 48853 : fsmpage->fp_next_slot = slot + (advancenext ? 1 : 0);
304 : :
305 : 48853 : return slot;
306 : : }
307 : :
308 : : /*
309 : : * Sets the available space to zero for all slots numbered >= nslots.
310 : : * Returns true if the page was modified.
311 : : */
312 : : bool
313 : 122 : fsm_truncate_avail(Page page, int nslots)
314 : : {
5421 bruce@momjian.us 315 : 122 : FSMPage fsmpage = (FSMPage) PageGetContents(page);
316 : : uint8 *ptr;
317 : 122 : bool changed = false;
318 : :
5675 heikki.linnakangas@i 319 [ + - - + ]: 122 : Assert(nslots >= 0 && nslots < LeafNodesPerPage);
320 : :
321 : : /* Clear all truncated leaf nodes */
322 : 122 : ptr = &fsmpage->fp_nodes[NonLeafNodesPerPage + nslots];
323 [ + + ]: 490351 : for (; ptr < &fsmpage->fp_nodes[NodesPerPage]; ptr++)
324 : : {
325 [ + + ]: 490229 : if (*ptr != 0)
326 : 2734 : changed = true;
327 : 490229 : *ptr = 0;
328 : : }
329 : :
330 : : /* Fix upper nodes. */
331 [ + + ]: 122 : if (changed)
332 : 106 : fsm_rebuild_page(page);
333 : :
334 : 122 : return changed;
335 : : }
336 : :
337 : : /*
338 : : * Reconstructs the upper levels of a page. Returns true if the page
339 : : * was modified.
340 : : */
341 : : bool
342 : 106 : fsm_rebuild_page(Page page)
343 : : {
5421 bruce@momjian.us 344 : 106 : FSMPage fsmpage = (FSMPage) PageGetContents(page);
345 : 106 : bool changed = false;
346 : : int nodeno;
347 : :
348 : : /*
349 : : * Start from the lowest non-leaf level, at last node, working our way
350 : : * backwards, through all non-leaf nodes at all levels, up to the root.
351 : : */
5675 heikki.linnakangas@i 352 [ + + ]: 434176 : for (nodeno = NonLeafNodesPerPage - 1; nodeno >= 0; nodeno--)
353 : : {
5421 bruce@momjian.us 354 : 434070 : int lchild = leftchild(nodeno);
355 : 434070 : int rchild = lchild + 1;
356 : 434070 : uint8 newvalue = 0;
357 : :
358 : : /* The first few nodes we examine might have zero or one child. */
5675 heikki.linnakangas@i 359 [ + + ]: 434070 : if (lchild < NodesPerPage)
360 : 432692 : newvalue = fsmpage->fp_nodes[lchild];
361 : :
362 [ + + ]: 434070 : if (rchild < NodesPerPage)
363 : 432586 : newvalue = Max(newvalue,
364 : : fsmpage->fp_nodes[rchild]);
365 : :
366 [ + + ]: 434070 : if (fsmpage->fp_nodes[nodeno] != newvalue)
367 : : {
368 : 3619 : fsmpage->fp_nodes[nodeno] = newvalue;
369 : 3619 : changed = true;
370 : : }
371 : : }
372 : :
373 : 106 : return changed;
374 : : }
|