Age Owner Branch data TLA Line data Source code
1 : : /*
2 : : * brin_pageops.c
3 : : * Page-handling routines for BRIN indexes
4 : : *
5 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
6 : : * Portions Copyright (c) 1994, Regents of the University of California
7 : : *
8 : : * IDENTIFICATION
9 : : * src/backend/access/brin/brin_pageops.c
10 : : */
11 : : #include "postgres.h"
12 : :
13 : : #include "access/brin_page.h"
14 : : #include "access/brin_pageops.h"
15 : : #include "access/brin_revmap.h"
16 : : #include "access/brin_xlog.h"
17 : : #include "access/xloginsert.h"
18 : : #include "miscadmin.h"
19 : : #include "storage/bufmgr.h"
20 : : #include "storage/freespace.h"
21 : : #include "storage/lmgr.h"
22 : : #include "utils/rel.h"
23 : :
24 : : /*
25 : : * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page. We can tolerate
26 : : * a single item per page, unlike other index AMs.
27 : : */
28 : : #define BrinMaxItemSize \
29 : : MAXALIGN_DOWN(BLCKSZ - \
30 : : (MAXALIGN(SizeOfPageHeaderData + \
31 : : sizeof(ItemIdData)) + \
32 : : MAXALIGN(sizeof(BrinSpecialSpace))))
33 : :
34 : : static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
35 : : bool *extended);
36 : : static Size br_page_get_freespace(Page page);
37 : : static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
38 : :
39 : :
40 : : /*
41 : : * Update tuple origtup (size origsz), located in offset oldoff of buffer
42 : : * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
43 : : * at heapBlk. oldbuf must not be locked on entry, and is not locked at exit.
44 : : *
45 : : * If samepage is true, attempt to put the new tuple in the same page, but if
46 : : * there's no room, use some other one.
47 : : *
48 : : * If the update is successful, return true; the revmap is updated to point to
49 : : * the new tuple. If the update is not done for whatever reason, return false.
50 : : * Caller may retry the update if this happens.
51 : : */
52 : : bool
3446 alvherre@alvh.no-ip. 53 :CBC 13718 : brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
54 : : BrinRevmap *revmap, BlockNumber heapBlk,
55 : : Buffer oldbuf, OffsetNumber oldoff,
56 : : const BrinTuple *origtup, Size origsz,
57 : : const BrinTuple *newtup, Size newsz,
58 : : bool samepage)
59 : : {
60 : : Page oldpage;
61 : : ItemId oldlp;
62 : : BrinTuple *oldtup;
63 : : Size oldsz;
64 : : Buffer newbuf;
2202 tgl@sss.pgh.pa.us 65 : 13718 : BlockNumber newblk = InvalidBlockNumber;
66 : : bool extended;
67 : :
3247 68 [ - + ]: 13718 : Assert(newsz == MAXALIGN(newsz));
69 : :
70 : : /* If the item is oversized, don't bother. */
3092 alvherre@alvh.no-ip. 71 [ - + ]: 13718 : if (newsz > BrinMaxItemSize)
72 : : {
3092 alvherre@alvh.no-ip. 73 [ # # ]:UBC 0 : ereport(ERROR,
74 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
75 : : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
76 : : newsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
77 : : return false; /* keep compiler quiet */
78 : : }
79 : :
80 : : /* make sure the revmap is long enough to contain the entry we need */
3446 alvherre@alvh.no-ip. 81 :CBC 13718 : brinRevmapExtend(revmap, heapBlk);
82 : :
83 [ + + ]: 13718 : if (!samepage)
84 : : {
85 : : /* need a page on which to put the item */
86 : 307 : newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
87 [ - + ]: 307 : if (!BufferIsValid(newbuf))
88 : : {
3168 alvherre@alvh.no-ip. 89 [ # # ]:UBC 0 : Assert(!extended);
3446 90 : 0 : return false;
91 : : }
92 : :
93 : : /*
94 : : * Note: it's possible (though unlikely) that the returned newbuf is
95 : : * the same as oldbuf, if brin_getinsertbuffer determined that the old
96 : : * buffer does in fact have enough space.
97 : : */
3446 alvherre@alvh.no-ip. 98 [ - + ]:CBC 307 : if (newbuf == oldbuf)
99 : : {
3168 alvherre@alvh.no-ip. 100 [ # # ]:UBC 0 : Assert(!extended);
3446 101 : 0 : newbuf = InvalidBuffer;
102 : : }
103 : : else
2202 tgl@sss.pgh.pa.us 104 :CBC 307 : newblk = BufferGetBlockNumber(newbuf);
105 : : }
106 : : else
107 : : {
3446 alvherre@alvh.no-ip. 108 : 13411 : LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
109 : 13411 : newbuf = InvalidBuffer;
3167 110 : 13411 : extended = false;
111 : : }
2916 kgrittn@postgresql.o 112 : 13718 : oldpage = BufferGetPage(oldbuf);
3446 alvherre@alvh.no-ip. 113 : 13718 : oldlp = PageGetItemId(oldpage, oldoff);
114 : :
115 : : /*
116 : : * Check that the old tuple wasn't updated concurrently: it might have
117 : : * moved someplace else entirely, and for that matter the whole page
118 : : * might've become a revmap page. Note that in the first two cases
119 : : * checked here, the "oldlp" we just calculated is garbage; but
120 : : * PageGetItemId() is simple enough that it was safe to do that
121 : : * calculation anyway.
122 : : */
2355 tgl@sss.pgh.pa.us 123 [ + - + - ]: 27436 : if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
124 : 13718 : oldoff > PageGetMaxOffsetNumber(oldpage) ||
125 [ - + ]: 13718 : !ItemIdIsNormal(oldlp))
126 : : {
3446 alvherre@alvh.no-ip. 127 :UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
128 : :
129 : : /*
130 : : * If this happens, and the new buffer was obtained by extending the
131 : : * relation, then we need to ensure we don't leave it uninitialized or
132 : : * forget about it.
133 : : */
134 [ # # ]: 0 : if (BufferIsValid(newbuf))
135 : : {
3168 136 [ # # ]: 0 : if (extended)
137 : 0 : brin_initialize_empty_new_buffer(idxrel, newbuf);
3446 138 : 0 : UnlockReleaseBuffer(newbuf);
3168 139 [ # # ]: 0 : if (extended)
2202 tgl@sss.pgh.pa.us 140 : 0 : FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
141 : : }
3446 alvherre@alvh.no-ip. 142 : 0 : return false;
143 : : }
144 : :
3446 alvherre@alvh.no-ip. 145 :CBC 13718 : oldsz = ItemIdGetLength(oldlp);
146 : 13718 : oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);
147 : :
148 : : /*
149 : : * ... or it might have been updated in place to different contents.
150 : : */
151 [ - + ]: 13718 : if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
152 : : {
3446 alvherre@alvh.no-ip. 153 :UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
154 [ # # ]: 0 : if (BufferIsValid(newbuf))
155 : : {
156 : : /* As above, initialize and record new page if we got one */
3168 157 [ # # ]: 0 : if (extended)
158 : 0 : brin_initialize_empty_new_buffer(idxrel, newbuf);
3446 159 : 0 : UnlockReleaseBuffer(newbuf);
3168 160 [ # # ]: 0 : if (extended)
2202 tgl@sss.pgh.pa.us 161 : 0 : FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
162 : : }
3446 alvherre@alvh.no-ip. 163 : 0 : return false;
164 : : }
165 : :
166 : : /*
167 : : * Great, the old tuple is intact. We can proceed with the update.
168 : : *
169 : : * If there's enough room in the old page for the new tuple, replace it.
170 : : *
171 : : * Note that there might now be enough space on the page even though the
172 : : * caller told us there isn't, if a concurrent update moved another tuple
173 : : * elsewhere or replaced a tuple with a smaller one.
174 : : */
3323 alvherre@alvh.no-ip. 175 [ + + + + ]:CBC 27145 : if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
3446 176 : 13427 : brin_can_do_samepage_update(oldbuf, origsz, newsz))
177 : : {
178 : 13411 : START_CRIT_SECTION();
1902 peter@eisentraut.org 179 [ - + ]: 13411 : if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) unconstify(BrinTuple *, newtup), newsz))
2774 tgl@sss.pgh.pa.us 180 [ # # ]:UBC 0 : elog(ERROR, "failed to replace BRIN tuple");
3446 alvherre@alvh.no-ip. 181 :CBC 13411 : MarkBufferDirty(oldbuf);
182 : :
183 : : /* XLOG stuff */
184 [ + + + + : 13411 : if (RelationNeedsWAL(idxrel))
+ - + - ]
185 : : {
186 : : xl_brin_samepage_update xlrec;
187 : : XLogRecPtr recptr;
188 : 13408 : uint8 info = XLOG_BRIN_SAMEPAGE_UPDATE;
189 : :
3433 heikki.linnakangas@i 190 : 13408 : xlrec.offnum = oldoff;
191 : :
192 : 13408 : XLogBeginInsert();
193 : 13408 : XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
194 : :
195 : 13408 : XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
1902 peter@eisentraut.org 196 : 13408 : XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
197 : :
3433 heikki.linnakangas@i 198 : 13408 : recptr = XLogInsert(RM_BRIN_ID, info);
199 : :
3446 alvherre@alvh.no-ip. 200 : 13408 : PageSetLSN(oldpage, recptr);
201 : : }
202 : :
203 [ - + ]: 13411 : END_CRIT_SECTION();
204 : :
205 : 13411 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
206 : :
2202 tgl@sss.pgh.pa.us 207 [ - + ]: 13411 : if (BufferIsValid(newbuf))
208 : : {
209 : : /* As above, initialize and record new page if we got one */
2202 tgl@sss.pgh.pa.us 210 [ # # ]:UBC 0 : if (extended)
211 : 0 : brin_initialize_empty_new_buffer(idxrel, newbuf);
212 : 0 : UnlockReleaseBuffer(newbuf);
213 [ # # ]: 0 : if (extended)
214 : 0 : FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
215 : : }
216 : :
3446 alvherre@alvh.no-ip. 217 :CBC 13411 : return true;
218 : : }
219 [ - + ]: 307 : else if (newbuf == InvalidBuffer)
220 : : {
221 : : /*
222 : : * Not enough space, but caller said that there was. Tell them to
223 : : * start over.
224 : : */
3446 alvherre@alvh.no-ip. 225 :UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
226 : 0 : return false;
227 : : }
228 : : else
229 : : {
230 : : /*
231 : : * Not enough free space on the oldpage. Put the new tuple on the new
232 : : * page, and update the revmap.
233 : : */
2916 kgrittn@postgresql.o 234 :CBC 307 : Page newpage = BufferGetPage(newbuf);
235 : : Buffer revmapbuf;
236 : : ItemPointerData newtid;
237 : : OffsetNumber newoff;
3168 alvherre@alvh.no-ip. 238 : 307 : Size freespace = 0;
239 : :
3446 240 : 307 : revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
241 : :
242 : 307 : START_CRIT_SECTION();
243 : :
244 : : /*
245 : : * We need to initialize the page if it's newly obtained. Note we
246 : : * will WAL-log the initialization as part of the update, so we don't
247 : : * need to do that here.
248 : : */
3168 249 [ + + ]: 307 : if (extended)
2202 tgl@sss.pgh.pa.us 250 : 11 : brin_page_init(newpage, BRIN_PAGETYPE_REGULAR);
251 : :
2774 252 : 307 : PageIndexTupleDeleteNoCompact(oldpage, oldoff);
1902 peter@eisentraut.org 253 : 307 : newoff = PageAddItem(newpage, (Item) unconstify(BrinTuple *, newtup), newsz,
254 : : InvalidOffsetNumber, false, false);
3446 alvherre@alvh.no-ip. 255 [ - + ]: 307 : if (newoff == InvalidOffsetNumber)
3446 alvherre@alvh.no-ip. 256 [ # # ]:UBC 0 : elog(ERROR, "failed to add BRIN tuple to new page");
3446 alvherre@alvh.no-ip. 257 :CBC 307 : MarkBufferDirty(oldbuf);
258 : 307 : MarkBufferDirty(newbuf);
259 : :
260 : : /* needed to update FSM below */
3168 261 [ + + ]: 307 : if (extended)
262 : 11 : freespace = br_page_get_freespace(newpage);
263 : :
2202 tgl@sss.pgh.pa.us 264 : 307 : ItemPointerSet(&newtid, newblk, newoff);
3446 alvherre@alvh.no-ip. 265 : 307 : brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
266 : 307 : MarkBufferDirty(revmapbuf);
267 : :
268 : : /* XLOG stuff */
269 [ + - + + : 307 : if (RelationNeedsWAL(idxrel))
+ - + - ]
270 : : {
271 : : xl_brin_update xlrec;
272 : : XLogRecPtr recptr;
273 : : uint8 info;
274 : :
275 [ + + ]: 307 : info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
276 : :
3433 heikki.linnakangas@i 277 : 307 : xlrec.insert.offnum = newoff;
3445 alvherre@alvh.no-ip. 278 : 307 : xlrec.insert.heapBlk = heapBlk;
279 : 307 : xlrec.insert.pagesPerRange = pagesPerRange;
3433 heikki.linnakangas@i 280 : 307 : xlrec.oldOffnum = oldoff;
281 : :
282 : 307 : XLogBeginInsert();
283 : :
284 : : /* new page */
285 : 307 : XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
286 : :
287 [ + + ]: 307 : XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
1902 peter@eisentraut.org 288 : 307 : XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
289 : :
290 : : /* revmap page */
2652 alvherre@alvh.no-ip. 291 : 307 : XLogRegisterBuffer(1, revmapbuf, 0);
292 : :
293 : : /* old page */
3433 heikki.linnakangas@i 294 : 307 : XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
295 : :
296 : 307 : recptr = XLogInsert(RM_BRIN_ID, info);
297 : :
3446 alvherre@alvh.no-ip. 298 : 307 : PageSetLSN(oldpage, recptr);
299 : 307 : PageSetLSN(newpage, recptr);
2916 kgrittn@postgresql.o 300 : 307 : PageSetLSN(BufferGetPage(revmapbuf), recptr);
301 : : }
302 : :
3446 alvherre@alvh.no-ip. 303 [ - + ]: 307 : END_CRIT_SECTION();
304 : :
305 : 307 : LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
306 : 307 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
307 : 307 : UnlockReleaseBuffer(newbuf);
308 : :
3168 309 [ + + ]: 307 : if (extended)
310 : : {
1804 akapila@postgresql.o 311 : 11 : RecordPageWithFreeSpace(idxrel, newblk, freespace);
2202 tgl@sss.pgh.pa.us 312 : 11 : FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
313 : : }
314 : :
3446 alvherre@alvh.no-ip. 315 : 307 : return true;
316 : : }
317 : : }
318 : :
319 : : /*
320 : : * Return whether brin_doupdate can do a samepage update.
321 : : */
322 : : bool
323 : 26854 : brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
324 : : {
325 : : return
326 [ + + ]: 30804 : ((newsz <= origsz) ||
2916 kgrittn@postgresql.o 327 [ + + ]: 3950 : PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz));
328 : : }
329 : :
330 : : /*
331 : : * Insert an index tuple into the index relation. The revmap is updated to
332 : : * mark the range containing the given page as pointing to the inserted entry.
333 : : * A WAL record is written.
334 : : *
335 : : * The buffer, if valid, is first checked for free space to insert the new
336 : : * entry; if there isn't enough, a new buffer is obtained and pinned. No
337 : : * buffer lock must be held on entry, no buffer lock is held on exit.
338 : : *
339 : : * Return value is the offset number where the tuple was inserted.
340 : : */
341 : : OffsetNumber
3446 alvherre@alvh.no-ip. 342 : 2792 : brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
343 : : BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
344 : : BrinTuple *tup, Size itemsz)
345 : : {
346 : : Page page;
347 : : BlockNumber blk;
348 : : OffsetNumber off;
2202 tgl@sss.pgh.pa.us 349 : 2792 : Size freespace = 0;
350 : : Buffer revmapbuf;
351 : : ItemPointerData tid;
352 : : bool extended;
353 : :
3247 354 [ - + ]: 2792 : Assert(itemsz == MAXALIGN(itemsz));
355 : :
356 : : /* If the item is oversized, don't even bother. */
3092 alvherre@alvh.no-ip. 357 [ - + ]: 2792 : if (itemsz > BrinMaxItemSize)
358 : : {
3092 alvherre@alvh.no-ip. 359 [ # # ]:UBC 0 : ereport(ERROR,
360 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
361 : : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
362 : : itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
363 : : return InvalidOffsetNumber; /* keep compiler quiet */
364 : : }
365 : :
366 : : /* Make sure the revmap is long enough to contain the entry we need */
3446 alvherre@alvh.no-ip. 367 :CBC 2792 : brinRevmapExtend(revmap, heapBlk);
368 : :
369 : : /*
370 : : * Acquire lock on buffer supplied by caller, if any. If it doesn't have
371 : : * enough space, unpin it to obtain a new one below.
372 : : */
373 [ + + ]: 2792 : if (BufferIsValid(*buffer))
374 : : {
375 : : /*
376 : : * It's possible that another backend (or ourselves!) extended the
377 : : * revmap over the page we held a pin on, so we cannot assume that
378 : : * it's still a regular page.
379 : : */
380 : 1153 : LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
2916 kgrittn@postgresql.o 381 [ + + ]: 1153 : if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
382 : : {
3446 alvherre@alvh.no-ip. 383 : 57 : UnlockReleaseBuffer(*buffer);
384 : 57 : *buffer = InvalidBuffer;
385 : : }
386 : : }
387 : :
388 : : /*
389 : : * If we still don't have a usable buffer, have brin_getinsertbuffer
390 : : * obtain one for us.
391 : : */
392 [ + + ]: 2792 : if (!BufferIsValid(*buffer))
393 : : {
394 : : do
3092 395 : 1696 : *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
396 [ - + ]: 1696 : while (!BufferIsValid(*buffer));
397 : : }
398 : : else
3167 399 : 1096 : extended = false;
400 : :
401 : : /* Now obtain lock on revmap buffer */
3446 402 : 2792 : revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
403 : :
2916 kgrittn@postgresql.o 404 : 2792 : page = BufferGetPage(*buffer);
3446 alvherre@alvh.no-ip. 405 : 2792 : blk = BufferGetBlockNumber(*buffer);
406 : :
407 : : /* Execute the actual insertion */
408 : 2792 : START_CRIT_SECTION();
3168 409 [ + + ]: 2792 : if (extended)
2202 tgl@sss.pgh.pa.us 410 : 223 : brin_page_init(page, BRIN_PAGETYPE_REGULAR);
3446 alvherre@alvh.no-ip. 411 : 2792 : off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
412 : : false, false);
413 [ - + ]: 2792 : if (off == InvalidOffsetNumber)
2202 tgl@sss.pgh.pa.us 414 [ # # ]:UBC 0 : elog(ERROR, "failed to add BRIN tuple to new page");
3446 alvherre@alvh.no-ip. 415 :CBC 2792 : MarkBufferDirty(*buffer);
416 : :
417 : : /* needed to update FSM below */
2202 tgl@sss.pgh.pa.us 418 [ + + ]: 2792 : if (extended)
419 : 223 : freespace = br_page_get_freespace(page);
420 : :
3446 alvherre@alvh.no-ip. 421 : 2792 : ItemPointerSet(&tid, blk, off);
422 : 2792 : brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
423 : 2792 : MarkBufferDirty(revmapbuf);
424 : :
425 : : /* XLOG stuff */
426 [ + + + + : 2792 : if (RelationNeedsWAL(idxrel))
+ + + + ]
427 : : {
428 : : xl_brin_insert xlrec;
429 : : XLogRecPtr recptr;
430 : : uint8 info;
431 : :
432 [ + + ]: 2343 : info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
433 : 2343 : xlrec.heapBlk = heapBlk;
434 : 2343 : xlrec.pagesPerRange = pagesPerRange;
3433 heikki.linnakangas@i 435 : 2343 : xlrec.offnum = off;
436 : :
437 : 2343 : XLogBeginInsert();
438 : 2343 : XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
439 : :
440 [ + + ]: 2343 : XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
441 : 2343 : XLogRegisterBufData(0, (char *) tup, itemsz);
442 : :
443 : 2343 : XLogRegisterBuffer(1, revmapbuf, 0);
444 : :
445 : 2343 : recptr = XLogInsert(RM_BRIN_ID, info);
446 : :
3446 alvherre@alvh.no-ip. 447 : 2343 : PageSetLSN(page, recptr);
2916 kgrittn@postgresql.o 448 : 2343 : PageSetLSN(BufferGetPage(revmapbuf), recptr);
449 : : }
450 : :
3446 alvherre@alvh.no-ip. 451 [ - + ]: 2792 : END_CRIT_SECTION();
452 : :
453 : : /* Tuple is firmly on buffer; we can release our locks */
454 : 2792 : LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
455 : 2792 : LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
456 : :
457 : : BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
458 : : blk, off, heapBlk));
459 : :
460 [ + + ]: 2792 : if (extended)
461 : : {
1804 akapila@postgresql.o 462 : 223 : RecordPageWithFreeSpace(idxrel, blk, freespace);
2202 tgl@sss.pgh.pa.us 463 : 223 : FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
464 : : }
465 : :
3446 alvherre@alvh.no-ip. 466 : 2792 : return off;
467 : : }
468 : :
469 : : /*
470 : : * Initialize a page with the given type.
471 : : *
472 : : * Caller is responsible for marking it dirty, as appropriate.
473 : : */
474 : : void
475 : 708 : brin_page_init(Page page, uint16 type)
476 : : {
477 : 708 : PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace));
478 : :
3323 479 : 708 : BrinPageType(page) = type;
3446 480 : 708 : }
481 : :
482 : : /*
483 : : * Initialize a new BRIN index's metapage.
484 : : */
485 : : void
486 : 206 : brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
487 : : {
488 : : BrinMetaPageData *metadata;
489 : :
490 : 206 : brin_page_init(page, BRIN_PAGETYPE_META);
491 : :
492 : 206 : metadata = (BrinMetaPageData *) PageGetContents(page);
493 : :
494 : 206 : metadata->brinMagic = BRIN_META_MAGIC;
495 : 206 : metadata->brinVersion = version;
496 : 206 : metadata->pagesPerRange = pagesPerRange;
497 : :
498 : : /*
499 : : * Note we cheat here a little. 0 is not a valid revmap block number
500 : : * (because it's the metapage buffer), but doing this enables the first
501 : : * revmap page to be created when the index is.
502 : : */
503 : 206 : metadata->lastRevmapPage = 0;
504 : :
505 : : /*
506 : : * Set pd_lower just past the end of the metadata. This is essential,
507 : : * because without doing so, metadata will be lost if xlog.c compresses
508 : : * the page.
509 : : */
2355 tgl@sss.pgh.pa.us 510 : 206 : ((PageHeader) page)->pd_lower =
511 : 206 : ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
3446 alvherre@alvh.no-ip. 512 : 206 : }
513 : :
514 : : /*
515 : : * Initiate page evacuation protocol.
516 : : *
517 : : * The page must be locked in exclusive mode by the caller.
518 : : *
519 : : * If the page is not yet initialized or empty, return false without doing
520 : : * anything; it can be used for revmap without any further changes. If it
521 : : * contains tuples, mark it for evacuation and return true.
522 : : */
523 : : bool
524 : 168 : brin_start_evacuating_page(Relation idxRel, Buffer buf)
525 : : {
526 : : OffsetNumber off;
527 : : OffsetNumber maxoff;
528 : : Page page;
529 : :
2916 kgrittn@postgresql.o 530 : 168 : page = BufferGetPage(buf);
531 : :
3446 alvherre@alvh.no-ip. 532 [ + + ]: 168 : if (PageIsNew(page))
533 : 166 : return false;
534 : :
535 : 2 : maxoff = PageGetMaxOffsetNumber(page);
536 [ + + ]: 292 : for (off = FirstOffsetNumber; off <= maxoff; off++)
537 : : {
538 : : ItemId lp;
539 : :
540 : 291 : lp = PageGetItemId(page, off);
541 [ + + ]: 291 : if (ItemIdIsUsed(lp))
542 : : {
543 : : /*
544 : : * Prevent other backends from adding more stuff to this page:
545 : : * BRIN_EVACUATE_PAGE informs br_page_get_freespace that this page
546 : : * can no longer be used to add new tuples. Note that this flag
547 : : * is not WAL-logged, except accidentally.
548 : : */
3323 549 : 1 : BrinPageFlags(page) |= BRIN_EVACUATE_PAGE;
3446 550 : 1 : MarkBufferDirtyHint(buf, true);
551 : :
552 : 1 : return true;
553 : : }
554 : : }
555 : 1 : return false;
556 : : }
557 : :
558 : : /*
559 : : * Move all tuples out of a page.
560 : : *
561 : : * The caller must hold lock on the page. The lock and pin are released.
562 : : */
563 : : void
564 : 1 : brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
565 : : BrinRevmap *revmap, Buffer buf)
566 : : {
567 : : OffsetNumber off;
568 : : OffsetNumber maxoff;
569 : : Page page;
2564 570 : 1 : BrinTuple *btup = NULL;
571 : 1 : Size btupsz = 0;
572 : :
2916 kgrittn@postgresql.o 573 : 1 : page = BufferGetPage(buf);
574 : :
3323 alvherre@alvh.no-ip. 575 [ - + ]: 1 : Assert(BrinPageFlags(page) & BRIN_EVACUATE_PAGE);
576 : :
3446 577 : 1 : maxoff = PageGetMaxOffsetNumber(page);
578 [ + + ]: 292 : for (off = FirstOffsetNumber; off <= maxoff; off++)
579 : : {
580 : : BrinTuple *tup;
581 : : Size sz;
582 : : ItemId lp;
583 : :
584 [ - + ]: 291 : CHECK_FOR_INTERRUPTS();
585 : :
586 : 291 : lp = PageGetItemId(page, off);
587 [ + - ]: 291 : if (ItemIdIsUsed(lp))
588 : : {
589 : 291 : sz = ItemIdGetLength(lp);
590 : 291 : tup = (BrinTuple *) PageGetItem(page, lp);
2564 591 : 291 : tup = brin_copy_tuple(tup, sz, btup, &btupsz);
592 : :
3446 593 : 291 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
594 : :
595 [ - + ]: 291 : if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
596 : : buf, off, tup, sz, tup, sz, false))
3446 alvherre@alvh.no-ip. 597 :UBC 0 : off--; /* retry */
598 : :
3446 alvherre@alvh.no-ip. 599 :CBC 291 : LockBuffer(buf, BUFFER_LOCK_SHARE);
600 : :
601 : : /* It's possible that someone extended the revmap over this page */
602 [ - + ]: 291 : if (!BRIN_IS_REGULAR_PAGE(page))
3446 alvherre@alvh.no-ip. 603 :UBC 0 : break;
604 : : }
605 : : }
606 : :
3446 alvherre@alvh.no-ip. 607 :CBC 1 : UnlockReleaseBuffer(buf);
608 : 1 : }
609 : :
610 : : /*
611 : : * Given a BRIN index page, initialize it if necessary, and record its
612 : : * current free space in the FSM.
613 : : *
614 : : * The main use for this is when, during vacuuming, an uninitialized page is
615 : : * found, which could be the result of relation extension followed by a crash
616 : : * before the page can be used.
617 : : *
618 : : * Here, we don't bother to update upper FSM pages, instead expecting that our
619 : : * caller (brin_vacuum_scan) will fix them at the end of the scan. Elsewhere
620 : : * in this file, it's generally a good idea to propagate additions of free
621 : : * space into the upper FSM pages immediately.
622 : : */
623 : : void
3168 624 : 291 : brin_page_cleanup(Relation idxrel, Buffer buf)
625 : : {
2916 kgrittn@postgresql.o 626 : 291 : Page page = BufferGetPage(buf);
627 : :
628 : : /*
629 : : * If a page was left uninitialized, initialize it now; also record it in
630 : : * FSM.
631 : : *
632 : : * Somebody else might be extending the relation concurrently. To avoid
633 : : * re-initializing the page before they can grab the buffer lock, we
634 : : * acquire the extension lock momentarily. Since they hold the extension
635 : : * lock from before getting the page and after its been initialized, we're
636 : : * sure to see their initialization.
637 : : */
3168 alvherre@alvh.no-ip. 638 [ - + ]: 291 : if (PageIsNew(page))
639 : : {
3168 alvherre@alvh.no-ip. 640 :UBC 0 : LockRelationForExtension(idxrel, ShareLock);
641 : 0 : UnlockRelationForExtension(idxrel, ShareLock);
642 : :
643 : 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
644 [ # # ]: 0 : if (PageIsNew(page))
645 : : {
646 : 0 : brin_initialize_empty_new_buffer(idxrel, buf);
647 : 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
2202 tgl@sss.pgh.pa.us 648 : 0 : return;
649 : : }
3168 alvherre@alvh.no-ip. 650 : 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
651 : : }
652 : :
653 : : /* Nothing to be done for non-regular index pages */
2916 kgrittn@postgresql.o 654 [ + + ]:CBC 291 : if (BRIN_IS_META_PAGE(BufferGetPage(buf)) ||
655 [ + + ]: 224 : BRIN_IS_REVMAP_PAGE(BufferGetPage(buf)))
2202 tgl@sss.pgh.pa.us 656 : 134 : return;
657 : :
658 : : /* Measure free space and record it */
659 : 157 : RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
660 : : br_page_get_freespace(page));
661 : : }
662 : :
663 : : /*
664 : : * Return a pinned and exclusively locked buffer which can be used to insert an
665 : : * index item of size itemsz (caller must ensure not to request sizes
666 : : * impossible to fulfill). If oldbuf is a valid buffer, it is also locked (in
667 : : * an order determined to avoid deadlocks).
668 : : *
669 : : * If we find that the old page is no longer a regular index page (because
670 : : * of a revmap extension), the old buffer is unlocked and we return
671 : : * InvalidBuffer.
672 : : *
673 : : * If there's no existing page with enough free space to accommodate the new
674 : : * item, the relation is extended. If this happens, *extended is set to true,
675 : : * and it is the caller's responsibility to initialize the page (and WAL-log
676 : : * that fact) prior to use. The caller should also update the FSM with the
677 : : * page's remaining free space after the insertion.
678 : : *
679 : : * Note that the caller is not expected to update FSM unless *extended is set
680 : : * true. This policy means that we'll update FSM when a page is created, and
681 : : * when it's found to have too little space for a desired tuple insertion,
682 : : * but not every single time we add a tuple to the page.
683 : : *
684 : : * Note that in some corner cases it is possible for this routine to extend
685 : : * the relation and then not return the new page. It is this routine's
686 : : * responsibility to WAL-log the page initialization and to record the page in
687 : : * FSM if that happens, since the caller certainly can't do it.
688 : : */
689 : : static Buffer
3446 alvherre@alvh.no-ip. 690 : 2003 : brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
691 : : bool *extended)
692 : : {
693 : : BlockNumber oldblk;
694 : : BlockNumber newblk;
695 : : Page page;
696 : : Size freespace;
697 : :
698 : : /* callers must have checked */
3092 699 [ - + ]: 2003 : Assert(itemsz <= BrinMaxItemSize);
700 : :
3446 701 [ + + ]: 2003 : if (BufferIsValid(oldbuf))
702 : 307 : oldblk = BufferGetBlockNumber(oldbuf);
703 : : else
704 : 1696 : oldblk = InvalidBlockNumber;
705 : :
706 : : /* Choose initial target page, re-using existing target if known */
2202 tgl@sss.pgh.pa.us 707 [ + - ]: 2003 : newblk = RelationGetTargetBlock(irel);
708 [ + + ]: 2003 : if (newblk == InvalidBlockNumber)
1804 akapila@postgresql.o 709 : 203 : newblk = GetPageWithFreeSpace(irel, itemsz);
710 : :
711 : : /*
712 : : * Loop until we find a page with sufficient free space. By the time we
713 : : * return to caller out of this loop, both buffers are valid and locked;
714 : : * if we have to restart here, neither page is locked and newblk isn't
715 : : * pinned (if it's even valid).
716 : : */
717 : : for (;;)
3446 alvherre@alvh.no-ip. 718 : 74 : {
719 : : Buffer buf;
720 : 2077 : bool extensionLockHeld = false;
721 : :
722 [ - + ]: 2077 : CHECK_FOR_INTERRUPTS();
723 : :
2202 tgl@sss.pgh.pa.us 724 : 2077 : *extended = false;
725 : :
3446 alvherre@alvh.no-ip. 726 [ + + ]: 2077 : if (newblk == InvalidBlockNumber)
727 : : {
728 : : /*
729 : : * There's not enough free space in any existing index page,
730 : : * according to the FSM: extend the relation to obtain a shiny new
731 : : * page.
732 : : *
733 : : * XXX: It's likely possible to use RBM_ZERO_AND_LOCK here,
734 : : * which'd avoid the need to hold the extension lock during buffer
735 : : * reclaim.
736 : : */
737 [ + - + + ]: 234 : if (!RELATION_IS_LOCAL(irel))
738 : : {
739 : 20 : LockRelationForExtension(irel, ExclusiveLock);
740 : 20 : extensionLockHeld = true;
741 : : }
742 : 234 : buf = ReadBuffer(irel, P_NEW);
743 : 234 : newblk = BufferGetBlockNumber(buf);
3168 744 : 234 : *extended = true;
745 : :
746 : : BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
747 : : BufferGetBlockNumber(buf)));
748 : : }
3446 749 [ + + ]: 1843 : else if (newblk == oldblk)
750 : : {
751 : : /*
752 : : * There's an odd corner-case here where the FSM is out-of-date,
753 : : * and gave us the old page.
754 : : */
755 : 13 : buf = oldbuf;
756 : : }
757 : : else
758 : : {
759 : 1830 : buf = ReadBuffer(irel, newblk);
760 : : }
761 : :
762 : : /*
763 : : * We lock the old buffer first, if it's earlier than the new one; but
764 : : * then we need to check that it hasn't been turned into a revmap page
765 : : * concurrently. If we detect that that happened, give up and tell
766 : : * caller to start over.
767 : : */
768 [ + + + + ]: 2077 : if (BufferIsValid(oldbuf) && oldblk < newblk)
769 : : {
770 : 311 : LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
2916 kgrittn@postgresql.o 771 [ - + ]: 311 : if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
772 : : {
3446 alvherre@alvh.no-ip. 773 :UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
774 : :
775 : : /*
776 : : * It is possible that the new page was obtained from
777 : : * extending the relation. In that case, we must be sure to
778 : : * record it in the FSM before leaving, because otherwise the
779 : : * space would be lost forever. However, we cannot let an
780 : : * uninitialized page get in the FSM, so we need to initialize
781 : : * it first.
782 : : */
3168 783 [ # # ]: 0 : if (*extended)
784 : 0 : brin_initialize_empty_new_buffer(irel, buf);
785 : :
786 [ # # ]: 0 : if (extensionLockHeld)
787 : 0 : UnlockRelationForExtension(irel, ExclusiveLock);
788 : :
3446 789 : 0 : ReleaseBuffer(buf);
790 : :
2202 tgl@sss.pgh.pa.us 791 [ # # ]: 0 : if (*extended)
792 : : {
793 : 0 : FreeSpaceMapVacuumRange(irel, newblk, newblk + 1);
794 : : /* shouldn't matter, but don't confuse caller */
795 : 0 : *extended = false;
796 : : }
797 : :
3446 alvherre@alvh.no-ip. 798 : 0 : return InvalidBuffer;
799 : : }
800 : : }
801 : :
3446 alvherre@alvh.no-ip. 802 :CBC 2077 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
803 : :
804 [ + + ]: 2077 : if (extensionLockHeld)
805 : 20 : UnlockRelationForExtension(irel, ExclusiveLock);
806 : :
2916 kgrittn@postgresql.o 807 : 2077 : page = BufferGetPage(buf);
808 : :
809 : : /*
810 : : * We have a new buffer to insert into. Check that the new page has
811 : : * enough free space, and return it if it does; otherwise start over.
812 : : * (br_page_get_freespace also checks that the FSM didn't hand us a
813 : : * page that has since been repurposed for the revmap.)
814 : : */
3168 alvherre@alvh.no-ip. 815 : 4154 : freespace = *extended ?
3092 816 [ + + ]: 2077 : BrinMaxItemSize : br_page_get_freespace(page);
3446 817 [ + + ]: 2077 : if (freespace >= itemsz)
818 : : {
2202 tgl@sss.pgh.pa.us 819 : 2003 : RelationSetTargetBlock(irel, newblk);
820 : :
821 : : /*
822 : : * Lock the old buffer if not locked already. Note that in this
823 : : * case we know for sure it's a regular page: it's later than the
824 : : * new page we just got, which is not a revmap page, and revmap
825 : : * pages are always consecutive.
826 : : */
3446 alvherre@alvh.no-ip. 827 [ + + - + ]: 2003 : if (BufferIsValid(oldbuf) && oldblk > newblk)
828 : : {
3446 alvherre@alvh.no-ip. 829 :UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
2916 kgrittn@postgresql.o 830 [ # # ]: 0 : Assert(BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)));
831 : : }
832 : :
3446 alvherre@alvh.no-ip. 833 :CBC 2003 : return buf;
834 : : }
835 : :
836 : : /* This page is no good. */
837 : :
838 : : /*
839 : : * If an entirely new page does not contain enough free space for the
840 : : * new item, then surely that item is oversized. Complain loudly; but
841 : : * first make sure we initialize the page and record it as free, for
842 : : * next time.
843 : : */
3168 844 [ - + ]: 74 : if (*extended)
845 : : {
3168 alvherre@alvh.no-ip. 846 :UBC 0 : brin_initialize_empty_new_buffer(irel, buf);
847 : : /* since this should not happen, skip FreeSpaceMapVacuum */
848 : :
3446 849 [ # # ]: 0 : ereport(ERROR,
850 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
851 : : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
852 : : itemsz, freespace, RelationGetRelationName(irel))));
853 : : return InvalidBuffer; /* keep compiler quiet */
854 : : }
855 : :
3446 alvherre@alvh.no-ip. 856 [ + + ]:CBC 74 : if (newblk != oldblk)
857 : 61 : UnlockReleaseBuffer(buf);
858 [ + + + - ]: 74 : if (BufferIsValid(oldbuf) && oldblk <= newblk)
859 : 17 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
860 : :
861 : : /*
862 : : * Update the FSM with the new, presumably smaller, freespace value
863 : : * for this page, then search for a new target page.
864 : : */
865 : 74 : newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
866 : : }
867 : : }
868 : :
869 : : /*
870 : : * Initialize the page in 'buffer', belonging to index 'idxrel', as an empty
871 : : * regular BRIN page, WAL-log this, and record the page in FSM.
872 : : *
873 : : * There are several corner situations in which we extend the relation to
874 : : * obtain a new page and later find that we cannot use it immediately. When
875 : : * that happens, we don't want to let the page go unrecorded in FSM, because
876 : : * there is no mechanism to get the space back and the index would bloat.
877 : : * Also, because we would not WAL-log the action that would initialize the
878 : : * page, the page would go uninitialized in a standby (or after recovery).
879 : : *
880 : : * While we record the page in FSM here, caller is responsible for doing FSM
881 : : * upper-page update if that seems appropriate.
882 : : */
883 : : static void
3168 alvherre@alvh.no-ip. 884 :UBC 0 : brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
885 : : {
886 : : Page page;
887 : :
888 : : BRIN_elog((DEBUG2,
889 : : "brin_initialize_empty_new_buffer: initializing blank page %u",
890 : : BufferGetBlockNumber(buffer)));
891 : :
892 : 0 : START_CRIT_SECTION(); /* init, dirtying and WAL-logging form one critical section */
2916 kgrittn@postgresql.o 893 : 0 : page = BufferGetPage(buffer);
3168 alvherre@alvh.no-ip. 894 : 0 : brin_page_init(page, BRIN_PAGETYPE_REGULAR);
895 : 0 : MarkBufferDirty(buffer);
896 : 0 : log_newpage_buffer(buffer, true);
897 [ # # ]: 0 : END_CRIT_SECTION();
898 : :
899 : : /*
900 : : * We update the FSM for this page, but this is not WAL-logged. This is
901 : : * acceptable because VACUUM will scan the index and update the FSM with
902 : : * pages whose FSM records were forgotten in a crash.
903 : : */
904 : 0 : RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
905 : : br_page_get_freespace(page));
906 : 0 : }
907 : :
908 : :
909 : : /*
910 : : * Return the amount of free space on a regular BRIN index page.
911 : : *
912 : : * If the page is not a regular page, or has been marked with the
913 : : * BRIN_EVACUATE_PAGE flag, returns 0; else returns PageGetFreeSpace(page).
914 : : */
915 : : static Size
3446 alvherre@alvh.no-ip. 916 :CBC 3387 : br_page_get_freespace(Page page)
917 : : {
918 [ + - ]: 3387 : if (!BRIN_IS_REGULAR_PAGE(page) ||
3323 919 [ - + ]: 3387 : (BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
3446 alvherre@alvh.no-ip. 920 :UBC 0 : return 0; /* not regular, or flagged BRIN_EVACUATE_PAGE */
921 : : else
3446 alvherre@alvh.no-ip. 922 :CBC 3387 : return PageGetFreeSpace(page);
923 : : }
|