Age Owner TLA Line data Source code
1 : /*
2 : * brin_pageops.c
3 : * Page-handling routines for BRIN indexes
4 : *
5 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
6 : * Portions Copyright (c) 1994, Regents of the University of California
7 : *
8 : * IDENTIFICATION
9 : * src/backend/access/brin/brin_pageops.c
10 : */
11 : #include "postgres.h"
12 :
13 : #include "access/brin_page.h"
14 : #include "access/brin_pageops.h"
15 : #include "access/brin_revmap.h"
16 : #include "access/brin_xlog.h"
17 : #include "access/xloginsert.h"
18 : #include "miscadmin.h"
19 : #include "storage/bufmgr.h"
20 : #include "storage/freespace.h"
21 : #include "storage/lmgr.h"
22 : #include "storage/smgr.h"
23 : #include "utils/rel.h"
24 :
25 : /*
26 : * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page. We can tolerate
27 : * a single item per page, unlike other index AMs.
 *
 * The value is the block size minus the max-aligned page header plus one
 * line pointer, minus the max-aligned BRIN special space, with the result
 * rounded down to a max-aligned size.
28 : */
29 : #define BrinMaxItemSize \
30 : MAXALIGN_DOWN(BLCKSZ - \
31 : (MAXALIGN(SizeOfPageHeaderData + \
32 : sizeof(ItemIdData)) + \
33 : MAXALIGN(sizeof(BrinSpecialSpace))))
34 :
35 : static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
36 : bool *extended);
37 : static Size br_page_get_freespace(Page page);
38 : static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
39 :
40 :
41 : /*
42 : * Update tuple origtup (size origsz), located in offset oldoff of buffer
43 : * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
44 : * at heapBlk. oldbuf must not be locked on entry, and is not locked at exit.
45 : *
46 : * If samepage is true, attempt to put the new tuple in the same page, but if
47 : * there's no room, use some other one.
48 : *
49 : * If the update is successful, return true; the revmap is updated to point to
50 : * the new tuple. If the update is not done for whatever reason, return false.
51 : * Caller may retry the update if this happens.
52 : */
53 : bool
3075 alvherre 54 CBC 3436 : brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
55 : BrinRevmap *revmap, BlockNumber heapBlk,
56 : Buffer oldbuf, OffsetNumber oldoff,
57 : const BrinTuple *origtup, Size origsz,
58 : const BrinTuple *newtup, Size newsz,
59 : bool samepage)
60 : {
61 : Page oldpage;
62 : ItemId oldlp;
63 : BrinTuple *oldtup;
64 : Size oldsz;
65 : Buffer newbuf;
1831 tgl 66 3436 : BlockNumber newblk = InvalidBlockNumber;
67 : bool extended;
68 :
2876 69 3436 : Assert(newsz == MAXALIGN(newsz));
70 :
71 : /* If the item is oversized, don't bother. */
2721 alvherre 72 3436 : if (newsz > BrinMaxItemSize)
73 : {
2721 alvherre 74 UBC 0 : ereport(ERROR,
75 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
76 : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
77 : newsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
78 : return false; /* keep compiler quiet */
79 : }
80 :
81 : /* make sure the revmap is long enough to contain the entry we need */
3075 alvherre 82 CBC 3436 : brinRevmapExtend(revmap, heapBlk);
83 :
84 3436 : if (!samepage)
85 : {
86 : /* need a page on which to put the item */
87 307 : newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
88 307 : if (!BufferIsValid(newbuf))
89 : {
2797 alvherre 90 UBC 0 : Assert(!extended);
3075 91 0 : return false;
92 : }
93 :
94 : /*
95 : * Note: it's possible (though unlikely) that the returned newbuf is
96 : * the same as oldbuf, if brin_getinsertbuffer determined that the old
97 : * buffer does in fact have enough space.
98 : */
3075 alvherre 99 CBC 307 : if (newbuf == oldbuf)
100 : {
2797 alvherre 101 UBC 0 : Assert(!extended);
3075 102 0 : newbuf = InvalidBuffer;
103 : }
104 : else
1831 tgl 105 CBC 307 : newblk = BufferGetBlockNumber(newbuf);
106 : }
107 : else
108 : {
3075 alvherre 109 3129 : LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
110 3129 : newbuf = InvalidBuffer;
2796 111 3129 : extended = false;
112 : }
2545 kgrittn 113 3436 : oldpage = BufferGetPage(oldbuf);
3075 alvherre 114 3436 : oldlp = PageGetItemId(oldpage, oldoff);
115 :
116 : /*
117 : * Check that the old tuple wasn't updated concurrently: it might have
118 : * moved someplace else entirely, and for that matter the whole page
119 : * might've become a revmap page. Note that in the first two cases
120 : * checked here, the "oldlp" we just calculated is garbage; but
121 : * PageGetItemId() is simple enough that it was safe to do that
122 : * calculation anyway.
123 : */
1984 tgl 124 6872 : if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
125 3436 : oldoff > PageGetMaxOffsetNumber(oldpage) ||
126 3436 : !ItemIdIsNormal(oldlp))
127 : {
3075 alvherre 128 UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
129 :
130 : /*
131 : * If this happens, and the new buffer was obtained by extending the
132 : * relation, then we need to ensure we don't leave it uninitialized or
133 : * forget about it.
134 : */
135 0 : if (BufferIsValid(newbuf))
136 : {
2797 137 0 : if (extended)
138 0 : brin_initialize_empty_new_buffer(idxrel, newbuf);
3075 139 0 : UnlockReleaseBuffer(newbuf);
2797 140 0 : if (extended)
1831 tgl 141 0 : FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
142 : }
3075 alvherre 143 0 : return false;
144 : }
145 :
3075 alvherre 146 CBC 3436 : oldsz = ItemIdGetLength(oldlp);
147 3436 : oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);
148 :
149 : /*
150 : * ... or it might have been updated in place to different contents.
151 : */
152 3436 : if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
153 : {
3075 alvherre 154 UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
155 0 : if (BufferIsValid(newbuf))
156 : {
157 : /* As above, initialize and record new page if we got one */
2797 158 0 : if (extended)
159 0 : brin_initialize_empty_new_buffer(idxrel, newbuf);
3075 160 0 : UnlockReleaseBuffer(newbuf);
2797 161 0 : if (extended)
1831 tgl 162 0 : FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
163 : }
3075 alvherre 164 0 : return false;
165 : }
166 :
167 : /*
168 : * Great, the old tuple is intact. We can proceed with the update.
169 : *
170 : * If there's enough room in the old page for the new tuple, replace it.
171 : *
172 : * Note that there might now be enough space on the page even though the
173 : * caller told us there isn't, if a concurrent update moved another tuple
174 : * elsewhere or replaced a tuple with a smaller one.
175 : */
2952 alvherre 176 CBC 6581 : if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
3075 177 3145 : brin_can_do_samepage_update(oldbuf, origsz, newsz))
178 : {
179 3129 : START_CRIT_SECTION();
1531 peter 180 3129 : if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) unconstify(BrinTuple *, newtup), newsz))
2403 tgl 181 UBC 0 : elog(ERROR, "failed to replace BRIN tuple");
3075 alvherre 182 CBC 3129 : MarkBufferDirty(oldbuf);
183 :
184 : /* XLOG stuff */
185 3129 : if (RelationNeedsWAL(idxrel))
186 : {
187 : xl_brin_samepage_update xlrec;
188 : XLogRecPtr recptr;
189 3126 : uint8 info = XLOG_BRIN_SAMEPAGE_UPDATE;
190 :
3062 heikki.linnakangas 191 3126 : xlrec.offnum = oldoff;
192 :
193 3126 : XLogBeginInsert();
194 3126 : XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
195 :
196 3126 : XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
1531 peter 197 3126 : XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
198 :
3062 heikki.linnakangas 199 3126 : recptr = XLogInsert(RM_BRIN_ID, info);
200 :
3075 alvherre 201 3126 : PageSetLSN(oldpage, recptr);
202 : }
203 :
204 3129 : END_CRIT_SECTION();
205 :
206 3129 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
207 :
1831 tgl 208 3129 : if (BufferIsValid(newbuf))
209 : {
210 : /* As above, initialize and record new page if we got one */
1831 tgl 211 UBC 0 : if (extended)
212 0 : brin_initialize_empty_new_buffer(idxrel, newbuf);
213 0 : UnlockReleaseBuffer(newbuf);
214 0 : if (extended)
215 0 : FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
216 : }
217 :
3075 alvherre 218 CBC 3129 : return true;
219 : }
220 307 : else if (newbuf == InvalidBuffer)
221 : {
222 : /*
223 : * Not enough space, but caller said that there was. Tell them to
224 : * start over.
225 : */
3075 alvherre 226 UBC 0 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
227 0 : return false;
228 : }
229 : else
230 : {
231 : /*
232 : * Not enough free space on the oldpage. Put the new tuple on the new
233 : * page, and update the revmap.
234 : */
2545 kgrittn 235 CBC 307 : Page newpage = BufferGetPage(newbuf);
236 : Buffer revmapbuf;
237 : ItemPointerData newtid;
238 : OffsetNumber newoff;
2797 alvherre 239 307 : Size freespace = 0;
240 :
3075 241 307 : revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
242 :
243 307 : START_CRIT_SECTION();
244 :
245 : /*
246 : * We need to initialize the page if it's newly obtained. Note we
247 : * will WAL-log the initialization as part of the update, so we don't
248 : * need to do that here.
249 : */
2797 250 307 : if (extended)
1831 tgl 251 11 : brin_page_init(newpage, BRIN_PAGETYPE_REGULAR);
252 :
2403 253 307 : PageIndexTupleDeleteNoCompact(oldpage, oldoff);
1531 peter 254 307 : newoff = PageAddItem(newpage, (Item) unconstify(BrinTuple *, newtup), newsz,
255 : InvalidOffsetNumber, false, false);
3075 alvherre 256 307 : if (newoff == InvalidOffsetNumber)
3075 alvherre 257 UBC 0 : elog(ERROR, "failed to add BRIN tuple to new page");
3075 alvherre 258 CBC 307 : MarkBufferDirty(oldbuf);
259 307 : MarkBufferDirty(newbuf);
260 :
261 : /* needed to update FSM below */
2797 262 307 : if (extended)
263 11 : freespace = br_page_get_freespace(newpage);
264 :
1831 tgl 265 307 : ItemPointerSet(&newtid, newblk, newoff);
3075 alvherre 266 307 : brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
267 307 : MarkBufferDirty(revmapbuf);
268 :
269 : /* XLOG stuff */
270 307 : if (RelationNeedsWAL(idxrel))
271 : {
272 : xl_brin_update xlrec;
273 : XLogRecPtr recptr;
274 : uint8 info;
275 :
276 307 : info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
277 :
3062 heikki.linnakangas 278 307 : xlrec.insert.offnum = newoff;
3074 alvherre 279 307 : xlrec.insert.heapBlk = heapBlk;
280 307 : xlrec.insert.pagesPerRange = pagesPerRange;
3062 heikki.linnakangas 281 307 : xlrec.oldOffnum = oldoff;
282 :
283 307 : XLogBeginInsert();
284 :
285 : /* new page */
286 307 : XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
287 :
288 307 : XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
1531 peter 289 307 : XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
290 :
291 : /* revmap page */
2281 alvherre 292 307 : XLogRegisterBuffer(1, revmapbuf, 0);
293 :
294 : /* old page */
3062 heikki.linnakangas 295 307 : XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
296 :
297 307 : recptr = XLogInsert(RM_BRIN_ID, info);
298 :
3075 alvherre 299 307 : PageSetLSN(oldpage, recptr);
300 307 : PageSetLSN(newpage, recptr);
2545 kgrittn 301 307 : PageSetLSN(BufferGetPage(revmapbuf), recptr);
302 : }
303 :
3075 alvherre 304 307 : END_CRIT_SECTION();
305 :
306 307 : LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
307 307 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
308 307 : UnlockReleaseBuffer(newbuf);
309 :
2797 310 307 : if (extended)
311 : {
1433 akapila 312 11 : RecordPageWithFreeSpace(idxrel, newblk, freespace);
1831 tgl 313 11 : FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
314 : }
315 :
3075 alvherre 316 307 : return true;
317 : }
318 : }
319 :
320 : /*
321 : * Return whether brin_doupdate can do a samepage update.
322 : */
323 : bool
324 6290 : brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
325 : {
326 : return
327 9930 : ((newsz <= origsz) ||
2545 kgrittn 328 3640 : PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz));
329 : }
330 :
331 : /*
332 : * Insert an index tuple into the index relation. The revmap is updated to
333 : * mark the range containing the given page as pointing to the inserted entry.
334 : * A WAL record is written.
335 : *
336 : * The buffer, if valid, is first checked for free space to insert the new
337 : * entry; if there isn't enough, a new buffer is obtained and pinned. No
338 : * buffer lock must be held on entry, no buffer lock is held on exit.
339 : *
340 : * Return value is the offset number where the tuple was inserted.
 *
 * Additional notes:
 * - itemsz must be MAXALIGNed; a value above BrinMaxItemSize raises an error.
 * - *buffer is an in/out argument: when the supplied buffer lacks space it
 *   is unlocked and released here, and *buffer is replaced by the buffer
 *   actually used.  On return *buffer is unlocked but not released by this
 *   function.
 * - The WAL record is emitted only when RelationNeedsWAL(idxrel) is true.
 * - When the target page came from extending the relation, it is
 *   initialized inside the critical section and its free space is recorded
 *   in the FSM after the locks are dropped.
341 : */
342 : OffsetNumber
3075 alvherre 343 2591 : brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
344 : BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
345 : BrinTuple *tup, Size itemsz)
346 : {
347 : Page page;
348 : BlockNumber blk;
349 : OffsetNumber off;
1831 tgl 350 2591 : Size freespace = 0;
351 : Buffer revmapbuf;
352 : ItemPointerData tid;
353 : bool extended;
354 :
2876 355 2591 : Assert(itemsz == MAXALIGN(itemsz));
356 :
357 : /* If the item is oversized, don't even bother. */
2721 alvherre 358 2591 : if (itemsz > BrinMaxItemSize)
359 : {
2721 alvherre 360 UBC 0 : ereport(ERROR,
361 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
362 : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
363 : itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
364 : return InvalidOffsetNumber; /* keep compiler quiet */
365 : }
366 :
367 : /* Make sure the revmap is long enough to contain the entry we need */
3075 alvherre 368 CBC 2591 : brinRevmapExtend(revmap, heapBlk);
369 :
370 : /*
371 : * Acquire lock on buffer supplied by caller, if any. If it doesn't have
372 : * enough space, unpin it to obtain a new one below.
373 : */
374 2591 : if (BufferIsValid(*buffer))
375 : {
376 : /*
377 : * It's possible that another backend (or ourselves!) extended the
378 : * revmap over the page we held a pin on, so we cannot assume that
379 : * it's still a regular page.
 *
 * br_page_get_freespace reports 0 for a page that is not a regular
 * page or is marked for evacuation, so such a page is rejected by
 * the size test below.
380 : */
381 1005 : LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
2545 kgrittn 382 1005 : if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
383 : {
3075 alvherre 384 57 : UnlockReleaseBuffer(*buffer);
385 57 : *buffer = InvalidBuffer;
386 : }
387 : }
388 :
389 : /*
390 : * If we still don't have a usable buffer, have brin_getinsertbuffer
391 : * obtain one for us.
 *
 * The call is repeated until a valid buffer comes back; "extended" is
 * set by brin_getinsertbuffer on each call.
392 : */
393 2591 : if (!BufferIsValid(*buffer))
394 : {
395 : do
2721 396 1643 : *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
397 1643 : while (!BufferIsValid(*buffer));
398 : }
399 : else
2796 400 948 : extended = false;
401 :
402 : /* Now obtain lock on revmap buffer */
3075 403 2591 : revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
404 :
2545 kgrittn 405 2591 : page = BufferGetPage(*buffer);
3075 alvherre 406 2591 : blk = BufferGetBlockNumber(*buffer);
407 :
408 : /* Execute the actual insertion */
409 2591 : START_CRIT_SECTION();
/* A page obtained by extending the relation still has to be initialized. */
2797 410 2591 : if (extended)
1831 tgl 411 176 : brin_page_init(page, BRIN_PAGETYPE_REGULAR);
3075 alvherre 412 2591 : off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
413 : false, false);
414 2591 : if (off == InvalidOffsetNumber)
1831 tgl 415 UBC 0 : elog(ERROR, "failed to add BRIN tuple to new page");
3075 alvherre 416 CBC 2591 : MarkBufferDirty(*buffer);
417 :
418 : /* needed to update FSM below */
1831 tgl 419 2591 : if (extended)
420 176 : freespace = br_page_get_freespace(page);
421 :
/* Point the revmap entry for heapBlk at the tuple just added. */
3075 alvherre 422 2591 : ItemPointerSet(&tid, blk, off);
423 2591 : brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
424 2591 : MarkBufferDirty(revmapbuf);
425 :
426 : /* XLOG stuff */
427 2591 : if (RelationNeedsWAL(idxrel))
428 : {
429 : xl_brin_insert xlrec;
430 : XLogRecPtr recptr;
431 : uint8 info;
432 :
433 2189 : info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
434 2189 : xlrec.heapBlk = heapBlk;
435 2189 : xlrec.pagesPerRange = pagesPerRange;
3062 heikki.linnakangas 436 2189 : xlrec.offnum = off;
437 :
438 2189 : XLogBeginInsert();
439 2189 : XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
440 :
/* block 0: the page receiving the tuple, with the tuple as its data */
441 2189 : XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
442 2189 : XLogRegisterBufData(0, (char *) tup, itemsz);
443 :
/* block 1: the revmap page */
444 2189 : XLogRegisterBuffer(1, revmapbuf, 0);
445 :
446 2189 : recptr = XLogInsert(RM_BRIN_ID, info);
447 :
3075 alvherre 448 2189 : PageSetLSN(page, recptr);
2545 kgrittn 449 2189 : PageSetLSN(BufferGetPage(revmapbuf), recptr);
450 : }
451 :
3075 alvherre 452 2591 : END_CRIT_SECTION();
453 :
454 : /* Tuple is firmly on buffer; we can release our locks */
455 2591 : LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
456 2591 : LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
457 :
458 : BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
459 : blk, off, heapBlk));
460 :
/* Only a freshly created page gets its free space recorded in the FSM here. */
461 2591 : if (extended)
462 : {
1433 akapila 463 176 : RecordPageWithFreeSpace(idxrel, blk, freespace);
1831 tgl 464 176 : FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
465 : }
466 :
3075 alvherre 467 2591 : return off;
468 : }
469 :
470 : /*
471 : * Initialize a page with the given type.
472 : *
473 : * Caller is responsible for marking it dirty, as appropriate.
474 : */
475 : void
476 516 : brin_page_init(Page page, uint16 type)
477 : {
478 516 : PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace));
479 :
2952 480 516 : BrinPageType(page) = type;
3075 481 516 : }
482 :
483 : /*
484 : * Initialize a new BRIN index's metapage.
485 : */
486 : void
487 144 : brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
488 : {
489 : BrinMetaPageData *metadata;
490 :
491 144 : brin_page_init(page, BRIN_PAGETYPE_META);
492 :
493 144 : metadata = (BrinMetaPageData *) PageGetContents(page);
494 :
495 144 : metadata->brinMagic = BRIN_META_MAGIC;
496 144 : metadata->brinVersion = version;
497 144 : metadata->pagesPerRange = pagesPerRange;
498 :
499 : /*
500 : * Note we cheat here a little. 0 is not a valid revmap block number
501 : * (because it's the metapage buffer), but doing this enables the first
502 : * revmap page to be created when the index is.
503 : */
504 144 : metadata->lastRevmapPage = 0;
505 :
506 : /*
507 : * Set pd_lower just past the end of the metadata. This is essential,
508 : * because without doing so, metadata will be lost if xlog.c compresses
509 : * the page.
510 : */
1984 tgl 511 144 : ((PageHeader) page)->pd_lower =
512 144 : ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
3075 alvherre 513 144 : }
514 :
515 : /*
516 : * Initiate page evacuation protocol.
517 : *
518 : * The page must be locked in exclusive mode by the caller.
519 : *
520 : * If the page is not yet initialized or empty, return false without doing
521 : * anything; it can be used for revmap without any further changes. If it
522 : * contains tuples, mark it for evacuation and return true.
523 : */
524 : bool
525 121 : brin_start_evacuating_page(Relation idxRel, Buffer buf)
526 : {
527 : OffsetNumber off;
528 : OffsetNumber maxoff;
529 : Page page;
530 :
2545 kgrittn 531 121 : page = BufferGetPage(buf);
532 :
3075 alvherre 533 121 : if (PageIsNew(page))
534 119 : return false;
535 :
536 2 : maxoff = PageGetMaxOffsetNumber(page);
537 292 : for (off = FirstOffsetNumber; off <= maxoff; off++)
538 : {
539 : ItemId lp;
540 :
541 291 : lp = PageGetItemId(page, off);
542 291 : if (ItemIdIsUsed(lp))
543 : {
544 : /*
545 : * Prevent other backends from adding more stuff to this page:
546 : * BRIN_EVACUATE_PAGE informs br_page_get_freespace that this page
547 : * can no longer be used to add new tuples. Note that this flag
548 : * is not WAL-logged, except accidentally.
549 : */
2952 550 1 : BrinPageFlags(page) |= BRIN_EVACUATE_PAGE;
3075 551 1 : MarkBufferDirtyHint(buf, true);
552 :
553 1 : return true;
554 : }
555 : }
556 1 : return false;
557 : }
558 :
559 : /*
560 : * Move all tuples out of a page.
561 : *
562 : * The caller must hold lock on the page. The lock and pin are released.
 *
 * The page must already carry the BRIN_EVACUATE_PAGE flag (asserted below).
 * Each used item is copied, the buffer lock is dropped, and brin_doupdate is
 * called with the copy as both the original and the replacement tuple and
 * with samepage = false.  The buffer is then re-locked in share mode before
 * the next item is examined.
563 : */
564 : void
565 1 : brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
566 : BrinRevmap *revmap, Buffer buf)
567 : {
568 : OffsetNumber off;
569 : OffsetNumber maxoff;
570 : Page page;
/*
 * NOTE(review): btup is never assigned after this initialization and
 * btupsz is only passed by address, so brin_copy_tuple always receives
 * dest == NULL here -- confirm whether buffer reuse across iterations
 * was intended.
 */
2193 571 1 : BrinTuple *btup = NULL;
572 1 : Size btupsz = 0;
573 :
2545 kgrittn 574 1 : page = BufferGetPage(buf);
575 :
2952 alvherre 576 1 : Assert(BrinPageFlags(page) & BRIN_EVACUATE_PAGE);
577 :
3075 578 1 : maxoff = PageGetMaxOffsetNumber(page);
579 292 : for (off = FirstOffsetNumber; off <= maxoff; off++)
580 : {
581 : BrinTuple *tup;
582 : Size sz;
583 : ItemId lp;
584 :
585 291 : CHECK_FOR_INTERRUPTS();
586 :
587 291 : lp = PageGetItemId(page, off);
588 291 : if (ItemIdIsUsed(lp))
589 : {
590 291 : sz = ItemIdGetLength(lp);
591 291 : tup = (BrinTuple *) PageGetItem(page, lp);
/* Work on a copy: the buffer lock is released just below. */
2193 592 291 : tup = brin_copy_tuple(tup, sz, btup, &btupsz);
593 :
/* brin_doupdate requires that oldbuf not be locked on entry. */
3075 594 291 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
595 :
596 291 : if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
597 : buf, off, tup, sz, tup, sz, false))
/* Step back so the loop's off++ revisits this same offset. */
3075 alvherre 598 UBC 0 : off--; /* retry */
599 :
3075 alvherre 600 CBC 291 : LockBuffer(buf, BUFFER_LOCK_SHARE);
601 :
602 : /* It's possible that someone extended the revmap over this page */
603 291 : if (!BRIN_IS_REGULAR_PAGE(page))
3075 alvherre 604 UBC 0 : break;
605 : }
606 : }
607 :
/* Drop the lock still held at this point, together with the pin. */
3075 alvherre 608 CBC 1 : UnlockReleaseBuffer(buf);
609 1 : }
610 :
611 : /*
612 : * Given a BRIN index page, initialize it if necessary, and record its
613 : * current free space in the FSM.
614 : *
615 : * The main use for this is when, during vacuuming, an uninitialized page is
616 : * found, which could be the result of relation extension followed by a crash
617 : * before the page can be used.
618 : *
619 : * Here, we don't bother to update upper FSM pages, instead expecting that our
620 : * caller (brin_vacuum_scan) will fix them at the end of the scan. Elsewhere
621 : * in this file, it's generally a good idea to propagate additions of free
622 : * space into the upper FSM pages immediately.
623 : */
624 : void
2797 625 183 : brin_page_cleanup(Relation idxrel, Buffer buf)
626 : {
2545 kgrittn 627 183 : Page page = BufferGetPage(buf);
628 :
629 : /*
630 : * If a page was left uninitialized, initialize it now; also record it in
631 : * FSM.
632 : *
633 : * Somebody else might be extending the relation concurrently. To avoid
634 : * re-initializing the page before they can grab the buffer lock, we
635 : * acquire the extension lock momentarily. Since they hold the extension
636 : * lock from before getting the page and after its been initialized, we're
637 : * sure to see their initialization.
638 : */
2797 alvherre 639 183 : if (PageIsNew(page))
640 : {
2797 alvherre 641 UBC 0 : LockRelationForExtension(idxrel, ShareLock);
642 0 : UnlockRelationForExtension(idxrel, ShareLock);
643 :
644 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
645 0 : if (PageIsNew(page))
646 : {
647 0 : brin_initialize_empty_new_buffer(idxrel, buf);
648 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1831 tgl 649 0 : return;
650 : }
2797 alvherre 651 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
652 : }
653 :
654 : /* Nothing to be done for non-regular index pages */
2545 kgrittn 655 CBC 183 : if (BRIN_IS_META_PAGE(BufferGetPage(buf)) ||
656 141 : BRIN_IS_REVMAP_PAGE(BufferGetPage(buf)))
1831 tgl 657 84 : return;
658 :
659 : /* Measure free space and record it */
660 99 : RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
661 : br_page_get_freespace(page));
662 : }
663 :
664 : /*
665 : * Return a pinned and exclusively locked buffer which can be used to insert an
666 : * index item of size itemsz (caller must ensure not to request sizes
667 : * impossible to fulfill). If oldbuf is a valid buffer, it is also locked (in
668 : * an order determined to avoid deadlocks).
669 : *
670 : * If we find that the old page is no longer a regular index page (because
671 : * of a revmap extension), the old buffer is unlocked and we return
672 : * InvalidBuffer.
673 : *
674 : * If there's no existing page with enough free space to accommodate the new
675 : * item, the relation is extended. If this happens, *extended is set to true,
676 : * and it is the caller's responsibility to initialize the page (and WAL-log
677 : * that fact) prior to use. The caller should also update the FSM with the
678 : * page's remaining free space after the insertion.
679 : *
680 : * Note that the caller is not expected to update FSM unless *extended is set
681 : * true. This policy means that we'll update FSM when a page is created, and
682 : * when it's found to have too little space for a desired tuple insertion,
683 : * but not every single time we add a tuple to the page.
684 : *
685 : * Note that in some corner cases it is possible for this routine to extend
686 : * the relation and then not return the new page. It is this routine's
687 : * responsibility to WAL-log the page initialization and to record the page in
688 : * FSM if that happens, since the caller certainly can't do it.
 *
 * The returned buffer may be oldbuf itself, when the candidate block turns
 * out to be the old block and it has enough room.  On a successful return
 * the chosen block is remembered as the relation's target block for later
 * calls.
689 : */
690 : static Buffer
3075 alvherre 691 1950 : brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
692 : bool *extended)
693 : {
694 : BlockNumber oldblk;
695 : BlockNumber newblk;
696 : Page page;
697 : Size freespace;
698 :
699 : /* callers must have checked */
2721 700 1950 : Assert(itemsz <= BrinMaxItemSize);
701 :
3075 702 1950 : if (BufferIsValid(oldbuf))
703 307 : oldblk = BufferGetBlockNumber(oldbuf);
704 : else
705 1643 : oldblk = InvalidBlockNumber;
706 :
707 : /* Choose initial target page, re-using existing target if known */
1831 tgl 708 1950 : newblk = RelationGetTargetBlock(irel);
709 1950 : if (newblk == InvalidBlockNumber)
1433 akapila 710 150 : newblk = GetPageWithFreeSpace(irel, itemsz);
711 :
712 : /*
713 : * Loop until we find a page with sufficient free space. By the time we
714 : * return to caller out of this loop, both buffers are valid and locked;
715 : * if we have to restart here, neither page is locked and newblk isn't
716 : * pinned (if it's even valid).
717 : */
718 : for (;;)
3075 alvherre 719 74 : {
720 : Buffer buf;
721 2024 : bool extensionLockHeld = false;
722 :
723 2024 : CHECK_FOR_INTERRUPTS();
724 :
/* Reset on every attempt; only the extension branch below sets it. */
1831 tgl 725 2024 : *extended = false;
726 :
3075 alvherre 727 2024 : if (newblk == InvalidBlockNumber)
728 : {
729 : /*
730 : * There's not enough free space in any existing index page,
731 : * according to the FSM: extend the relation to obtain a shiny new
732 : * page.
733 : *
734 : * XXX: It's likely possible to use RBM_ZERO_AND_LOCK here,
735 : * which'd avoid the need to hold the extension lock during buffer
736 : * reclaim.
737 : */
3075 alvherre 738 GIC 187 : if (!RELATION_IS_LOCAL(irel))
739 : {
740 11 : LockRelationForExtension(irel, ExclusiveLock);
741 11 : extensionLockHeld = true;
3075 alvherre 742 ECB : }
3075 alvherre 743 GIC 187 : buf = ReadBuffer(irel, P_NEW);
3075 alvherre 744 CBC 187 : newblk = BufferGetBlockNumber(buf);
2797 745 187 : *extended = true;
746 :
3074 alvherre 747 ECB : BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
748 : BufferGetBlockNumber(buf)));
3075 749 : }
3075 alvherre 750 GIC 1837 : else if (newblk == oldblk)
751 : {
752 : /*
753 : * There's an odd corner-case here where the FSM is out-of-date,
3075 alvherre 754 ECB : * and gave us the old page.
755 : */
3075 alvherre 756 GIC 13 : buf = oldbuf;
757 : }
758 : else
759 : {
3075 alvherre 760 CBC 1824 : buf = ReadBuffer(irel, newblk);
761 : }
762 :
763 : /*
3075 alvherre 764 ECB : * We lock the old buffer first, if it's earlier than the new one; but
765 : * then we need to check that it hasn't been turned into a revmap page
766 : * concurrently. If we detect that that happened, give up and tell
767 : * caller to start over.
768 : */
3075 alvherre 769 GIC 2024 : if (BufferIsValid(oldbuf) && oldblk < newblk)
770 : {
771 311 : LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
2545 kgrittn 772 311 : if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
3075 alvherre 773 ECB : {
3075 alvherre 774 UIC 0 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
2797 alvherre 775 ECB :
776 : /*
777 : * It is possible that the new page was obtained from
2797 alvherre 778 EUB : * extending the relation. In that case, we must be sure to
779 : * record it in the FSM before leaving, because otherwise the
780 : * space would be lost forever. However, we cannot let an
781 : * uninitialized page get in the FSM, so we need to initialize
782 : * it first.
783 : */
2797 alvherre 784 UIC 0 : if (*extended)
785 0 : brin_initialize_empty_new_buffer(irel, buf);
786 :
787 0 : if (extensionLockHeld)
2797 alvherre 788 UBC 0 : UnlockRelationForExtension(irel, ExclusiveLock);
2797 alvherre 789 EUB :
/* buf has not been locked on this path, so only the pin is dropped. */
3075 alvherre 790 UIC 0 : ReleaseBuffer(buf);
1831 tgl 791 EUB :
1831 tgl 792 UBC 0 : if (*extended)
793 : {
794 0 : FreeSpaceMapVacuumRange(irel, newblk, newblk + 1);
795 : /* shouldn't matter, but don't confuse caller */
796 0 : *extended = false;
797 : }
1831 tgl 798 EUB :
3075 alvherre 799 UIC 0 : return InvalidBuffer;
3075 alvherre 800 EUB : }
801 : }
802 :
3075 alvherre 803 GBC 2024 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
804 :
/* The extension lock is kept only until the new buffer is locked. */
3075 alvherre 805 GIC 2024 : if (extensionLockHeld)
806 11 : UnlockRelationForExtension(irel, ExclusiveLock);
3075 alvherre 807 ECB :
2545 kgrittn 808 GIC 2024 : page = BufferGetPage(buf);
3075 alvherre 809 ECB :
810 : /*
811 : * We have a new buffer to insert into. Check that the new page has
812 : * enough free space, and return it if it does; otherwise start over.
813 : * (br_page_get_freespace also checks that the FSM didn't hand us a
814 : * page that has since been repurposed for the revmap.)
 *
 * A page that was just added by extension is not measured:
 * initializing it is left to the caller, so its free space is taken
 * to be BrinMaxItemSize.
815 : */
2797 alvherre 816 GIC 4048 : freespace = *extended ?
2721 817 2024 : BrinMaxItemSize : br_page_get_freespace(page);
3075 818 2024 : if (freespace >= itemsz)
819 : {
1831 tgl 820 CBC 1950 : RelationSetTargetBlock(irel, newblk);
3075 alvherre 821 ECB :
822 : /*
823 : * Lock the old buffer if not locked already. Note that in this
824 : * case we know for sure it's a regular page: it's later than the
825 : * new page we just got, which is not a revmap page, and revmap
826 : * pages are always consecutive.
827 : */
3075 alvherre 828 GIC 1950 : if (BufferIsValid(oldbuf) && oldblk > newblk)
829 : {
3075 alvherre 830 UIC 0 : LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
2545 kgrittn 831 0 : Assert(BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)));
3075 alvherre 832 ECB : }
833 :
3075 alvherre 834 GBC 1950 : return buf;
3075 alvherre 835 EUB : }
836 :
837 : /* This page is no good. */
3075 alvherre 838 ECB :
839 : /*
840 : * If an entirely new page does not contain enough free space for the
841 : * new item, then surely that item is oversized. Complain loudly; but
842 : * first make sure we initialize the page and record it as free, for
843 : * next time.
844 : */
2797 alvherre 845 GIC 74 : if (*extended)
846 : {
2797 alvherre 847 UIC 0 : brin_initialize_empty_new_buffer(irel, buf);
848 : /* since this should not happen, skip FreeSpaceMapVacuum */
2797 alvherre 849 ECB :
3075 alvherre 850 UIC 0 : ereport(ERROR,
3075 alvherre 851 EUB : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
852 : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
853 : itemsz, freespace, RelationGetRelationName(irel))));
2118 tgl 854 : return InvalidBuffer; /* keep compiler quiet */
855 : }
856 :
/*
 * Undo this attempt's locking.  When newblk == oldblk, buf is oldbuf
 * itself: it is only unlocked by the second statement, and the pin on
 * it is kept.  Otherwise buf is unlocked and released, and oldbuf is
 * unlocked only if it was locked above, i.e. when it precedes newblk.
 */
3075 alvherre 857 GIC 74 : if (newblk != oldblk)
858 61 : UnlockReleaseBuffer(buf);
859 74 : if (BufferIsValid(oldbuf) && oldblk <= newblk)
860 17 : LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
3075 alvherre 861 ECB :
1831 tgl 862 : /*
863 : * Update the FSM with the new, presumably smaller, freespace value
864 : * for this page, then search for a new target page.
865 : */
3075 alvherre 866 GIC 74 : newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
867 : }
868 : }
869 :
2797 alvherre 870 ECB : /*
871 : * Initialize a page as an empty regular BRIN page, WAL-log this, and record
872 : * the page in FSM.
873 : *
874 : * There are several corner situations in which we extend the relation to
875 : * obtain a new page and later find that we cannot use it immediately. When
876 : * that happens, we don't want to leave the page go unrecorded in FSM, because
877 : * there is no mechanism to get the space back and the index would bloat.
878 : * Also, because we would not WAL-log the action that would initialize the
879 : * page, the page would go uninitialized in a standby (or after recovery).
880 : *
881 : * While we record the page in FSM here, caller is responsible for doing FSM
882 : * upper-page update if that seems appropriate.
883 : */
884 : static void
2797 alvherre 885 UIC 0 : brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
886 : {
887 : Page page;
888 :
2797 alvherre 889 EUB : BRIN_elog((DEBUG2,
890 : "brin_initialize_empty_new_buffer: initializing blank page %u",
891 : BufferGetBlockNumber(buffer)));
892 :
2797 alvherre 893 UIC 0 : START_CRIT_SECTION();
2545 kgrittn 894 0 : page = BufferGetPage(buffer);
2797 alvherre 895 0 : brin_page_init(page, BRIN_PAGETYPE_REGULAR);
896 0 : MarkBufferDirty(buffer);
2797 alvherre 897 UBC 0 : log_newpage_buffer(buffer, true);
898 0 : END_CRIT_SECTION();
2797 alvherre 899 EUB :
900 : /*
901 : * We update the FSM for this page, but this is not WAL-logged. This is
902 : * acceptable because VACUUM will scan the index and update the FSM with
903 : * pages whose FSM records were forgotten in a crash.
904 : */
2797 alvherre 905 UIC 0 : RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
906 : br_page_get_freespace(page));
907 0 : }
908 :
2797 alvherre 909 EUB :
910 : /*
3075 911 : * Return the amount of free space on a regular BRIN index page.
912 : *
913 : * If the page is not a regular page, or has been marked with the
914 : * BRIN_EVACUATE_PAGE flag, returns 0.
915 : */
916 : static Size
3075 alvherre 917 GIC 3128 : br_page_get_freespace(Page page)
918 : {
919 3128 : if (!BRIN_IS_REGULAR_PAGE(page) ||
2952 920 3128 : (BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
3075 alvherre 921 LBC 0 : return 0;
922 : else
3075 alvherre 923 CBC 3128 : return PageGetFreeSpace(page);
3075 alvherre 924 ECB : }
|