/*-------------------------------------------------------------------------
 *
 * gistvacuum.c
 *	  vacuuming routines for the postgres GiST index access method.
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/gist/gistvacuum.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/gist_private.h"
#include "access/transam.h"
#include "commands/vacuum.h"
#include "lib/integerset.h"
#include "miscadmin.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "utils/memutils.h"

/* Working state needed by gistbulkdelete */
typedef struct
{
	IndexVacuumInfo *info;
	IndexBulkDeleteResult *stats;
	IndexBulkDeleteCallback callback;
	void	   *callback_state;
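
	/*
	 * Insert pointer LSN as of the start of the scan; a leaf page whose NSN
	 * is newer than this was split after the scan started (see
	 * gistvacuumpage).
	 */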
	GistNSN		startNSN;

	/*
	 * These sets remember all internal pages, and all empty leaf pages,
	 * seen during the scan.  They are used in the second stage, when the
	 * empty pages are deleted.
	 */
	IntegerSet *internal_page_set;
	IntegerSet *empty_leaf_set;
	MemoryContext page_set_context;
} GistVacState;

static void gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
						   IndexBulkDeleteCallback callback, void *callback_state);
static void gistvacuumpage(GistVacState *vstate, BlockNumber blkno,
						   BlockNumber orig_blkno);
static void gistvacuum_delete_empty_pages(IndexVacuumInfo *info,
										  GistVacState *vstate);
static bool gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
						   Buffer parentBuffer, OffsetNumber downlink,
						   Buffer leafBuffer);

/*
 * VACUUM bulkdelete stage: remove index entries.
 */
IndexBulkDeleteResult *
gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
			   IndexBulkDeleteCallback callback, void *callback_state)
{
	/* allocate stats if first time through, else re-use existing struct */
	if (stats == NULL)
		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

	gistvacuumscan(info, stats, callback, callback_state);

	return stats;
}

/*
 * VACUUM cleanup stage: delete empty pages, and update index statistics.
 */
IndexBulkDeleteResult *
gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
{
	/* No-op in ANALYZE ONLY mode */
	if (info->analyze_only)
		return stats;

	/*
	 * If gistbulkdelete was called, we need not do anything, just return the
	 * stats from the latest gistbulkdelete call.  If it wasn't called, we
	 * still need to do a pass over the index, to obtain index statistics.
	 */
	if (stats == NULL)
	{
		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
		gistvacuumscan(info, stats, NULL, NULL);
	}

	/*
	 * It's quite possible for us to be fooled by concurrent page splits into
	 * double-counting some index tuples, so disbelieve any total that exceeds
	 * the underlying heap's count ... if we know that accurately.  Otherwise
	 * this might just make matters worse.
	 */
	if (!info->estimated_count)
	{
		if (stats->num_index_tuples > info->num_heap_tuples)
			stats->num_index_tuples = info->num_heap_tuples;
	}

	return stats;
}

/*
 * gistvacuumscan --- scan the index for VACUUMing purposes
 *
 * This scans the index for leaf tuples that are deletable according to the
 * vacuum callback, and updates the stats.  Both gistbulkdelete and
 * gistvacuumcleanup invoke this (the latter only if no gistbulkdelete call
 * occurred).
 *
 * This also makes note of any empty leaf pages, as well as all internal
 * pages, while looping over all index pages.  After scanning all the pages,
 * we remove the empty pages so that they can be reused.  Any deleted pages
 * are added directly to the free space map.  (They should already have been
 * added there when they were originally deleted, but it's possible that the
 * FSM was lost in a crash, for example.)
 *
 * The caller is responsible for initially allocating/zeroing a stats struct.
 */
static void
gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
			   IndexBulkDeleteCallback callback, void *callback_state)
{
	Relation	rel = info->index;
	GistVacState vstate;
	BlockNumber num_pages;
	bool		needLock;
	BlockNumber blkno;
	MemoryContext oldctx;

	/*
	 * Reset fields that track information about the entire index now.  This
	 * avoids double-counting in the case where a single VACUUM command
	 * requires multiple scans of the index.
	 *
	 * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
	 * since they track information about the VACUUM command, and so must last
	 * across each call to gistvacuumscan().
	 *
	 * (Note that pages_free is treated as state about the whole index, not
	 * the current VACUUM.  This is appropriate because RecordFreeIndexPage()
	 * calls are idempotent, and get repeated for the same deleted pages in
	 * some scenarios.  The point for us is to track the number of recyclable
	 * pages in the index at the end of the VACUUM command.)
	 */
	stats->num_pages = 0;
	stats->estimated_count = false;
	stats->num_index_tuples = 0;
	stats->pages_deleted = 0;
	stats->pages_free = 0;

	/*
	 * Create the integer sets to remember all the internal and the empty leaf
	 * pages in page_set_context.  Internally, the integer set will remember
	 * this context so that the subsequent allocations for these integer sets
	 * will be done from the same context.
	 *
	 * XXX the allocation sizes used below pre-date generation context's block
	 * growing code.  These values should likely be benchmarked and set to
	 * more suitable values.
	 */
	vstate.page_set_context = GenerationContextCreate(CurrentMemoryContext,
													  "GiST VACUUM page set context",
													  16 * 1024,
													  16 * 1024,
													  16 * 1024);
	oldctx = MemoryContextSwitchTo(vstate.page_set_context);
	vstate.internal_page_set = intset_create();
	vstate.empty_leaf_set = intset_create();
	MemoryContextSwitchTo(oldctx);

	/* Set up info to pass down to gistvacuumpage */
	vstate.info = info;
	vstate.stats = stats;
	vstate.callback = callback;
	vstate.callback_state = callback_state;
	if (RelationNeedsWAL(rel))
		vstate.startNSN = GetInsertRecPtr();
	else
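		/* relation doesn't write WAL; use a fake LSN counter instead */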
		vstate.startNSN = gistGetFakeLSN(rel);

	/*
	 * The outer loop iterates over all index pages, in physical order (we
	 * hope the kernel will cooperate in providing read-ahead for speed).  It
	 * is critical that we visit all leaf pages, including ones added after we
	 * start the scan, else we might fail to delete some deletable tuples.
	 * Hence, we must repeatedly check the relation length.  We must acquire
	 * the relation-extension lock while doing so to avoid a race condition:
	 * if someone else is extending the relation, there is a window where
	 * bufmgr/smgr have created a new all-zero page but it hasn't yet been
	 * write-locked by gistNewBuffer().  If we manage to scan such a page
	 * here, we'll improperly assume it can be recycled.  Taking the lock
	 * synchronizes things enough to prevent a problem: either num_pages won't
	 * include the new page, or gistNewBuffer already has write lock on the
	 * buffer and it will be fully initialized before we can examine it.  (See
	 * also vacuumlazy.c, which has the same issue.)  Also, we need not worry
	 * if a page is added immediately after we look; the page splitting code
	 * already has write-lock on the left page before it adds a right page, so
	 * we must already have processed any tuples due to be moved into such a
	 * page.
	 *
	 * We can skip locking for new or temp relations, however, since no one
	 * else could be accessing them.
	 */
	needLock = !RELATION_IS_LOCAL(rel);

	blkno = GIST_ROOT_BLKNO;
	for (;;)
	{
		/* Get the current relation length */
		if (needLock)
			LockRelationForExtension(rel, ExclusiveLock);
		num_pages = RelationGetNumberOfBlocks(rel);
		if (needLock)
			UnlockRelationForExtension(rel, ExclusiveLock);

		/* Quit if we've scanned the whole relation */
		if (blkno >= num_pages)
			break;
		/* Iterate over pages, then loop back to recheck length */
		for (; blkno < num_pages; blkno++)
			gistvacuumpage(&vstate, blkno, blkno);
	}

	/*
	 * If we found any recyclable pages (and recorded them in the FSM), then
	 * forcibly update the upper-level FSM pages to ensure that searchers can
	 * find them.  It's possible that the pages were also found during
	 * previous scans and so this is a waste of time, but it's cheap enough
	 * relative to scanning the index that it shouldn't matter much, and
	 * making sure that free pages are available sooner not later seems
	 * worthwhile.
	 *
	 * Note that if no recyclable pages exist, we don't bother vacuuming the
	 * FSM at all.
	 */
	if (stats->pages_free > 0)
		IndexFreeSpaceMapVacuum(rel);

	/* update statistics */
	stats->num_pages = num_pages;

	/*
	 * If we saw any empty pages, try to unlink them from the tree so that
	 * they can be reused.
	 */
	gistvacuum_delete_empty_pages(info, &vstate);

	/* we don't need the internal and empty page sets anymore */
	MemoryContextDelete(vstate.page_set_context);
	vstate.page_set_context = NULL;
	vstate.internal_page_set = NULL;
	vstate.empty_leaf_set = NULL;
}

/*
 * gistvacuumpage --- VACUUM one page
 *
 * This processes a single page for gistbulkdelete().  In some cases we
 * must go back and re-examine previously-scanned pages; this routine
 * recurses when necessary to handle that case.
 *
 * blkno is the page to process.  orig_blkno is the highest block number
 * reached by the outer gistvacuumscan loop (the same as blkno, unless we
 * are recursing to re-examine a previous page).
 */
static void
gistvacuumpage(GistVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
{
	IndexVacuumInfo *info = vstate->info;
	IndexBulkDeleteCallback callback = vstate->callback;
	void	   *callback_state = vstate->callback_state;
	Relation	rel = info->index;
	Buffer		buffer;
	Page		page;
	BlockNumber recurse_to;

restart:
	recurse_to = InvalidBlockNumber;

	/* call vacuum_delay_point while not holding any buffer lock */
	vacuum_delay_point();

	buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
								info->strategy);

	/*
	 * We are not going to stay here for long, so aggressively grab an
	 * exclusive lock.
	 */
	LockBuffer(buffer, GIST_EXCLUSIVE);
	page = (Page) BufferGetPage(buffer);

	if (gistPageRecyclable(page))
	{
		/* Okay to recycle this page */
		RecordFreeIndexPage(rel, blkno);
		vstate->stats->pages_deleted++;
		vstate->stats->pages_free++;
	}
	else if (GistPageIsDeleted(page))
	{
		/* Already deleted, but can't recycle yet */
		vstate->stats->pages_deleted++;
	}
	else if (GistPageIsLeaf(page))
	{
		OffsetNumber todelete[MaxOffsetNumber];
		int			ntodelete = 0;
		int			nremain;
		GISTPageOpaque opaque = GistPageGetOpaque(page);
		OffsetNumber maxoff = PageGetMaxOffsetNumber(page);

		/*
		 * Check whether we need to recurse back to earlier pages.  What we
		 * are concerned about is a page split that happened since we started
		 * the vacuum scan.  If the split moved some tuples to a lower page
		 * then we might have missed 'em.  If so, set up for tail recursion.
		 *
		 * This is similar to the checks we do during searches, when following
		 * a downlink, but we don't need to jump to higher-numbered pages,
		 * because we will process them later, anyway.
		 */
		if ((GistFollowRight(page) ||
			 vstate->startNSN < GistPageGetNSN(page)) &&
			(opaque->rightlink != InvalidBlockNumber) &&
			(opaque->rightlink < orig_blkno))
		{
			recurse_to = opaque->rightlink;
		}

		/*
		 * Scan over all items to see which ones need to be deleted according
		 * to the callback function.
		 */
		if (callback)
		{
			OffsetNumber off;

			for (off = FirstOffsetNumber;
				 off <= maxoff;
				 off = OffsetNumberNext(off))
			{
				ItemId		iid = PageGetItemId(page, off);
				IndexTuple	idxtuple = (IndexTuple) PageGetItem(page, iid);

				if (callback(&(idxtuple->t_tid), callback_state))
					todelete[ntodelete++] = off;
			}
		}

		/*
		 * Apply any needed deletes.  We issue just one WAL record per page,
		 * so as to minimize WAL traffic.
		 */
		if (ntodelete > 0)
		{
			START_CRIT_SECTION();

			MarkBufferDirty(buffer);

			PageIndexMultiDelete(page, todelete, ntodelete);
			GistMarkTuplesDeleted(page);

			if (RelationNeedsWAL(rel))
			{
				XLogRecPtr	recptr;

				recptr = gistXLogUpdate(buffer,
										todelete, ntodelete,
										NULL, 0, InvalidBuffer);
				PageSetLSN(page, recptr);
			}
			else
				PageSetLSN(page, gistGetFakeLSN(rel));

			END_CRIT_SECTION();

			vstate->stats->tuples_removed += ntodelete;
			/* must recompute maxoff */
			maxoff = PageGetMaxOffsetNumber(page);
		}

		nremain = maxoff - FirstOffsetNumber + 1;
		if (nremain == 0)
		{
			/*
			 * The page is now completely empty.  Remember its block number,
			 * so that we will try to delete the page in the second stage.
			 *
			 * Skip this when recursing, because IntegerSet requires that the
			 * values are added in ascending order.  The next VACUUM will pick
			 * it up.
			 */
			if (blkno == orig_blkno)
				intset_add_member(vstate->empty_leaf_set, blkno);
		}
		else
			vstate->stats->num_index_tuples += nremain;
	}
	else
	{
		/*
		 * On an internal page, check for "invalid tuples", left behind by an
		 * incomplete page split on PostgreSQL 9.0 or below.  These are not
		 * created by newer PostgreSQL versions, but unfortunately, there is
		 * no version number anywhere in a GiST index, so we don't know
		 * whether this index might still contain invalid tuples or not.
		 */
		OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
		OffsetNumber off;

		for (off = FirstOffsetNumber;
			 off <= maxoff;
			 off = OffsetNumberNext(off))
		{
			ItemId		iid = PageGetItemId(page, off);
			IndexTuple	idxtuple = (IndexTuple) PageGetItem(page, iid);

			if (GistTupleIsInvalid(idxtuple))
				ereport(LOG,
						(errmsg("index \"%s\" contains an inner tuple marked as invalid",
								RelationGetRelationName(rel)),
						 errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
						 errhint("Please REINDEX it.")));
		}

		/*
		 * Remember the block number of this page, so that we can revisit it
		 * later in gistvacuum_delete_empty_pages(), when we search for
		 * parents of empty leaf pages.
		 */
		if (blkno == orig_blkno)
			intset_add_member(vstate->internal_page_set, blkno);
	}

	UnlockReleaseBuffer(buffer);

	/*
	 * This is really tail recursion, but if the compiler is too stupid to
	 * optimize it as such, we'd eat an uncomfortably large amount of stack
	 * space per recursion level (due to the todelete[] array).  A failure is
	 * improbable since the number of levels isn't likely to be large ... but
	 * just in case, let's hand-optimize into a loop.
	 */
	if (recurse_to != InvalidBlockNumber)
	{
		blkno = recurse_to;
		goto restart;
	}
}

/*
 * Scan all internal pages, and try to delete their empty child pages.
 */
static void
gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistVacState *vstate)
{
	Relation	rel = info->index;
	BlockNumber empty_pages_remaining;
	uint64		blkno;

	/*
	 * Rescan all inner pages to find those that have empty child pages.
	 */
	empty_pages_remaining = intset_num_entries(vstate->empty_leaf_set);
	intset_begin_iterate(vstate->internal_page_set);
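	/* (the integer set hands back its members in ascending block order) */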
	while (empty_pages_remaining > 0 &&
		   intset_iterate_next(vstate->internal_page_set, &blkno))
	{
		Buffer		buffer;
		Page		page;
		OffsetNumber off,
					maxoff;
		OffsetNumber todelete[MaxOffsetNumber];
		BlockNumber leafs_to_delete[MaxOffsetNumber];
		int			ntodelete;
		int			deleted;

		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, (BlockNumber) blkno,
									RBM_NORMAL, info->strategy);

		LockBuffer(buffer, GIST_SHARE);
		page = (Page) BufferGetPage(buffer);

		if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page))
		{
			/*
			 * This page was an internal page earlier, but now it's something
			 * else.  Shouldn't happen...
			 */
			Assert(false);
			UnlockReleaseBuffer(buffer);
			continue;
		}

		/*
		 * Scan all the downlinks, and see if any of them point to empty leaf
		 * pages.
		 */
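		/*
		 * Note the ntodelete < maxoff - 1 bound below: we never collect
		 * every downlink on the page, which complements the "don't remove
		 * the last downlink" check in the deletion loop further down.
		 */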
		maxoff = PageGetMaxOffsetNumber(page);
		ntodelete = 0;
		for (off = FirstOffsetNumber;
			 off <= maxoff && ntodelete < maxoff - 1;
			 off = OffsetNumberNext(off))
		{
			ItemId		iid = PageGetItemId(page, off);
			IndexTuple	idxtuple = (IndexTuple) PageGetItem(page, iid);
			BlockNumber leafblk;

			leafblk = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
			if (intset_is_member(vstate->empty_leaf_set, leafblk))
			{
				leafs_to_delete[ntodelete] = leafblk;
				todelete[ntodelete++] = off;
			}
		}

		/*
		 * In order to avoid deadlock, the child page must be locked before
		 * the parent, so we must release the lock on the parent, lock the
		 * child, and then re-acquire the lock on the parent.  (And we
		 * wouldn't want to do I/O while holding a lock, anyway.)
		 *
		 * At the instant that we're not holding a lock on the parent, the
		 * downlink might get moved by a concurrent insert, so we must
		 * re-check that it still points to the same child page after we have
		 * acquired both locks.  Also, another backend might have inserted a
		 * tuple into the page, so that it is no longer empty.
		 * gistdeletepage() re-checks all these conditions.
		 */
		LockBuffer(buffer, GIST_UNLOCK);

		deleted = 0;
		for (int i = 0; i < ntodelete; i++)
		{
			Buffer		leafbuf;

			/*
			 * Don't remove the last downlink from the parent.  That would
			 * confuse the insertion code.
			 */
			if (PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
				break;

			leafbuf = ReadBufferExtended(rel, MAIN_FORKNUM, leafs_to_delete[i],
										 RBM_NORMAL, info->strategy);
			LockBuffer(leafbuf, GIST_EXCLUSIVE);
			gistcheckpage(rel, leafbuf);

			LockBuffer(buffer, GIST_EXCLUSIVE);
			if (gistdeletepage(info, vstate->stats,
							   buffer, todelete[i] - deleted,
							   leafbuf))
				deleted++;
			LockBuffer(buffer, GIST_UNLOCK);

			UnlockReleaseBuffer(leafbuf);
		}

		ReleaseBuffer(buffer);

		/*
		 * We can stop the scan as soon as we have seen the downlinks, even if
		 * we were not able to remove them all.
		 */
		empty_pages_remaining -= ntodelete;
	}
}

/*
 * gistdeletepage takes a leaf page and its parent, and tries to delete the
 * leaf.  Both pages must be locked.
 *
 * Even if the page was empty when we first saw it, a concurrent inserter might
 * have added a tuple to it since.  Similarly, the downlink might have moved.
 * We re-check all the conditions, to make sure the page is still deletable,
 * before modifying anything.
 *
 * Returns true if the page was deleted, and false if a concurrent update
 * prevented it.
 */
static bool
gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
			   Buffer parentBuffer, OffsetNumber downlink,
			   Buffer leafBuffer)
{
	Page		parentPage = BufferGetPage(parentBuffer);
	Page		leafPage = BufferGetPage(leafBuffer);
	ItemId		iid;
	IndexTuple	idxtuple;
	XLogRecPtr	recptr;
	FullTransactionId txid;

	/*
	 * Check that the leaf is still empty and deletable.
	 */
	if (!GistPageIsLeaf(leafPage))
	{
		/* a leaf page should never become a non-leaf page */
		Assert(false);
		return false;
	}

	if (GistFollowRight(leafPage))
		return false;			/* don't mess with a concurrent page split */

	if (PageGetMaxOffsetNumber(leafPage) != InvalidOffsetNumber)
		return false;			/* not empty anymore */

	/*
	 * Ok, the leaf is deletable.  Is the downlink in the parent page still
	 * valid?  It might have been moved by a concurrent insert.  We could try
	 * to re-find it by scanning the page again, possibly moving right if the
	 * page was split.  But for now, let's keep it simple and just give up.
	 * The next VACUUM will pick it up.
	 */
	if (PageIsNew(parentPage) || GistPageIsDeleted(parentPage) ||
		GistPageIsLeaf(parentPage))
	{
		/* shouldn't happen, internal pages are never deleted */
		Assert(false);
		return false;
	}

	if (PageGetMaxOffsetNumber(parentPage) < downlink
		|| PageGetMaxOffsetNumber(parentPage) <= FirstOffsetNumber)
		return false;

	iid = PageGetItemId(parentPage, downlink);
	idxtuple = (IndexTuple) PageGetItem(parentPage, iid);
	if (BufferGetBlockNumber(leafBuffer) !=
		ItemPointerGetBlockNumber(&(idxtuple->t_tid)))
		return false;

	/*
	 * All good, proceed with the deletion.
	 *
	 * The page cannot be immediately recycled, because in-progress scans that
	 * saw the downlink might still visit it.  Mark the page with the current
	 * next-XID counter, so that we know when it can be recycled.  Once that
	 * XID becomes older than GlobalXmin, we know that all scans that are
	 * currently in progress must have ended.  (That's much more conservative
	 * than needed, but let's keep it safe and simple.)
	 */
	txid = ReadNextFullTransactionId();

	START_CRIT_SECTION();

	/* mark the page as deleted */
	MarkBufferDirty(leafBuffer);
	GistPageSetDeleted(leafPage, txid);
	stats->pages_newly_deleted++;
	stats->pages_deleted++;

	/* remove the downlink from the parent */
	MarkBufferDirty(parentBuffer);
	PageIndexTupleDelete(parentPage, downlink);

	if (RelationNeedsWAL(info->index))
		recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink);
	else
		recptr = gistGetFakeLSN(info->index);
	PageSetLSN(parentPage, recptr);
	PageSetLSN(leafPage, recptr);

	END_CRIT_SECTION();

	return true;
}