Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spgvacuum.c
4 : * vacuum for SP-GiST
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/spgist/spgvacuum.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "access/genam.h"
19 : #include "access/spgist_private.h"
20 : #include "access/spgxlog.h"
21 : #include "access/transam.h"
22 : #include "access/xloginsert.h"
23 : #include "catalog/storage_xlog.h"
24 : #include "commands/vacuum.h"
25 : #include "miscadmin.h"
26 : #include "storage/bufmgr.h"
27 : #include "storage/indexfsm.h"
28 : #include "storage/lmgr.h"
29 : #include "utils/snapmgr.h"
30 :
31 :
32 : /* Entry in pending-list of TIDs we need to revisit */
33 : typedef struct spgVacPendingItem
34 : {
35 : ItemPointerData tid; /* redirection target to visit */
36 : bool done; /* have we dealt with this? */
37 : struct spgVacPendingItem *next; /* list link */
38 : } spgVacPendingItem;
39 :
40 : /* Local state for vacuum operations */
41 : typedef struct spgBulkDeleteState
42 : {
43 : /* Parameters passed in to spgvacuumscan */
44 : IndexVacuumInfo *info;
45 : IndexBulkDeleteResult *stats;
46 : IndexBulkDeleteCallback callback;
47 : void *callback_state;
48 :
49 : /* Additional working state */
50 : SpGistState spgstate; /* for SPGiST operations that need one */
51 : spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
52 : TransactionId myXmin; /* for detecting newly-added redirects */
53 : BlockNumber lastFilledBlock; /* last non-deletable block */
54 : } spgBulkDeleteState;
55 :
56 :
57 : /*
58 : * Add TID to pendingList, but only if not already present.
59 : *
60 : * Note that new items are always appended at the end of the list; this
61 : * ensures that scans of the list don't miss items added during the scan.
62 : */
63 : static void
4045 tgl 64 CBC 53975 : spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid)
65 : {
66 : spgVacPendingItem *pitem;
67 : spgVacPendingItem **listLink;
68 :
69 : /* search the list for pre-existing entry */
70 53975 : listLink = &bds->pendingList;
71 4738496 : while (*listLink != NULL)
72 : {
73 4702157 : pitem = *listLink;
74 4702157 : if (ItemPointerEquals(tid, &pitem->tid))
75 17636 : return; /* already in list, do nothing */
76 4684521 : listLink = &pitem->next;
77 : }
78 : /* not there, so append new entry */
79 36339 : pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
80 36339 : pitem->tid = *tid;
81 36339 : pitem->done = false;
82 36339 : pitem->next = NULL;
83 36339 : *listLink = pitem;
84 : }
85 :
86 : /*
87 : * Clear pendingList
88 : */
89 : static void
90 291 : spgClearPendingList(spgBulkDeleteState *bds)
91 : {
92 : spgVacPendingItem *pitem;
93 : spgVacPendingItem *nitem;
94 :
95 36630 : for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
96 : {
97 36339 : nitem = pitem->next;
98 : /* All items in list should have been dealt with */
99 36339 : Assert(pitem->done);
100 36339 : pfree(pitem);
101 : }
102 291 : bds->pendingList = NULL;
103 291 : }
104 :
105 : /*
106 : * Vacuum a regular (non-root) leaf page
107 : *
108 : * We must delete tuples that are targeted for deletion by the VACUUM,
109 : * but not move any tuples that are referenced by outside links; we assume
110 : * those are the ones that are heads of chains.
111 : *
112 : * If we find a REDIRECT that was made by a concurrently-running transaction,
113 : * we must add its target TID to pendingList. (We don't try to visit the
114 : * target immediately, first because we don't want VACUUM locking more than
115 : * one buffer at a time, and second because the duplicate-filtering logic
116 : * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
117 : * loop in the face of continuous concurrent insertions.)
118 : *
119 : * If forPending is true, we are examining the page as a consequence of
120 : * chasing a redirect link, not as part of the normal sequential scan.
121 : * We still vacuum the page normally, but we don't increment the stats
122 : * about live tuples; else we'd double-count those tuples, since the page
123 : * has been or will be visited in the sequential scan as well.
124 : */
125 : static void
126 19909 : vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
127 : bool forPending)
128 : {
2545 kgrittn 129 19909 : Page page = BufferGetPage(buffer);
130 : spgxlogVacuumLeaf xlrec;
131 : OffsetNumber toDead[MaxIndexTuplesPerPage];
132 : OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
133 : OffsetNumber moveSrc[MaxIndexTuplesPerPage];
134 : OffsetNumber moveDest[MaxIndexTuplesPerPage];
135 : OffsetNumber chainSrc[MaxIndexTuplesPerPage];
136 : OffsetNumber chainDest[MaxIndexTuplesPerPage];
137 : OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
138 : bool deletable[MaxIndexTuplesPerPage + 1];
139 : int nDeletable;
140 : OffsetNumber i,
4131 tgl 141 19909 : max = PageGetMaxOffsetNumber(page);
142 :
143 19909 : memset(predecessor, 0, sizeof(predecessor));
144 19909 : memset(deletable, 0, sizeof(deletable));
145 19909 : nDeletable = 0;
146 :
147 : /* Scan page, identify tuples to delete, accumulate stats */
148 3163052 : for (i = FirstOffsetNumber; i <= max; i++)
149 : {
150 : SpGistLeafTuple lt;
151 :
152 3143143 : lt = (SpGistLeafTuple) PageGetItem(page,
153 : PageGetItemId(page, i));
154 3143143 : if (lt->tupstate == SPGIST_LIVE)
155 : {
156 3007702 : Assert(ItemPointerIsValid(<->heapPtr));
157 :
158 3007702 : if (bds->callback(<->heapPtr, bds->callback_state))
159 : {
160 5833 : bds->stats->tuples_removed += 1;
161 5833 : deletable[i] = true;
162 5833 : nDeletable++;
163 : }
164 : else
165 : {
4045 166 3001869 : if (!forPending)
167 312921 : bds->stats->num_index_tuples += 1;
168 : }
169 :
170 : /* Form predecessor map, too */
734 171 3007702 : if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber)
172 : {
173 : /* paranoia about corrupted chain links */
174 2980419 : if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber ||
175 2980419 : SGLT_GET_NEXTOFFSET(lt) > max ||
176 2980419 : predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
4131 tgl 177 UBC 0 : elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
178 : BufferGetBlockNumber(buffer),
179 : RelationGetRelationName(index));
734 tgl 180 CBC 2980419 : predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
181 : }
182 : }
4045 183 135441 : else if (lt->tupstate == SPGIST_REDIRECT)
184 : {
185 18272 : SpGistDeadTuple dt = (SpGistDeadTuple) lt;
186 :
734 187 18272 : Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber);
4045 188 18272 : Assert(ItemPointerIsValid(&dt->pointer));
189 :
190 : /*
191 : * Add target TID to pending list if the redirection could have
192 : * happened since VACUUM started.
193 : *
194 : * Note: we could make a tighter test by seeing if the xid is
195 : * "running" according to the active snapshot; but snapmgr.c
196 : * doesn't currently export a suitable API, and it's not entirely
197 : * clear that a tighter test is worth the cycles anyway.
198 : */
199 18272 : if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
200 18063 : spgAddPendingTID(bds, &dt->pointer);
201 : }
202 : else
203 : {
734 204 117169 : Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber);
205 : }
206 : }
207 :
4131 208 19909 : if (nDeletable == 0)
209 19837 : return; /* nothing more to do */
210 :
211 : /*----------
212 : * Figure out exactly what we have to do. We do this separately from
213 : * actually modifying the page, mainly so that we have a representation
214 : * that can be dumped into WAL and then the replay code can do exactly
215 : * the same thing. The output of this step consists of six arrays
216 : * describing four kinds of operations, to be performed in this order:
217 : *
218 : * toDead[]: tuple numbers to be replaced with DEAD tuples
219 : * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
220 : * moveSrc[]: tuple numbers that need to be relocated to another offset
221 : * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
222 : * moveDest[]: new locations for moveSrc tuples
223 : * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
224 : * chainDest[]: new values of nextOffset for chainSrc members
225 : *
226 : * It's easiest to figure out what we have to do by processing tuple
227 : * chains, so we iterate over all the tuples (not just the deletable
228 : * ones!) to identify chain heads, then chase down each chain and make
229 : * work item entries for deletable tuples within the chain.
230 : *----------
231 : */
232 72 : xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;
233 :
234 12210 : for (i = FirstOffsetNumber; i <= max; i++)
235 : {
236 : SpGistLeafTuple head;
237 : bool interveningDeletable;
238 : OffsetNumber prevLive;
239 : OffsetNumber j;
240 :
241 12138 : head = (SpGistLeafTuple) PageGetItem(page,
242 : PageGetItemId(page, i));
243 12138 : if (head->tupstate != SPGIST_LIVE)
244 1305 : continue; /* can't be a chain member */
245 10833 : if (predecessor[i] != 0)
246 10746 : continue; /* not a chain head */
247 :
248 : /* initialize ... */
249 87 : interveningDeletable = false;
250 87 : prevLive = deletable[i] ? InvalidOffsetNumber : i;
251 :
252 : /* scan down the chain ... */
734 253 87 : j = SGLT_GET_NEXTOFFSET(head);
4131 254 10833 : while (j != InvalidOffsetNumber)
255 : {
256 : SpGistLeafTuple lt;
257 :
258 10746 : lt = (SpGistLeafTuple) PageGetItem(page,
259 : PageGetItemId(page, j));
260 10746 : if (lt->tupstate != SPGIST_LIVE)
261 : {
262 : /* all tuples in chain should be live */
4131 tgl 263 UBC 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
264 : lt->tupstate);
265 : }
266 :
4131 tgl 267 CBC 10746 : if (deletable[j])
268 : {
269 : /* This tuple should be replaced by a placeholder */
270 5764 : toPlaceholder[xlrec.nPlaceholder] = j;
271 5764 : xlrec.nPlaceholder++;
272 : /* previous live tuple's chain link will need an update */
273 5764 : interveningDeletable = true;
274 : }
275 4982 : else if (prevLive == InvalidOffsetNumber)
276 : {
277 : /*
278 : * This is the first live tuple in the chain. It has to move
279 : * to the head position.
280 : */
281 69 : moveSrc[xlrec.nMove] = j;
282 69 : moveDest[xlrec.nMove] = i;
283 69 : xlrec.nMove++;
284 : /* Chain updates will be applied after the move */
285 69 : prevLive = i;
286 69 : interveningDeletable = false;
287 : }
288 : else
289 : {
290 : /*
291 : * Second or later live tuple. Arrange to re-chain it to the
292 : * previous live one, if there was a gap.
293 : */
294 4913 : if (interveningDeletable)
295 : {
296 12 : chainSrc[xlrec.nChain] = prevLive;
297 12 : chainDest[xlrec.nChain] = j;
298 12 : xlrec.nChain++;
299 : }
300 4913 : prevLive = j;
301 4913 : interveningDeletable = false;
302 : }
303 :
734 304 10746 : j = SGLT_GET_NEXTOFFSET(lt);
305 : }
306 :
4131 307 87 : if (prevLive == InvalidOffsetNumber)
308 : {
309 : /* The chain is entirely removable, so we need a DEAD tuple */
4131 tgl 310 UBC 0 : toDead[xlrec.nDead] = i;
311 0 : xlrec.nDead++;
312 : }
4131 tgl 313 CBC 87 : else if (interveningDeletable)
314 : {
315 : /* One or more deletions at end of chain, so close it off */
316 77 : chainSrc[xlrec.nChain] = prevLive;
317 77 : chainDest[xlrec.nChain] = InvalidOffsetNumber;
318 77 : xlrec.nChain++;
319 : }
320 : }
321 :
322 : /* sanity check ... */
323 72 : if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
4131 tgl 324 UBC 0 : elog(ERROR, "inconsistent counts of deletable tuples");
325 :
326 : /* Do the updates */
4131 tgl 327 CBC 72 : START_CRIT_SECTION();
328 :
329 72 : spgPageIndexMultiDelete(&bds->spgstate, page,
330 72 : toDead, xlrec.nDead,
331 : SPGIST_DEAD, SPGIST_DEAD,
332 : InvalidBlockNumber, InvalidOffsetNumber);
333 :
334 72 : spgPageIndexMultiDelete(&bds->spgstate, page,
335 72 : toPlaceholder, xlrec.nPlaceholder,
336 : SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
337 : InvalidBlockNumber, InvalidOffsetNumber);
338 :
339 : /*
340 : * We implement the move step by swapping the line pointers of the source
341 : * and target tuples, then replacing the newly-source tuples with
342 : * placeholders. This is perhaps unduly friendly with the page data
343 : * representation, but it's fast and doesn't risk page overflow when a
344 : * tuple to be relocated is large.
345 : */
346 141 : for (i = 0; i < xlrec.nMove; i++)
347 : {
348 69 : ItemId idSrc = PageGetItemId(page, moveSrc[i]);
349 69 : ItemId idDest = PageGetItemId(page, moveDest[i]);
350 : ItemIdData tmp;
351 :
352 69 : tmp = *idSrc;
353 69 : *idSrc = *idDest;
354 69 : *idDest = tmp;
355 : }
356 :
357 72 : spgPageIndexMultiDelete(&bds->spgstate, page,
358 72 : moveSrc, xlrec.nMove,
359 : SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
360 : InvalidBlockNumber, InvalidOffsetNumber);
361 :
362 161 : for (i = 0; i < xlrec.nChain; i++)
363 : {
364 : SpGistLeafTuple lt;
365 :
366 89 : lt = (SpGistLeafTuple) PageGetItem(page,
367 89 : PageGetItemId(page, chainSrc[i]));
368 89 : Assert(lt->tupstate == SPGIST_LIVE);
734 369 89 : SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
370 : }
371 :
4131 372 72 : MarkBufferDirty(buffer);
373 :
374 72 : if (RelationNeedsWAL(index))
375 : {
376 : XLogRecPtr recptr;
377 :
3062 heikki.linnakangas 378 72 : XLogBeginInsert();
379 :
380 72 : STORE_STATE(&bds->spgstate, xlrec.stateSrc);
381 :
382 72 : XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
383 : /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
384 72 : XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
385 72 : XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
386 72 : XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
387 72 : XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
388 72 : XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
389 72 : XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);
390 :
391 72 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
392 :
393 72 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
394 :
4131 tgl 395 72 : PageSetLSN(page, recptr);
396 : }
397 :
398 72 : END_CRIT_SECTION();
399 : }
400 :
401 : /*
402 : * Vacuum a root page when it is also a leaf
403 : *
404 : * On the root, we just delete any dead leaf tuples; no fancy business
405 : */
406 : static void
407 15 : vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
408 : {
2545 kgrittn 409 15 : Page page = BufferGetPage(buffer);
410 : spgxlogVacuumRoot xlrec;
411 : OffsetNumber toDelete[MaxIndexTuplesPerPage];
412 : OffsetNumber i,
4131 tgl 413 15 : max = PageGetMaxOffsetNumber(page);
414 :
415 15 : xlrec.nDelete = 0;
416 :
417 : /* Scan page, identify tuples to delete, accumulate stats */
418 78 : for (i = FirstOffsetNumber; i <= max; i++)
419 : {
420 : SpGistLeafTuple lt;
421 :
422 63 : lt = (SpGistLeafTuple) PageGetItem(page,
423 : PageGetItemId(page, i));
424 63 : if (lt->tupstate == SPGIST_LIVE)
425 : {
426 63 : Assert(ItemPointerIsValid(<->heapPtr));
427 :
428 63 : if (bds->callback(<->heapPtr, bds->callback_state))
429 : {
4131 tgl 430 UBC 0 : bds->stats->tuples_removed += 1;
431 0 : toDelete[xlrec.nDelete] = i;
432 0 : xlrec.nDelete++;
433 : }
434 : else
435 : {
4131 tgl 436 CBC 63 : bds->stats->num_index_tuples += 1;
437 : }
438 : }
439 : else
440 : {
441 : /* all tuples on root should be live */
4131 tgl 442 UBC 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
443 : lt->tupstate);
444 : }
445 : }
446 :
4131 tgl 447 CBC 15 : if (xlrec.nDelete == 0)
448 15 : return; /* nothing more to do */
449 :
450 : /* Do the update */
4131 tgl 451 UBC 0 : START_CRIT_SECTION();
452 :
453 : /* The tuple numbers are in order, so we can use PageIndexMultiDelete */
454 0 : PageIndexMultiDelete(page, toDelete, xlrec.nDelete);
455 :
456 0 : MarkBufferDirty(buffer);
457 :
458 0 : if (RelationNeedsWAL(index))
459 : {
460 : XLogRecPtr recptr;
461 :
3062 heikki.linnakangas 462 0 : XLogBeginInsert();
463 :
464 : /* Prepare WAL record */
465 0 : STORE_STATE(&bds->spgstate, xlrec.stateSrc);
466 :
467 0 : XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
468 : /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
469 0 : XLogRegisterData((char *) toDelete,
470 0 : sizeof(OffsetNumber) * xlrec.nDelete);
471 :
472 0 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
473 :
474 0 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
475 :
4131 tgl 476 0 : PageSetLSN(page, recptr);
477 : }
478 :
479 0 : END_CRIT_SECTION();
480 : }
481 :
482 : /*
483 : * Clean up redirect and placeholder tuples on the given page
484 : *
485 : * Redirect tuples can be marked placeholder once they're old enough.
486 : * Placeholder tuples can be removed if it won't change the offsets of
487 : * non-placeholder ones.
488 : *
489 : * Unlike the routines above, this works on both leaf and inner pages.
490 : */
491 : static void
8 andres 492 GNC 19996 : vacuumRedirectAndPlaceholder(Relation index, Relation heaprel, Buffer buffer)
493 : {
2545 kgrittn 494 CBC 19996 : Page page = BufferGetPage(buffer);
4131 tgl 495 19996 : SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
496 : OffsetNumber i,
497 19996 : max = PageGetMaxOffsetNumber(page),
498 19996 : firstPlaceholder = InvalidOffsetNumber;
499 19996 : bool hasNonPlaceholder = false;
500 19996 : bool hasUpdate = false;
501 : OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
502 : OffsetNumber itemnos[MaxIndexTuplesPerPage];
503 : spgxlogVacuumRedirect xlrec;
504 : GlobalVisState *vistest;
505 :
7 andres 506 GNC 19996 : xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(heaprel);
4131 tgl 507 CBC 19996 : xlrec.nToPlaceholder = 0;
143 pg 508 GNC 19996 : xlrec.snapshotConflictHorizon = InvalidTransactionId;
4131 tgl 509 ECB :
6 pg 510 GNC 19996 : vistest = GlobalVisTestFor(heaprel);
511 :
4131 tgl 512 CBC 19996 : START_CRIT_SECTION();
513 :
514 : /*
515 : * Scan backwards to convert old redirection tuples to placeholder tuples,
516 : * and identify location of last non-placeholder tuple while at it.
517 : */
518 19996 : for (i = max;
519 2805818 : i >= FirstOffsetNumber &&
3955 bruce 520 2788222 : (opaque->nRedirection > 0 || !hasNonPlaceholder);
4131 tgl 521 2785822 : i--)
522 : {
523 : SpGistDeadTuple dt;
524 :
525 2785822 : dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));
526 :
527 2804096 : if (dt->tupstate == SPGIST_REDIRECT &&
970 andres 528 18274 : GlobalVisTestIsRemovableXid(vistest, dt->xid))
529 : {
4131 tgl 530 76 : dt->tupstate = SPGIST_PLACEHOLDER;
531 76 : Assert(opaque->nRedirection > 0);
532 76 : opaque->nRedirection--;
533 76 : opaque->nPlaceholder++;
534 :
535 : /* remember newest XID among the removed redirects */
143 pg 536 GNC 118 : if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) ||
537 42 : TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid))
538 34 : xlrec.snapshotConflictHorizon = dt->xid;
539 :
4131 tgl 540 CBC 76 : ItemPointerSetInvalid(&dt->pointer);
541 :
542 76 : itemToPlaceholder[xlrec.nToPlaceholder] = i;
543 76 : xlrec.nToPlaceholder++;
544 :
545 76 : hasUpdate = true;
546 : }
547 :
548 2785822 : if (dt->tupstate == SPGIST_PLACEHOLDER)
549 : {
550 92612 : if (!hasNonPlaceholder)
551 89683 : firstPlaceholder = i;
552 : }
553 : else
554 : {
555 2693210 : hasNonPlaceholder = true;
556 : }
557 : }
558 :
559 : /*
560 : * Any placeholder tuples at the end of page can safely be removed. We
561 : * can't remove ones before the last non-placeholder, though, because we
562 : * can't alter the offset numbers of non-placeholder tuples.
563 : */
564 19996 : if (firstPlaceholder != InvalidOffsetNumber)
565 : {
566 : /*
567 : * We do not store this array to rdata because it's easy to recreate.
568 : */
569 91124 : for (i = firstPlaceholder; i <= max; i++)
570 89683 : itemnos[i - firstPlaceholder] = i;
571 :
572 1441 : i = max - firstPlaceholder + 1;
573 1441 : Assert(opaque->nPlaceholder >= i);
574 1441 : opaque->nPlaceholder -= i;
575 :
576 : /* The array is surely sorted, so can use PageIndexMultiDelete */
577 1441 : PageIndexMultiDelete(page, itemnos, i);
578 :
579 1441 : hasUpdate = true;
580 : }
581 :
582 19996 : xlrec.firstPlaceholder = firstPlaceholder;
583 :
584 19996 : if (hasUpdate)
585 1441 : MarkBufferDirty(buffer);
586 :
587 19996 : if (hasUpdate && RelationNeedsWAL(index))
588 : {
589 : XLogRecPtr recptr;
590 :
3062 heikki.linnakangas 591 1441 : XLogBeginInsert();
592 :
593 1441 : XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect);
594 1441 : XLogRegisterData((char *) itemToPlaceholder,
595 1441 : sizeof(OffsetNumber) * xlrec.nToPlaceholder);
596 :
597 1441 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
598 :
599 1441 : recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
600 :
4131 tgl 601 1441 : PageSetLSN(page, recptr);
602 : }
603 :
604 19996 : END_CRIT_SECTION();
605 19996 : }
606 :
607 : /*
608 : * Process one page during a bulkdelete scan
609 : */
610 : static void
611 2484 : spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno)
612 : {
613 2484 : Relation index = bds->info->index;
614 : Buffer buffer;
615 : Page page;
616 :
617 : /* call vacuum_delay_point while not holding any buffer lock */
618 2484 : vacuum_delay_point();
619 :
620 2484 : buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
621 2484 : RBM_NORMAL, bds->info->strategy);
622 2484 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
2545 kgrittn 623 2484 : page = (Page) BufferGetPage(buffer);
624 :
4131 tgl 625 2484 : if (PageIsNew(page))
626 : {
627 : /*
628 : * We found an all-zero page, which could happen if the database
629 : * crashed just after extending the file. Recycle it.
630 : */
631 : }
2813 heikki.linnakangas 632 2478 : else if (PageIsEmpty(page))
633 : {
634 : /* nothing to do */
635 : }
4131 tgl 636 2426 : else if (SpGistPageIsLeaf(page))
637 : {
4046 638 2339 : if (SpGistBlockIsRoot(blkno))
639 : {
4131 640 15 : vacuumLeafRoot(bds, index, buffer);
641 : /* no need for vacuumRedirectAndPlaceholder */
642 : }
643 : else
644 : {
4045 645 2324 : vacuumLeafPage(bds, index, buffer, false);
8 andres 646 GNC 2324 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
647 : }
648 : }
649 : else
650 : {
651 : /* inner page */
652 87 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
653 : }
654 :
655 : /*
656 : * The root pages must never be deleted, nor marked as available in FSM,
657 : * because we don't want them ever returned by a search for a place to put
658 : * a new tuple. Otherwise, check for empty page, and make sure the FSM
659 : * knows about it.
660 : */
4046 tgl 661 CBC 2484 : if (!SpGistBlockIsRoot(blkno))
662 : {
2813 heikki.linnakangas 663 2404 : if (PageIsNew(page) || PageIsEmpty(page))
664 : {
4131 tgl 665 38 : RecordFreeIndexPage(index, blkno);
666 38 : bds->stats->pages_deleted++;
667 : }
668 : else
669 : {
2813 heikki.linnakangas 670 2366 : SpGistSetLastUsedPage(index, buffer);
4131 tgl 671 2366 : bds->lastFilledBlock = blkno;
672 : }
673 : }
674 :
675 2484 : UnlockReleaseBuffer(buffer);
676 2484 : }
677 :
678 : /*
679 : * Process the pending-TID list between pages of the main scan
680 : */
681 : static void
4045 682 291 : spgprocesspending(spgBulkDeleteState *bds)
683 : {
684 291 : Relation index = bds->info->index;
685 : spgVacPendingItem *pitem;
686 : spgVacPendingItem *nitem;
687 : BlockNumber blkno;
688 : Buffer buffer;
689 : Page page;
690 :
691 36630 : for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
692 : {
693 36339 : if (pitem->done)
694 18455 : continue; /* ignore already-done items */
695 :
696 : /* call vacuum_delay_point while not holding any buffer lock */
697 17884 : vacuum_delay_point();
698 :
699 : /* examine the referenced page */
700 17884 : blkno = ItemPointerGetBlockNumber(&pitem->tid);
701 17884 : buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
702 17884 : RBM_NORMAL, bds->info->strategy);
703 17884 : LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
2545 kgrittn 704 17884 : page = (Page) BufferGetPage(buffer);
705 :
4045 tgl 706 17884 : if (PageIsNew(page) || SpGistPageIsDeleted(page))
707 : {
708 : /* Probably shouldn't happen, but ignore it */
709 : }
710 17884 : else if (SpGistPageIsLeaf(page))
711 : {
712 17585 : if (SpGistBlockIsRoot(blkno))
713 : {
714 : /* this should definitely not happen */
4045 tgl 715 UBC 0 : elog(ERROR, "redirection leads to root page of index \"%s\"",
716 : RelationGetRelationName(index));
717 : }
718 :
719 : /* deal with any deletable tuples */
4045 tgl 720 CBC 17585 : vacuumLeafPage(bds, index, buffer, true);
721 : /* might as well do this while we are here */
8 andres 722 GNC 17585 : vacuumRedirectAndPlaceholder(index, bds->info->heaprel, buffer);
723 :
4045 tgl 724 CBC 17585 : SpGistSetLastUsedPage(index, buffer);
725 :
726 : /*
727 : * We can mark as done not only this item, but any later ones
728 : * pointing at the same page, since we vacuumed the whole page.
729 : */
730 17585 : pitem->done = true;
731 1531856 : for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
732 : {
733 1514271 : if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
734 1339 : nitem->done = true;
735 : }
736 : }
737 : else
738 : {
739 : /*
740 : * On an inner page, visit the referenced inner tuple and add all
741 : * its downlinks to the pending list. We might have pending items
742 : * for more than one inner tuple on the same page (in fact this is
743 : * pretty likely given the way space allocation works), so get
744 : * them all while we are here.
745 : */
746 36568 : for (nitem = pitem; nitem != NULL; nitem = nitem->next)
747 : {
748 36269 : if (nitem->done)
749 2 : continue;
750 36267 : if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
751 : {
752 : OffsetNumber offset;
753 : SpGistInnerTuple innerTuple;
754 :
755 17415 : offset = ItemPointerGetOffsetNumber(&nitem->tid);
756 17415 : innerTuple = (SpGistInnerTuple) PageGetItem(page,
757 : PageGetItemId(page, offset));
758 17415 : if (innerTuple->tupstate == SPGIST_LIVE)
759 : {
760 : SpGistNodeTuple node;
761 : int i;
762 :
763 87913 : SGITITERATE(innerTuple, i, node)
764 : {
765 70498 : if (ItemPointerIsValid(&node->t_tid))
766 35912 : spgAddPendingTID(bds, &node->t_tid);
767 : }
768 : }
4045 tgl 769 UBC 0 : else if (innerTuple->tupstate == SPGIST_REDIRECT)
770 : {
771 : /* transfer attention to redirect point */
772 0 : spgAddPendingTID(bds,
773 : &((SpGistDeadTuple) innerTuple)->pointer);
774 : }
775 : else
776 0 : elog(ERROR, "unexpected SPGiST tuple state: %d",
777 : innerTuple->tupstate);
778 :
4045 tgl 779 CBC 17415 : nitem->done = true;
780 : }
781 : }
782 : }
783 :
784 17884 : UnlockReleaseBuffer(buffer);
785 : }
786 :
787 291 : spgClearPendingList(bds);
788 291 : }
789 :
790 : /*
791 : * Perform a bulkdelete scan
792 : */
793 : static void
4131 794 40 : spgvacuumscan(spgBulkDeleteState *bds)
795 : {
796 40 : Relation index = bds->info->index;
797 : bool needLock;
798 : BlockNumber num_pages,
799 : blkno;
800 :
801 : /* Finish setting up spgBulkDeleteState */
802 40 : initSpGistState(&bds->spgstate, index);
4045 803 40 : bds->pendingList = NULL;
804 40 : bds->myXmin = GetActiveSnapshot()->xmin;
4046 805 40 : bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;
806 :
807 : /*
808 : * Reset counts that will be incremented during the scan; needed in case
809 : * of multiple scans during a single VACUUM command
810 : */
4131 811 40 : bds->stats->estimated_count = false;
812 40 : bds->stats->num_index_tuples = 0;
813 40 : bds->stats->pages_deleted = 0;
814 :
815 : /* We can skip locking for new or temp relations */
816 40 : needLock = !RELATION_IS_LOCAL(index);
817 :
818 : /*
819 : * The outer loop iterates over all index pages except the metapage, in
820 : * physical order (we hope the kernel will cooperate in providing
821 : * read-ahead for speed). It is critical that we visit all leaf pages,
822 : * including ones added after we start the scan, else we might fail to
823 : * delete some deletable tuples. See more extensive comments about this
824 : * in btvacuumscan().
825 : */
4046 826 40 : blkno = SPGIST_METAPAGE_BLKNO + 1;
827 : for (;;)
828 : {
829 : /* Get the current relation length */
4131 830 80 : if (needLock)
831 80 : LockRelationForExtension(index, ExclusiveLock);
832 80 : num_pages = RelationGetNumberOfBlocks(index);
833 80 : if (needLock)
834 80 : UnlockRelationForExtension(index, ExclusiveLock);
835 :
836 : /* Quit if we've scanned the whole relation */
837 80 : if (blkno >= num_pages)
838 40 : break;
839 : /* Iterate over pages, then loop back to recheck length */
840 2524 : for (; blkno < num_pages; blkno++)
841 : {
842 2484 : spgvacuumpage(bds, blkno);
843 : /* empty the pending-list after each page */
4045 844 2484 : if (bds->pendingList != NULL)
845 291 : spgprocesspending(bds);
846 : }
847 : }
848 :
849 : /* Propagate local lastUsedPages cache to metablock */
4131 850 40 : SpGistUpdateMetaPage(index);
851 :
852 : /*
853 : * If we found any empty pages (and recorded them in the FSM), then
854 : * forcibly update the upper-level FSM pages to ensure that searchers can
855 : * find them. It's possible that the pages were also found during
856 : * previous scans and so this is a waste of time, but it's cheap enough
857 : * relative to scanning the index that it shouldn't matter much, and
858 : * making sure that free pages are available sooner not later seems
859 : * worthwhile.
860 : *
861 : * Note that if no empty pages exist, we don't bother vacuuming the FSM at
862 : * all.
863 : */
1836 864 40 : if (bds->stats->pages_deleted > 0)
865 27 : IndexFreeSpaceMapVacuum(index);
866 :
867 : /*
868 : * Truncate index if possible
869 : *
870 : * XXX disabled because it's unsafe due to possible concurrent inserts.
871 : * We'd have to rescan the pages to make sure they're still empty, and it
872 : * doesn't seem worth it. Note that btree doesn't do this either.
873 : *
874 : * Another reason not to truncate is that it could invalidate the cached
875 : * pages-with-freespace pointers in the metapage and other backends'
876 : * relation caches, that is leave them pointing to nonexistent pages.
877 : * Adding RelationGetNumberOfBlocks calls to protect the places that use
878 : * those pointers would be unduly expensive.
879 : */
880 : #ifdef NOT_USED
881 : if (num_pages > bds->lastFilledBlock + 1)
882 : {
883 : BlockNumber lastBlock = num_pages - 1;
884 :
885 : num_pages = bds->lastFilledBlock + 1;
886 : RelationTruncate(index, num_pages);
887 : bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
888 : bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
889 : }
890 : #endif
891 :
892 : /* Report final stats */
4131 893 40 : bds->stats->num_pages = num_pages;
773 pg 894 40 : bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
4131 tgl 895 40 : bds->stats->pages_free = bds->stats->pages_deleted;
896 40 : }
897 :
898 : /*
899 : * Bulk deletion of all index entries pointing to a set of heap tuples.
900 : * The set of target tuples is specified via a callback routine that tells
901 : * whether any given heap tuple (identified by ItemPointer) is being deleted.
902 : *
903 : * Result: a palloc'd struct containing statistical info for VACUUM displays.
904 : */
905 : IndexBulkDeleteResult *
2639 906 5 : spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
907 : IndexBulkDeleteCallback callback, void *callback_state)
908 : {
909 : spgBulkDeleteState bds;
910 :
911 : /* allocate stats if first time through, else re-use existing struct */
4131 912 5 : if (stats == NULL)
913 5 : stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
914 5 : bds.info = info;
915 5 : bds.stats = stats;
916 5 : bds.callback = callback;
917 5 : bds.callback_state = callback_state;
918 :
919 5 : spgvacuumscan(&bds);
920 :
2639 921 5 : return stats;
922 : }
923 :
924 : /* Dummy callback to delete no tuples during spgvacuumcleanup */
925 : static bool
4131 926 2996932 : dummy_callback(ItemPointer itemptr, void *state)
927 : {
928 2996932 : return false;
929 : }
930 :
931 : /*
932 : * Post-VACUUM cleanup.
933 : *
934 : * Result: a palloc'd struct containing statistical info for VACUUM displays.
935 : */
936 : IndexBulkDeleteResult *
2639 937 52 : spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
938 : {
939 : spgBulkDeleteState bds;
940 :
941 : /* No-op in ANALYZE ONLY mode */
4131 942 52 : if (info->analyze_only)
2639 943 12 : return stats;
944 :
945 : /*
946 : * We don't need to scan the index if there was a preceding bulkdelete
947 : * pass. Otherwise, make a pass that won't delete any live tuples, but
948 : * might still accomplish useful stuff with redirect/placeholder cleanup
949 : * and/or FSM housekeeping, and in any case will provide stats.
950 : */
4131 951 40 : if (stats == NULL)
952 : {
953 35 : stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
954 35 : bds.info = info;
955 35 : bds.stats = stats;
956 35 : bds.callback = dummy_callback;
957 35 : bds.callback_state = NULL;
958 :
959 35 : spgvacuumscan(&bds);
960 : }
961 :
962 : /*
963 : * It's quite possible for us to be fooled by concurrent tuple moves into
964 : * double-counting some index tuples, so disbelieve any total that exceeds
965 : * the underlying heap's count ... if we know that accurately. Otherwise
966 : * this might just make matters worse.
967 : */
968 40 : if (!info->estimated_count)
969 : {
970 40 : if (stats->num_index_tuples > info->num_heap_tuples)
4131 tgl 971 UBC 0 : stats->num_index_tuples = info->num_heap_tuples;
972 : }
973 :
2639 tgl 974 CBC 40 : return stats;
975 : }
|