Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * pruneheap.c
4 : : * heap page pruning and HOT-chain management code
5 : : *
6 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/heap/pruneheap.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include "access/heapam.h"
18 : : #include "access/heapam_xlog.h"
19 : : #include "access/htup_details.h"
20 : : #include "access/multixact.h"
21 : : #include "access/transam.h"
22 : : #include "access/xlog.h"
23 : : #include "access/xloginsert.h"
24 : : #include "commands/vacuum.h"
25 : : #include "executor/instrument.h"
26 : : #include "miscadmin.h"
27 : : #include "pgstat.h"
28 : : #include "storage/bufmgr.h"
29 : : #include "utils/rel.h"
30 : : #include "utils/snapmgr.h"
31 : :
32 : : /* Working data for heap_page_prune_and_freeze() and subroutines */
33 : : typedef struct
34 : : {
35 : : /*-------------------------------------------------------
36 : : * Arguments passed to heap_page_prune_and_freeze()
37 : : *-------------------------------------------------------
38 : : */
39 : :
40 : : /* tuple visibility test, initialized for the relation */
41 : : GlobalVisState *vistest;
42 : : /* whether or not dead items can be set LP_UNUSED during pruning */
43 : : bool mark_unused_now;
44 : : /* whether to attempt freezing tuples */
45 : : bool freeze;
46 : : struct VacuumCutoffs *cutoffs;
47 : :
48 : : /*-------------------------------------------------------
49 : : * Fields describing what to do to the page
50 : : *-------------------------------------------------------
51 : : */
52 : : TransactionId new_prune_xid; /* new prune hint value */
53 : : TransactionId latest_xid_removed;
54 : : int nredirected; /* number of entries in arrays below */
55 : : int ndead;
56 : : int nunused;
57 : : int nfrozen;
58 : : /* arrays that accumulate indexes of items to be changed */
59 : : OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
60 : : OffsetNumber nowdead[MaxHeapTuplesPerPage];
61 : : OffsetNumber nowunused[MaxHeapTuplesPerPage];
62 : : HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
63 : :
64 : : /*-------------------------------------------------------
65 : : * Working state for HOT chain processing
66 : : *-------------------------------------------------------
67 : : */
68 : :
69 : : /*
70 : : * 'root_items' contains offsets of all LP_REDIRECT line pointers and
71 : : * normal non-HOT tuples. They can be stand-alone items or the first item
72 : : * in a HOT chain. 'heaponly_items' contains heap-only tuples which can
73 : : * only be removed as part of a HOT chain.
74 : : */
75 : : int nroot_items;
76 : : OffsetNumber root_items[MaxHeapTuplesPerPage];
77 : : int nheaponly_items;
78 : : OffsetNumber heaponly_items[MaxHeapTuplesPerPage];
79 : :
80 : : /*
81 : : * processed[offnum] is true if item at offnum has been processed.
82 : : *
83 : : * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
84 : : * 1. Otherwise every access would need to subtract 1.
85 : : */
86 : : bool processed[MaxHeapTuplesPerPage + 1];
87 : :
88 : : /*
89 : : * Tuple visibility is only computed once for each tuple, for correctness
90 : : * and efficiency reasons; see comment in heap_page_prune_and_freeze() for
91 : : * details. This is of type int8[], instead of HTSV_Result[], so we can
92 : : * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD
93 : : * items.
94 : : *
95 : : * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
96 : : * 1. Otherwise every access would need to subtract 1.
97 : : */
98 : : int8 htsv[MaxHeapTuplesPerPage + 1];
99 : :
100 : : /*
101 : : * Freezing-related state.
102 : : */
103 : : HeapPageFreeze pagefrz;
104 : :
105 : : /*-------------------------------------------------------
106 : : * Information about what was done
107 : : *
108 : : * These fields are not used by pruning itself for the most part, but are
109 : : * used to collect information about what was pruned and what state the
110 : : * page is in after pruning, for the benefit of the caller. They are
111 : : * copied to the caller's PruneFreezeResult at the end.
112 : : *-------------------------------------------------------
113 : : */
114 : :
115 : : int ndeleted; /* Number of tuples deleted from the page */
116 : :
117 : : /* Number of live and recently dead tuples, after pruning */
118 : : int live_tuples;
119 : : int recently_dead_tuples;
120 : :
121 : : /* Whether or not the page makes rel truncation unsafe */
122 : : bool hastup;
123 : :
124 : : /*
125 : : * LP_DEAD items on the page after pruning. Includes existing LP_DEAD
126 : : * items
127 : : */
128 : : int lpdead_items; /* number of items in the array */
129 : : OffsetNumber *deadoffsets; /* points directly to presult->deadoffsets */
130 : :
131 : : /*
132 : : * all_visible and all_frozen indicate if the all-visible and all-frozen
133 : : * bits in the visibility map can be set for this page after pruning.
134 : : *
135 : : * visibility_cutoff_xid is the newest xmin of live tuples on the page.
136 : : * The caller can use it as the conflict horizon, when setting the VM
137 : : * bits. It is only valid if we froze some tuples, and all_frozen is
138 : : * true.
139 : : *
140 : : * NOTE: all_visible and all_frozen don't include LP_DEAD items. That's
141 : : * convenient for heap_page_prune_and_freeze(), to use them to decide
142 : : * whether to freeze the page or not. The all_visible and all_frozen
143 : : * values returned to the caller are adjusted to include LP_DEAD items at
144 : : * the end.
145 : : *
146 : : * all_frozen should only be considered valid if all_visible is also set;
147 : : * we don't bother to clear the all_frozen flag every time we clear the
148 : : * all_visible flag.
149 : : */
150 : : bool all_visible;
151 : : bool all_frozen;
152 : : TransactionId visibility_cutoff_xid;
153 : : } PruneState;
154 : :
155 : : /* Local functions */
156 : : static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
157 : : HeapTuple tup,
158 : : Buffer buffer);
159 : : static inline HTSV_Result htsv_get_valid_status(int status);
160 : : static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
161 : : OffsetNumber rootoffnum, PruneState *prstate);
162 : : static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
163 : : static void heap_prune_record_redirect(PruneState *prstate,
164 : : OffsetNumber offnum, OffsetNumber rdoffnum,
165 : : bool was_normal);
166 : : static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
167 : : bool was_normal);
168 : : static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
169 : : bool was_normal);
170 : : static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
171 : :
172 : : static void heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum);
173 : : static void heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum);
174 : : static void heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum);
175 : : static void heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum);
176 : :
177 : : static void page_verify_redirects(Page page);
178 : :
179 : :
180 : : /*
181 : : * Optionally prune and repair fragmentation in the specified page.
182 : : *
183 : : * This is an opportunistic function. It will perform housekeeping
184 : : * only if the page heuristically looks like a candidate for pruning and we
185 : : * can acquire buffer cleanup lock without blocking.
186 : : *
187 : : * Note: this is called quite often. It's important that it fall out quickly
188 : : * if there's not any use in pruning.
189 : : *
190 : : * Caller must have pin on the buffer, and must *not* have a lock on it.
191 : : */
192 : : void
3695 rhaas@postgresql.org 193 :CBC 13838551 : heap_page_prune_opt(Relation relation, Buffer buffer)
194 : : {
2916 kgrittn@postgresql.o 195 : 13838551 : Page page = BufferGetPage(buffer);
196 : : TransactionId prune_xid;
197 : : GlobalVisState *vistest;
198 : : Size minfree;
199 : :
200 : : /*
201 : : * We can't write WAL in recovery mode, so there's no point trying to
202 : : * clean the page. The primary will likely issue a cleaning WAL record
203 : : * soon anyway, so this is no particular loss.
204 : : */
3695 rhaas@postgresql.org 205 [ + + ]: 13838551 : if (RecoveryInProgress())
206 : 168618 : return;
207 : :
208 : : /*
209 : : * First check whether there's any chance there's something to prune;
210 : : * determining the appropriate horizon is a waste if there's no prune_xid
211 : : * (i.e. no updates/deletes have left potentially dead tuples around).
212 : : */
1341 andres@anarazel.de 213 : 13669933 : prune_xid = ((PageHeader) page)->pd_prune_xid;
214 [ + + ]: 13669933 : if (!TransactionIdIsValid(prune_xid))
215 : 6392188 : return;
216 : :
217 : : /*
218 : : * Check whether prune_xid indicates that there may be dead rows that can
219 : : * be cleaned up.
220 : : */
221 : 7277745 : vistest = GlobalVisTestFor(relation);
222 : :
223 [ + + ]: 7277745 : if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
222 tmunro@postgresql.or 224 :GNC 6342562 : return;
225 : :
226 : : /*
227 : : * We prune when a previous UPDATE failed to find enough space on the page
228 : : * for a new tuple version, or when free space falls below the relation's
229 : : * fill-factor target (but not less than 10%).
230 : : *
231 : : * Checking free space here is questionable since we aren't holding any
232 : : * lock on the buffer; in the worst case we could get a bogus answer. It's
233 : : * unlikely to be *seriously* wrong, though, since reading either pd_lower
234 : : * or pd_upper is probably atomic. Avoiding taking a lock seems more
235 : : * important than sometimes getting a wrong answer in what is after all
236 : : * just a heuristic estimate.
237 : : */
3 akorotkov@postgresql 238 [ + + ]:CBC 935183 : minfree = RelationGetTargetPageFreeSpace(relation,
239 : : HEAP_DEFAULT_FILLFACTOR);
6051 tgl@sss.pgh.pa.us 240 : 935183 : minfree = Max(minfree, BLCKSZ / 10);
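	/*
	 * For illustration (hypothetical numbers, assuming the default 8 kB
	 * BLCKSZ): with the default heap fillfactor of 100,
	 * RelationGetTargetPageFreeSpace() returns 0, so the clamp yields
	 *
	 *		minfree = Max(0, 8192 / 10) = 819 bytes
	 *
	 * whereas a fillfactor of 70 would target 8192 * 30 / 100 = 2457 bytes,
	 * already above the 10% floor.
	 */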
241 : :
5754 242 [ + + + + ]: 935183 : if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
243 : : {
244 : : /* OK, try to get exclusive buffer lock */
6051 245 [ + + ]: 35077 : if (!ConditionalLockBufferForCleanup(buffer))
246 : 261 : return;
247 : :
248 : : /*
249 : : * Now that we have buffer lock, get accurate information about the
250 : : * page's free space, and recheck the heuristic about whether to
251 : : * prune.
252 : : */
5754 253 [ + + + - ]: 34816 : if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
254 : : {
255 : : OffsetNumber dummy_off_loc;
256 : : PruneFreezeResult presult;
257 : :
258 : : /*
259 : : * For now, pass mark_unused_now as false regardless of whether or
260 : : * not the relation has indexes, since we cannot safely determine
261 : : * that during on-access pruning with the current implementation.
262 : : */
11 heikki.linnakangas@i 263 :GNC 34816 : heap_page_prune_and_freeze(relation, buffer, vistest, 0,
264 : : NULL, &presult, PRUNE_ON_ACCESS, &dummy_off_loc, NULL, NULL);
265 : :
266 : : /*
267 : : * Report the number of tuples reclaimed to pgstats. This is
268 : : * presult.ndeleted minus the number of newly-LP_DEAD-set items.
269 : : *
270 : : * We derive the number of dead tuples like this to avoid totally
271 : : * forgetting about items that were set to LP_DEAD, since they
272 : : * still need to be cleaned up by VACUUM. We only want to count
273 : : * heap-only tuples that just became LP_UNUSED in our report,
274 : : * which don't need such cleanup.
275 : : *
276 : : * VACUUM doesn't have to compensate in the same way when it
277 : : * tracks ndeleted, since it will set the same LP_DEAD items to
278 : : * LP_UNUSED separately.
279 : : */
199 rhaas@postgresql.org 280 [ + + ]: 34816 : if (presult.ndeleted > presult.nnewlpdead)
884 pg@bowt.ie 281 :CBC 14950 : pgstat_update_heap_dead_tuples(relation,
199 rhaas@postgresql.org 282 :GNC 14950 : presult.ndeleted - presult.nnewlpdead);
283 : : }
284 : :
285 : : /* And release buffer lock */
6051 tgl@sss.pgh.pa.us 286 :CBC 34816 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
287 : :
288 : : /*
289 : : * We avoid reuse of any free space created on the page by unrelated
290 : : * UPDATEs/INSERTs by opting to not update the FSM at this point. The
291 : : * free space should be reused by UPDATEs to *this* page.
292 : : */
293 : : }
294 : : }
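/*
 * A minimal sketch of the expected calling pattern (hypothetical caller,
 * not part of this file): the buffer is pinned but unlocked when
 * heap_page_prune_opt() runs, and the caller takes its own content lock
 * only afterwards.
 *
 *		Buffer		buf = ReadBuffer(relation, blkno);
 *
 *		heap_page_prune_opt(relation, buf);		// pin held, no lock held
 *		LockBuffer(buf, BUFFER_LOCK_SHARE);		// now lock for reading
 *		...
 *		UnlockReleaseBuffer(buf);
 */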
295 : :
296 : :
297 : : /*
298 : : * Prune and repair fragmentation and potentially freeze tuples on the
299 : : * specified page.
300 : : *
301 : : * Caller must have pin and buffer cleanup lock on the page. Note that we
302 : : * don't update the FSM information for page on caller's behalf. Caller might
303 : : * also need to account for a reduction in the length of the line pointer
304 : : * array following array truncation by us.
305 : : *
306 : : * If the HEAP_PRUNE_FREEZE option is set, we will also freeze tuples if it's
307 : : * required in order to advance relfrozenxid / relminmxid, or if it's
308 : : * considered advantageous for overall system performance to do so now. The
309 : : * 'cutoffs', 'presult', 'new_relfrozen_xid' and 'new_relmin_mxid' arguments
310 : : * are required when freezing. When HEAP_PRUNE_FREEZE option is set, we also
311 : : * set presult->all_visible and presult->all_frozen on exit, to indicate if
312 : : * the VM bits can be set. They are always set to false when the
313 : : * HEAP_PRUNE_FREEZE option is not set, because at the moment only callers
314 : : * that also freeze need that information.
315 : : *
316 : : * vistest is used to distinguish whether tuples are DEAD or RECENTLY_DEAD
317 : : * (see heap_prune_satisfies_vacuum).
318 : : *
319 : : * options:
320 : : * MARK_UNUSED_NOW indicates that dead items can be set LP_UNUSED during
321 : : * pruning.
322 : : *
323 : : * FREEZE indicates that we will also freeze tuples, and will return
324 : : * 'all_visible', 'all_frozen' flags to the caller.
325 : : *
326 : : * cutoffs contains the freeze cutoffs, established by VACUUM at the beginning
327 : : * of vacuuming the relation. Required if HEAP_PRUNE_FREEZE option is set.
328 : : *
329 : : * presult contains output parameters needed by callers, such as the number of
330 : : * tuples removed and the offsets of dead items on the page after pruning.
331 : : * heap_page_prune_and_freeze() is responsible for initializing it. Required
332 : : * by all callers.
333 : : *
334 : : * reason indicates why the pruning is performed. It is included in the WAL
335 : : * record for debugging and analysis purposes, but otherwise has no effect.
336 : : *
337 : : * off_loc is the offset location required by the caller, for use in its
338 : : * error callback.
339 : : *
340 : : * new_relfrozen_xid and new_relmin_mxid must be provided by the caller if the
341 : : * HEAP_PRUNE_FREEZE option is set. On entry, they contain the oldest XID and
342 : : * multi-XID seen on the relation so far. They will be updated with the oldest
343 : : * values present on the page after pruning. After processing the whole
344 : : * relation, VACUUM can use these values as the new relfrozenxid/relminmxid
345 : : * for the relation.
346 : : */
347 : : void
11 heikki.linnakangas@i 348 :GNC 361049 : heap_page_prune_and_freeze(Relation relation, Buffer buffer,
349 : : GlobalVisState *vistest,
350 : : int options,
351 : : struct VacuumCutoffs *cutoffs,
352 : : PruneFreezeResult *presult,
353 : : PruneReason reason,
354 : : OffsetNumber *off_loc,
355 : : TransactionId *new_relfrozen_xid,
356 : : MultiXactId *new_relmin_mxid)
357 : : {
2916 kgrittn@postgresql.o 358 :CBC 361049 : Page page = BufferGetPage(buffer);
818 andres@anarazel.de 359 : 361049 : BlockNumber blockno = BufferGetBlockNumber(buffer);
360 : : OffsetNumber offnum,
361 : : maxoff;
362 : : PruneState prstate;
363 : : HeapTupleData tup;
364 : : bool do_freeze;
365 : : bool do_prune;
366 : : bool do_hint;
367 : : bool hint_bit_fpi;
11 heikki.linnakangas@i 368 :GNC 361049 : int64 fpi_before = pgWalUsage.wal_fpi;
369 : :
370 : : /* Copy parameters to prstate */
371 : 361049 : prstate.vistest = vistest;
372 : 361049 : prstate.mark_unused_now = (options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
373 : 361049 : prstate.freeze = (options & HEAP_PAGE_PRUNE_FREEZE) != 0;
374 : 361049 : prstate.cutoffs = cutoffs;
375 : :
376 : : /*
377 : : * Our strategy is to scan the page and make lists of items to change,
378 : : * then apply the changes within a critical section. This keeps as much
379 : : * logic as possible out of the critical section, and also ensures that
380 : : * WAL replay will work the same as the normal case.
381 : : *
382 : : * First, initialize the new pd_prune_xid value to zero (indicating no
383 : : * prunable tuples). If we find any tuples which may soon become
384 : : * prunable, we will save the lowest relevant XID in new_prune_xid. Also
385 : : * initialize the rest of our working state.
386 : : */
5881 tgl@sss.pgh.pa.us 387 :CBC 361049 : prstate.new_prune_xid = InvalidTransactionId;
11 heikki.linnakangas@i 388 :GNC 361049 : prstate.latest_xid_removed = InvalidTransactionId;
389 : 361049 : prstate.nredirected = prstate.ndead = prstate.nunused = prstate.nfrozen = 0;
13 390 : 361049 : prstate.nroot_items = 0;
391 : 361049 : prstate.nheaponly_items = 0;
392 : :
393 : : /* initialize page freezing working state */
11 394 : 361049 : prstate.pagefrz.freeze_required = false;
395 [ + + ]: 361049 : if (prstate.freeze)
396 : : {
397 [ + - - + ]: 326233 : Assert(new_relfrozen_xid && new_relmin_mxid);
398 : 326233 : prstate.pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
399 : 326233 : prstate.pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
400 : 326233 : prstate.pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
401 : 326233 : prstate.pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
402 : : }
403 : : else
404 : : {
405 [ + - - + ]: 34816 : Assert(new_relfrozen_xid == NULL && new_relmin_mxid == NULL);
406 : 34816 : prstate.pagefrz.FreezePageRelminMxid = InvalidMultiXactId;
407 : 34816 : prstate.pagefrz.NoFreezePageRelminMxid = InvalidMultiXactId;
408 : 34816 : prstate.pagefrz.FreezePageRelfrozenXid = InvalidTransactionId;
409 : 34816 : prstate.pagefrz.NoFreezePageRelfrozenXid = InvalidTransactionId;
410 : : }
411 : :
412 : 361049 : prstate.ndeleted = 0;
413 : 361049 : prstate.live_tuples = 0;
414 : 361049 : prstate.recently_dead_tuples = 0;
415 : 361049 : prstate.hastup = false;
416 : 361049 : prstate.lpdead_items = 0;
417 : 361049 : prstate.deadoffsets = presult->deadoffsets;
418 : :
419 : : /*
420 : : * Caller may update the VM after we're done. We can keep track of
421 : : * whether the page will be all-visible and all-frozen after pruning and
422 : : * freezing to help the caller to do that.
423 : : *
424 : : * Currently, only VACUUM sets the VM bits. To save the effort, only do
425 : : * the bookkeeping if the caller needs it. Currently, that's tied to
426 : : * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
427 : : * to update the VM bits without also freezing or freeze without also
428 : : * setting the VM bits.
429 : : *
430 : : * In addition to telling the caller whether it can set the VM bit, we
431 : : * also use 'all_visible' and 'all_frozen' for our own decision-making. If
432 : : * the whole page would become frozen, we consider opportunistically
433 : : * freezing tuples. We will not be able to freeze the whole page if there
434 : : * are tuples present that are not visible to everyone or if there are
435 : : * dead tuples which are not yet removable. However, dead tuples which
436 : : * will be removed by the end of vacuuming should not preclude us from
437 : : * opportunistically freezing. Because of that, we do not clear
438 : : * all_visible when we see LP_DEAD items. We fix that at the end of the
439 : : * function, when we return the value to the caller, so that the caller
440 : : * doesn't set the VM bit incorrectly.
441 : : */
442 [ + + ]: 361049 : if (prstate.freeze)
443 : : {
444 : 326233 : prstate.all_visible = true;
445 : 326233 : prstate.all_frozen = true;
446 : : }
447 : : else
448 : : {
449 : : /*
450 : : * Initializing to false allows skipping the work to update them in
451 : : * heap_prune_record_unchanged_lp_normal().
452 : : */
453 : 34816 : prstate.all_visible = false;
454 : 34816 : prstate.all_frozen = false;
455 : : }
456 : :
457 : : /*
458 : : * The visibility cutoff xid is the newest xmin of live tuples on the
459 : : * page. In the common case, this will be set as the conflict horizon the
460 : : * caller can use for updating the VM. If, at the end of freezing and
461 : : * pruning, the page is all-frozen, there is no possibility that any
462 : : * running transaction on the standby does not see tuples on the page as
463 : : * all-visible, so the conflict horizon remains InvalidTransactionId.
464 : : */
465 : 361049 : prstate.visibility_cutoff_xid = InvalidTransactionId;
466 : :
6051 tgl@sss.pgh.pa.us 467 : 361049 : maxoff = PageGetMaxOffsetNumber(page);
25 heikki.linnakangas@i 468 : 361049 : tup.t_tableOid = RelationGetRelid(relation);
469 : :
470 : : /*
471 : : * Determine HTSV for all tuples, and queue them up for processing as HOT
472 : : * chain roots or as heap-only items.
473 : : *
474 : : * Determining HTSV only once for each tuple is required for correctness,
475 : : * to deal with cases where running HTSV twice could result in different
476 : : * results. For example, RECENTLY_DEAD can turn to DEAD if another
477 : : * checked item causes GlobalVisTestIsRemovableFullXid() to update the
478 : : * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
479 : : * transaction aborts.
480 : : *
481 : : * It's also good for performance. Most commonly tuples within a page are
482 : : * stored at decreasing offsets (while the items are stored at increasing
483 : : * offsets). When processing all tuples on a page this leads to reading
484 : : * memory at decreasing offsets within a page, with a variable stride.
485 : : * That's hard for CPU prefetchers to deal with. Processing the items in
486 : : * reverse order (and thus the tuples in increasing order) increases
487 : : * prefetching efficiency significantly / decreases the number of cache
488 : : * misses.
489 : : */
856 andres@anarazel.de 490 :CBC 361049 : for (offnum = maxoff;
491 [ + + ]: 19740348 : offnum >= FirstOffsetNumber;
492 : 19379299 : offnum = OffsetNumberPrev(offnum))
493 : : {
494 : 19379299 : ItemId itemid = PageGetItemId(page, offnum);
495 : : HeapTupleHeader htup;
496 : :
497 : : /*
498 : : * Set the offset number so that we can display it along with any
499 : : * error that occurred while processing this tuple.
500 : : */
13 heikki.linnakangas@i 501 :GNC 19379299 : *off_loc = offnum;
502 : :
503 : 19379299 : prstate.processed[offnum] = false;
11 504 : 19379299 : prstate.htsv[offnum] = -1;
505 : :
506 : : /* Nothing to do if slot doesn't contain a tuple */
13 507 [ + + ]: 19379299 : if (!ItemIdIsUsed(itemid))
508 : : {
11 509 : 231137 : heap_prune_record_unchanged_lp_unused(page, &prstate, offnum);
856 andres@anarazel.de 510 : 231137 : continue;
511 : : }
512 : :
13 heikki.linnakangas@i 513 [ + + ]: 19148162 : if (ItemIdIsDead(itemid))
514 : : {
515 : : /*
516 : : * If the caller set mark_unused_now true, we can set dead line
517 : : * pointers LP_UNUSED now.
518 : : */
519 [ + + ]: 888168 : if (unlikely(prstate.mark_unused_now))
520 : 1840 : heap_prune_record_unused(&prstate, offnum, false);
521 : : else
11 522 : 886328 : heap_prune_record_unchanged_lp_dead(page, &prstate, offnum);
13 523 : 888168 : continue;
524 : : }
525 : :
526 [ + + ]: 18259994 : if (ItemIdIsRedirected(itemid))
527 : : {
528 : : /* This is the start of a HOT chain */
529 : 232887 : prstate.root_items[prstate.nroot_items++] = offnum;
530 : 232887 : continue;
531 : : }
532 : :
533 [ - + ]: 18027107 : Assert(ItemIdIsNormal(itemid));
534 : :
535 : : /*
536 : : * Get the tuple's visibility status and queue it up for processing.
537 : : */
538 : 18027107 : htup = (HeapTupleHeader) PageGetItem(page, itemid);
539 : 18027107 : tup.t_data = htup;
540 : 18027107 : tup.t_len = ItemIdGetLength(itemid);
541 : 18027107 : ItemPointerSet(&tup.t_self, blockno, offnum);
542 : :
11 heikki.linnakangas@i 543 :CBC 18027107 : prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
544 : : buffer);
545 : :
13 heikki.linnakangas@i 546 [ + + ]:GNC 18027107 : if (!HeapTupleHeaderIsHeapOnly(htup))
547 : 17695142 : prstate.root_items[prstate.nroot_items++] = offnum;
548 : : else
549 : 331965 : prstate.heaponly_items[prstate.nheaponly_items++] = offnum;
550 : : }
551 : :
552 : : /*
553 : : * If checksums are enabled, heap_prune_satisfies_vacuum() may have caused
554 : : * an FPI to be emitted.
555 : : */
11 556 : 361049 : hint_bit_fpi = fpi_before != pgWalUsage.wal_fpi;
557 : :
558 : : /*
559 : : * Process HOT chains.
560 : : *
561 : : * We added the items to the array starting from 'maxoff', so by
562 : : * processing the array in reverse order, we process the items in
563 : : * ascending offset number order. The order doesn't matter for
564 : : * correctness, but some quick micro-benchmarking suggests that this is
565 : : * faster. (Earlier PostgreSQL versions, which scanned all the items on
566 : : * the page instead of using the root_items array, also did it in
567 : : * ascending offset number order.)
568 : : */
13 569 [ + + ]: 18289078 : for (int i = prstate.nroot_items - 1; i >= 0; i--)
570 : : {
571 : 17928029 : offnum = prstate.root_items[i];
572 : :
573 : : /* Ignore items already processed as part of an earlier chain */
574 [ - + ]: 17928029 : if (prstate.processed[offnum])
5881 tgl@sss.pgh.pa.us 575 :UNC 0 : continue;
576 : :
577 : : /* see preceding loop */
13 heikki.linnakangas@i 578 :GNC 17928029 : *off_loc = offnum;
579 : :
580 : : /* Process this item or chain of items */
11 581 : 17928029 : heap_prune_chain(page, blockno, maxoff, offnum, &prstate);
582 : : }
583 : :
584 : : /*
585 : : * Process any heap-only tuples that were not already processed as part of
586 : : * a HOT chain.
587 : : */
13 588 [ + + ]: 693014 : for (int i = prstate.nheaponly_items - 1; i >= 0; i--)
589 : : {
590 : 331965 : offnum = prstate.heaponly_items[i];
591 : :
592 [ + + ]: 331965 : if (prstate.processed[offnum])
593 : 320316 : continue;
594 : :
595 : : /* see preceding loop */
596 : 11649 : *off_loc = offnum;
597 : :
598 : : /*
599 : : * If the tuple is DEAD and doesn't chain to anything else, mark it
600 : : * unused. (If it does chain, we can only remove it as part of
601 : : * pruning its chain.)
602 : : *
603 : : * We need this primarily to handle aborted HOT updates, that is,
604 : : * XMIN_INVALID heap-only tuples. Those might not be linked to by any
605 : : * chain, since the parent tuple might be re-updated before any
606 : : * pruning occurs. So we have to be able to reap them separately from
607 : : * chain-pruning. (Note that HeapTupleHeaderIsHotUpdated will never
608 : : * return true for an XMIN_INVALID tuple, so this code will work even
609 : : * when there were sequential updates within the aborted transaction.)
610 : : */
11 611 [ + + ]: 11649 : if (prstate.htsv[offnum] == HEAPTUPLE_DEAD)
612 : : {
13 613 : 1639 : ItemId itemid = PageGetItemId(page, offnum);
614 : 1639 : HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
615 : :
616 [ + + + - : 1639 : if (likely(!HeapTupleHeaderIsHotUpdated(htup)))
+ - + - ]
617 : : {
618 : 1639 : HeapTupleHeaderAdvanceConflictHorizon(htup,
619 : : &prstate.latest_xid_removed);
620 : 1639 : heap_prune_record_unused(&prstate, offnum, true);
621 : : }
622 : : else
623 : : {
624 : : /*
625 : : * This tuple should've been processed and removed as part of
626 : : * a HOT chain, so something's wrong. To preserve evidence,
627 : : * we don't dare to remove it. We cannot leave behind a DEAD
628 : : * tuple either, because that will cause VACUUM to error out.
629 : : * Throwing an error with a distinct error message seems like
630 : : * the least bad option.
631 : : */
13 heikki.linnakangas@i 632 [ # # ]:UNC 0 : elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
633 : : blockno, offnum);
634 : : }
635 : : }
636 : : else
11 heikki.linnakangas@i 637 :GNC 10010 : heap_prune_record_unchanged_lp_normal(page, &prstate, offnum);
638 : : }
639 : :
640 : : /* We should now have processed every tuple exactly once */
641 : : #ifdef USE_ASSERT_CHECKING
13 heikki.linnakangas@i 642 :CBC 361049 : for (offnum = FirstOffsetNumber;
643 [ + + ]: 19740348 : offnum <= maxoff;
644 : 19379299 : offnum = OffsetNumberNext(offnum))
645 : : {
13 heikki.linnakangas@i 646 :GNC 19379299 : *off_loc = offnum;
647 : :
648 [ - + ]: 19379299 : Assert(prstate.processed[offnum]);
649 : : }
650 : : #endif
651 : :
652 : : /* Clear the offset information once we have processed the given page. */
653 : 361049 : *off_loc = InvalidOffsetNumber;
654 : :
11 655 : 1070016 : do_prune = prstate.nredirected > 0 ||
656 [ + + + + ]: 676451 : prstate.ndead > 0 ||
657 [ + + ]: 315402 : prstate.nunused > 0;
658 : :
659 : : /*
660 : : * Even if we don't prune anything, if we found a new value for the
661 : : * pd_prune_xid field or the page was marked full, we will update the hint
662 : : * bit.
663 : : */
664 [ + + + + ]: 676147 : do_hint = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
665 : 315098 : PageIsFull(page);
666 : :
667 : : /*
668 : : * Decide if we want to go ahead with freezing according to the freeze
669 : : * plans we prepared, or not.
670 : : */
671 : 361049 : do_freeze = false;
672 [ + + ]: 361049 : if (prstate.freeze)
673 : : {
674 [ + + ]: 326233 : if (prstate.pagefrz.freeze_required)
675 : : {
676 : : /*
677 : : * heap_prepare_freeze_tuple indicated that at least one XID/MXID
678 : : * from before FreezeLimit/MultiXactCutoff is present. Must
679 : : * freeze to advance relfrozenxid/relminmxid.
680 : : */
681 : 13731 : do_freeze = true;
682 : : }
683 : : else
684 : : {
685 : : /*
686 : : * Opportunistically freeze the page if we are generating an FPI
687 : : * anyway and if doing so means that we can set the page
688 : : * all-frozen afterwards (might not happen until VACUUM's final
689 : : * heap pass).
690 : : *
691 : : * XXX: Previously, we knew if pruning emitted an FPI by checking
692 : : * pgWalUsage.wal_fpi before and after pruning. Once the freeze
693 : : * and prune records were combined, this heuristic couldn't be
694 : : * used anymore. The opportunistic freeze heuristic must be
695 : : * improved; however, for now, try to approximate the old logic.
696 : : */
697 [ + + + - : 312502 : if (prstate.all_visible && prstate.all_frozen && prstate.nfrozen > 0)
+ + ]
698 : : {
699 : : /*
700 : : * Freezing would make the page all-frozen. Have already
701 : : * emitted an FPI or will do so anyway?
702 : : */
703 [ + + + + : 14517 : if (RelationNeedsWAL(relation))
+ - + - ]
704 : : {
705 [ + + ]: 14511 : if (hint_bit_fpi)
706 : 475 : do_freeze = true;
707 [ + + ]: 14036 : else if (do_prune)
708 : : {
709 [ + + ]: 1808 : if (XLogCheckBufferNeedsBackup(buffer))
710 : 720 : do_freeze = true;
711 : : }
712 [ + + ]: 12228 : else if (do_hint)
713 : : {
714 [ + - + + : 6 : if (XLogHintBitIsNeeded() && XLogCheckBufferNeedsBackup(buffer))
+ + ]
715 : 1 : do_freeze = true;
716 : : }
717 : : }
718 : : }
719 : : }
720 : : }
721 : :
722 [ + + ]: 361049 : if (do_freeze)
723 : : {
724 : : /*
725 : : * Validate the tuples we will be freezing before entering the
726 : : * critical section.
727 : : */
728 : 14927 : heap_pre_freeze_checks(buffer, prstate.frozen, prstate.nfrozen);
729 : : }
730 [ + + ]: 346122 : else if (prstate.nfrozen > 0)
731 : : {
732 : : /*
733 : : * The page contained some tuples that were not already frozen, and we
734 : : * chose not to freeze them now. The page won't be all-frozen then.
735 : : */
736 [ - + ]: 13748 : Assert(!prstate.pagefrz.freeze_required);
737 : :
738 : 13748 : prstate.all_frozen = false;
739 : 13748 : prstate.nfrozen = 0; /* avoid miscounts in instrumentation */
740 : : }
741 : : else
742 : : {
743 : : /*
744 : : * We have no freeze plans to execute. The page might already be
745 : : * all-frozen (perhaps only following pruning), though. Such a page
746 : : * can be marked all-frozen in the VM by our caller, even though none
747 : : * of its tuples were newly frozen here.
748 : : */
749 : : }
750 : :
751 : : /* Any error while applying the changes is critical */
11 heikki.linnakangas@i 752 :CBC 361049 : START_CRIT_SECTION();
753 : :
11 heikki.linnakangas@i 754 [ + + ]:GNC 361049 : if (do_hint)
755 : : {
756 : : /*
757 : : * Update the page's pd_prune_xid field to either zero, or the lowest
758 : : * XID of any soon-prunable tuple.
759 : : */
5881 tgl@sss.pgh.pa.us 760 :CBC 46018 : ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
761 : :
762 : : /*
763 : : * Also clear the "page is full" flag, since there's no point in
764 : : * repeating the prune/defrag process until something else happens to
765 : : * the page.
766 : : */
767 : 46018 : PageClearFull(page);
768 : :
769 : : /*
770 : : * If that's all we had to do to the page, this is a non-WAL-logged
771 : : * hint. If we are going to freeze or prune the page, we will mark
772 : : * the buffer dirty below.
773 : : */
11 heikki.linnakangas@i 774 [ + + + + ]:GNC 46018 : if (!do_freeze && !do_prune)
775 : 176 : MarkBufferDirtyHint(buffer, true);
776 : : }
777 : :
778 [ + + + + ]: 361049 : if (do_prune || do_freeze)
779 : : {
780 : : /* Apply the planned item changes and repair page fragmentation. */
781 [ + + ]: 59532 : if (do_prune)
782 : : {
783 : 46028 : heap_page_prune_execute(buffer, false,
784 : : prstate.redirected, prstate.nredirected,
785 : : prstate.nowdead, prstate.ndead,
786 : : prstate.nowunused, prstate.nunused);
787 : : }
788 : :
789 [ + + ]: 59532 : if (do_freeze)
790 : 14927 : heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
791 : :
6051 tgl@sss.pgh.pa.us 792 :CBC 59532 : MarkBufferDirty(buffer);
793 : :
794 : : /*
795 : : * Emit a WAL XLOG_HEAP2_PRUNE_FREEZE record showing what we did
796 : : */
4871 rhaas@postgresql.org 797 [ + + + + : 59532 : if (RelationNeedsWAL(relation))
+ - + - ]
798 : : {
799 : : /*
800 : : * The snapshotConflictHorizon for the whole record should be the
801 : : * most conservative of all the horizons calculated for any of the
802 : : * possible modifications. If this record will prune tuples, any
803 : : * transactions on the standby older than the youngest xmax of the
804 : : * most recently removed tuple this record will prune will
805 : : * conflict. If this record will freeze tuples, any transactions
806 : : * on the standby with xids older than the youngest tuple this
807 : : * record will freeze will conflict.
808 : : */
11 heikki.linnakangas@i 809 :GNC 58724 : TransactionId frz_conflict_horizon = InvalidTransactionId;
810 : : TransactionId conflict_xid;
811 : :
812 : : /*
813 : : * We can use the visibility_cutoff_xid as our cutoff for
814 : : * conflicts when the whole page is eligible to become all-frozen
815 : : * in the VM once we're done with it. Otherwise we generate a
816 : : * conservative cutoff by stepping back from OldestXmin.
817 : : */
818 [ + + ]: 58724 : if (do_freeze)
819 : : {
820 [ + + + + ]: 14923 : if (prstate.all_visible && prstate.all_frozen)
821 : 12634 : frz_conflict_horizon = prstate.visibility_cutoff_xid;
822 : : else
823 : : {
824 : : /* Avoids false conflicts when hot_standby_feedback in use */
825 : 2289 : frz_conflict_horizon = prstate.cutoffs->OldestXmin;
826 [ - + ]: 2289 : TransactionIdRetreat(frz_conflict_horizon);
827 : : }
828 : : }
829 : :
830 [ + + ]: 58724 : if (TransactionIdFollows(frz_conflict_horizon, prstate.latest_xid_removed))
831 : 13791 : conflict_xid = frz_conflict_horizon;
832 : : else
833 : 44933 : conflict_xid = prstate.latest_xid_removed;
834 : :
20 835 : 58724 : log_heap_prune_and_freeze(relation, buffer,
836 : : conflict_xid,
837 : : true, reason,
838 : : prstate.frozen, prstate.nfrozen,
839 : : prstate.redirected, prstate.nredirected,
840 : : prstate.nowdead, prstate.ndead,
841 : : prstate.nowunused, prstate.nunused);
842 : : }
843 : : }
844 : :
6051 tgl@sss.pgh.pa.us 845 [ - + ]:CBC 361049 : END_CRIT_SECTION();
846 : :
847 : : /* Copy information back for caller */
13 heikki.linnakangas@i 848 :GNC 361049 : presult->ndeleted = prstate.ndeleted;
11 849 : 361049 : presult->nnewlpdead = prstate.ndead;
850 : 361049 : presult->nfrozen = prstate.nfrozen;
851 : 361049 : presult->live_tuples = prstate.live_tuples;
852 : 361049 : presult->recently_dead_tuples = prstate.recently_dead_tuples;
853 : :
854 : : /*
855 : : * It was convenient to ignore LP_DEAD items in all_visible earlier on to
856 : : * make the choice of whether or not to freeze the page unaffected by the
857 : : * short-term presence of LP_DEAD items. These LP_DEAD items were
858 : : * effectively assumed to be LP_UNUSED items in the making. It doesn't
859 : : * matter which vacuum heap pass (initial pass or final pass) ends up
860 : : * setting the page all-frozen, as long as the ongoing VACUUM does it.
861 : : *
862 : : * Now that freezing has been finalized, unset all_visible if there are
863 : : * any LP_DEAD items on the page. It needs to reflect the present state
864 : : * of the page, as expected by our caller.
865 : : */
866 [ + + + + ]: 361049 : if (prstate.all_visible && prstate.lpdead_items == 0)
867 : : {
868 : 161406 : presult->all_visible = prstate.all_visible;
869 : 161406 : presult->all_frozen = prstate.all_frozen;
870 : : }
871 : : else
872 : : {
873 : 199643 : presult->all_visible = false;
874 : 199643 : presult->all_frozen = false;
875 : : }
876 : :
877 : 361049 : presult->hastup = prstate.hastup;
878 : :
879 : : /*
880 : : * For callers planning to update the visibility map, the conflict horizon
881 : : * for that record must be the newest xmin on the page. However, if the
882 : : * page is completely frozen, there can be no conflict and the
883 : : * vm_conflict_horizon should remain InvalidTransactionId. This includes
884 : : * the case that we just froze all the tuples; the prune-freeze record
885 : : * included the conflict XID already so the caller doesn't need it.
886 : : */
887 [ + + ]: 361049 : if (presult->all_frozen)
888 : 151083 : presult->vm_conflict_horizon = InvalidTransactionId;
889 : : else
890 : 209966 : presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
891 : :
892 : 361049 : presult->lpdead_items = prstate.lpdead_items;
893 : : /* the presult->deadoffsets array was already filled in */
894 : :
895 [ + + ]: 361049 : if (prstate.freeze)
896 : : {
897 [ + + ]: 326233 : if (presult->nfrozen > 0)
898 : : {
899 : 14927 : *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
900 : 14927 : *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
901 : : }
902 : : else
903 : : {
904 : 311306 : *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
905 : 311306 : *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
906 : : }
907 : : }
6051 tgl@sss.pgh.pa.us 908 :GIC 361049 : }
909 : :
910 : :
911 : : /*
912 : : * Perform visibility checks for heap pruning.
913 : : */
914 : : static HTSV_Result
1341 andres@anarazel.de 915 :CBC 18027107 : heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
916 : : {
917 : : HTSV_Result res;
918 : : TransactionId dead_after;
919 : :
920 : 18027107 : res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after);
921 : :
922 [ + + ]: 18027107 : if (res != HEAPTUPLE_RECENTLY_DEAD)
923 : 16377830 : return res;
924 : :
925 [ + + ]: 1649277 : if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
926 : 1379152 : res = HEAPTUPLE_DEAD;
927 : :
928 : 1649277 : return res;
929 : : }
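/*
 * A worked example of the two-step test above, with hypothetical XIDs:
 * suppose HeapTupleSatisfiesVacuumHorizon() reports
 * HEAPTUPLE_RECENTLY_DEAD with dead_after = 1000. If vistest's removable
 * horizon has since advanced past XID 1000, the result is promoted:
 *
 *		res = heap_prune_satisfies_vacuum(&prstate, &tup, buffer);
 *		// horizon > 1000  =>  res == HEAPTUPLE_DEAD, tuple is removable
 *
 * Every other HTSV_Result is returned unchanged.
 */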
930 : :
931 : :
932 : : /*
933 : : * Pruning calculates tuple visibility once and saves the results in an array
934 : : * of int8. See PruneState.htsv for details. This helper function is meant
935 : : * to guard against examining visibility status array members which have not
936 : : * yet been computed.
937 : : */
938 : : static inline HTSV_Result
11 heikki.linnakangas@i 939 :GNC 18015458 : htsv_get_valid_status(int status)
940 : : {
941 [ + - - + ]: 18015458 : Assert(status >= HEAPTUPLE_DEAD &&
942 : : status <= HEAPTUPLE_DELETE_IN_PROGRESS);
943 : 18015458 : return (HTSV_Result) status;
944 : : }
945 : :
946 : : /*
947 : : * Prune specified line pointer or a HOT chain originating at line pointer.
948 : : *
949 : : * Tuple visibility information is provided in prstate->htsv.
950 : : *
951 : : * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
952 : : * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
953 : : * chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
954 : : * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
955 : : * DEAD, our visibility test is just too coarse to detect it.
956 : : *
957 : : * Pruning must never leave behind a DEAD tuple that still has tuple storage.
958 : : * VACUUM isn't prepared to deal with that case.
959 : : *
960 : : * The root line pointer is redirected to the tuple immediately after the
961 : : * latest DEAD tuple. If all tuples in the chain are DEAD, the root line
962 : : * pointer is marked LP_DEAD. (This includes the case of a DEAD simple
963 : : * tuple, which we treat as a chain of length 1.)
964 : : *
965 : : * We don't actually change the page here. We just add entries to the arrays in
966 : : * prstate showing the changes to be made. Items to be redirected are added
967 : : * to the redirected[] array (two entries per redirection); items to be set to
968 : : * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
969 : : * state are added to nowunused[]. We perform bookkeeping of live tuples,
970 : : * visibility etc. based on what the page will look like after the changes
971 : : * are applied. All that bookkeeping is performed in the heap_prune_record_*()
972 : : * subroutines. The division of labor is that heap_prune_chain() decides the
973 : : * fate of each tuple, ie. whether it's going to be removed, redirected or
974 : : * left unchanged, and the heap_prune_record_*() subroutines update PruneState
975 : : * based on that outcome.
976 : : */
977 : : static void
13 978 : 17928029 : heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
979 : : OffsetNumber rootoffnum, PruneState *prstate)
980 : : {
5995 bruce@momjian.us 981 :CBC 17928029 : TransactionId priorXmax = InvalidTransactionId;
982 : : ItemId rootlp;
983 : : OffsetNumber offnum;
984 : : OffsetNumber chainitems[MaxHeapTuplesPerPage];
985 : :
986 : : /*
987 : : * After traversing the HOT chain, ndeadchain is the index in chainitems
988 : : * of the first live successor after the last dead item.
989 : : */
13 heikki.linnakangas@i 990 :GNC 17928029 : int ndeadchain = 0,
991 : 17928029 : nchain = 0;
992 : :
993 : 17928029 : rootlp = PageGetItemId(page, rootoffnum);
994 : :
995 : : /* Start from the root tuple */
6051 tgl@sss.pgh.pa.us 996 :CBC 17928029 : offnum = rootoffnum;
997 : :
998 : : /* while not end of the chain */
999 : : for (;;)
1000 : 320316 : {
1001 : : HeapTupleHeader htup;
1002 : : ItemId lp;
1003 : :
1004 : : /* Sanity check (pure paranoia) */
935 pg@bowt.ie 1005 [ - + ]: 18248345 : if (offnum < FirstOffsetNumber)
935 pg@bowt.ie 1006 :UBC 0 : break;
1007 : :
1008 : : /*
1009 : : * An offset past the end of page's line pointer array is possible
1010 : : * when the array was truncated (original item must have been unused)
1011 : : */
935 pg@bowt.ie 1012 [ - + ]:CBC 18248345 : if (offnum > maxoff)
6051 tgl@sss.pgh.pa.us 1013 :UBC 0 : break;
1014 : :
1015 : : /* If item is already processed, stop --- it must not be the same chain */
13 heikki.linnakangas@i 1016 [ - + ]:GNC 18248345 : if (prstate->processed[offnum])
5881 tgl@sss.pgh.pa.us 1017 :LBC (1991) : break;
1018 : :
13 heikki.linnakangas@i 1019 :GNC 18248345 : lp = PageGetItemId(page, offnum);
1020 : :
1021 : : /*
1022 : : * An unused item obviously isn't part of the chain. Likewise, a dead
1023 : : * line pointer can't be part of the chain. Both of those cases were
1024 : : * already marked as processed.
1025 : : */
1026 [ - + ]: 18248345 : Assert(ItemIdIsUsed(lp));
1027 [ - + ]: 18248345 : Assert(!ItemIdIsDead(lp));
1028 : :
1029 : : /*
1030 : : * If we are looking at the redirected root line pointer, jump to the
1031 : : * first normal tuple in the chain. If we find a redirect somewhere
1032 : : * else, stop --- it must not be the same chain.
1033 : : */
6051 tgl@sss.pgh.pa.us 1034 [ + + ]:CBC 18248345 : if (ItemIdIsRedirected(lp))
1035 : : {
1036 [ - + ]: 232887 : if (nchain > 0)
6051 tgl@sss.pgh.pa.us 1037 :UBC 0 : break; /* not at start of chain */
6051 tgl@sss.pgh.pa.us 1038 :CBC 232887 : chainitems[nchain++] = offnum;
1039 : 232887 : offnum = ItemIdGetRedirect(rootlp);
1040 : 232887 : continue;
1041 : : }
1042 : :
1043 [ - + ]: 18015458 : Assert(ItemIdIsNormal(lp));
1044 : :
13 heikki.linnakangas@i 1045 :GNC 18015458 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1046 : :
1047 : : /*
1048 : : * Check the tuple XMIN against prior XMAX, if any
1049 : : */
6051 tgl@sss.pgh.pa.us 1050 [ + + - + ]:CBC 18102887 : if (TransactionIdIsValid(priorXmax) &&
2355 alvherre@alvh.no-ip. 1051 [ + - ]: 87429 : !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
6051 tgl@sss.pgh.pa.us 1052 :UBC 0 : break;
1053 : :
1054 : : /*
1055 : : * OK, this tuple is indeed a member of the chain.
1056 : : */
6051 tgl@sss.pgh.pa.us 1057 :CBC 18015458 : chainitems[nchain++] = offnum;
1058 : :
11 heikki.linnakangas@i 1059 [ + + + - ]:GNC 18015458 : switch (htsv_get_valid_status(prstate->htsv[offnum]))
1060 : : {
6051 tgl@sss.pgh.pa.us 1061 :CBC 1414955 : case HEAPTUPLE_DEAD:
1062 : :
1063 : : /* Remember the last DEAD tuple seen */
13 heikki.linnakangas@i 1064 :GNC 1414955 : ndeadchain = nchain;
1065 : 1414955 : HeapTupleHeaderAdvanceConflictHorizon(htup,
1066 : : &prstate->latest_xid_removed);
1067 : : /* Advance to next chain member */
6051 tgl@sss.pgh.pa.us 1068 :CBC 1414955 : break;
1069 : :
1070 : 270125 : case HEAPTUPLE_RECENTLY_DEAD:
1071 : :
1072 : : /*
1073 : : * We don't need to advance the conflict horizon for
1074 : : * RECENTLY_DEAD tuples, even if we are removing them. This
1075 : : * is because we only remove RECENTLY_DEAD tuples if they
1076 : : * precede a DEAD tuple, and the DEAD tuple must have been
1077 : : * inserted by a newer transaction than the RECENTLY_DEAD
1078 : : * tuple by virtue of being later in the chain. We will have
1079 : : * advanced the conflict horizon for the DEAD tuple.
1080 : : */
1081 : :
1082 : : /*
1083 : : * Advance past RECENTLY_DEAD tuples just in case there's a
1084 : : * DEAD one after them. We have to make sure that we don't
1085 : : * miss any DEAD tuples, since DEAD tuples that still have
1086 : : * tuple storage after pruning will confuse VACUUM.
1087 : : */
1088 : 270125 : break;
1089 : :
1090 : 16330378 : case HEAPTUPLE_DELETE_IN_PROGRESS:
6051 tgl@sss.pgh.pa.us 1091 :ECB (13436962) : case HEAPTUPLE_LIVE:
1092 : : case HEAPTUPLE_INSERT_IN_PROGRESS:
13 heikki.linnakangas@i 1093 :GNC 16330378 : goto process_chain;
1094 : :
6051 tgl@sss.pgh.pa.us 1095 :UBC 0 : default:
1096 [ # # ]: 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1097 : : goto process_chain;
1098 : : }
1099 : :
1100 : : /*
1101 : : * If the tuple is not HOT-updated, then we are at the end of this
1102 : : * HOT-update chain.
1103 : : */
6051 tgl@sss.pgh.pa.us 1104 [ + + + - :CBC 1685080 : if (!HeapTupleHeaderIsHotUpdated(htup))
+ + ]
13 heikki.linnakangas@i 1105 :GNC 1597651 : goto process_chain;
1106 : :
1107 : : /* HOT implies it can't have moved to different partition */
2199 andres@anarazel.de 1108 [ - + ]:CBC 87429 : Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1109 : :
1110 : : /*
1111 : : * Advance to next chain member.
1112 : : */
13 heikki.linnakangas@i 1113 [ - + ]:GNC 87429 : Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blockno);
6051 tgl@sss.pgh.pa.us 1114 :CBC 87429 : offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
4099 alvherre@alvh.no-ip. 1115 [ + - + + : 87429 : priorXmax = HeapTupleHeaderGetUpdateXid(htup);
+ - ]
1116 : : }
1117 : :
13 heikki.linnakangas@i 1118 [ # # # # ]:UNC 0 : if (ItemIdIsRedirected(rootlp) && nchain < 2)
1119 : : {
1120 : : /*
1121 : : * We found a redirect item that doesn't point to a valid follow-on
1122 : : * item. This can happen if the loop in heap_page_prune_and_freeze()
1123 : : * caused us to visit the dead successor of a redirect item before
1124 : : * visiting the redirect item. We can clean up by setting the
1125 : : * redirect item to LP_DEAD state or LP_UNUSED if the caller
1126 : : * indicated.
1127 : : */
1128 : 0 : heap_prune_record_dead_or_unused(prstate, rootoffnum, false);
1129 : 0 : return;
1130 : : }
1131 : :
1132 : 0 : process_chain:
1133 : :
13 heikki.linnakangas@i 1134 [ + + ]:GNC 17928029 : if (ndeadchain == 0)
1135 : : {
1136 : : /*
1137 : : * No DEAD tuple was found, so the chain is entirely composed of
1138 : : * normal, unchanged tuples. Leave it alone.
1139 : : */
11 1140 : 16541696 : int i = 0;
1141 : :
1142 [ + + ]: 16541696 : if (ItemIdIsRedirected(rootlp))
1143 : : {
1144 : 219132 : heap_prune_record_unchanged_lp_redirect(prstate, rootoffnum);
1145 : 219132 : i++;
1146 : : }
1147 [ + + ]: 33086139 : for (; i < nchain; i++)
1148 : 16544443 : heap_prune_record_unchanged_lp_normal(page, prstate, chainitems[i]);
1149 : : }
13 1150 [ + + ]: 1386333 : else if (ndeadchain == nchain)
1151 : : {
1152 : : /*
1153 : : * The entire chain is dead. Mark the root line pointer LP_DEAD, and
1154 : : * fully remove the other tuples in the chain.
1155 : : */
1156 : 1330633 : heap_prune_record_dead_or_unused(prstate, rootoffnum, ItemIdIsNormal(rootlp));
1157 [ + + ]: 1359445 : for (int i = 1; i < nchain; i++)
1158 : 28812 : heap_prune_record_unused(prstate, chainitems[i], true);
1159 : : }
1160 : : else
1161 : : {
1162 : : /*
1163 : : * We found a DEAD tuple in the chain. Redirect the root line pointer
1164 : : * to the first non-DEAD tuple, and mark as unused each intermediate
1165 : : * item that we are able to remove from the chain.
1166 : : */
1167 : 55700 : heap_prune_record_redirect(prstate, rootoffnum, chainitems[ndeadchain],
1168 : 55700 : ItemIdIsNormal(rootlp));
1169 [ + + ]: 69265 : for (int i = 1; i < ndeadchain; i++)
1170 : 13565 : heap_prune_record_unused(prstate, chainitems[i], true);
1171 : :
1172 : : /* the rest of tuples in the chain are normal, unchanged tuples */
1173 [ + + ]: 111760 : for (int i = ndeadchain; i < nchain; i++)
11 1174 : 56060 : heap_prune_record_unchanged_lp_normal(page, prstate, chainitems[i]);
1175 : : }
1176 : : }
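/*
 * A concrete illustration of the three cases above (hypothetical page
 * layout): suppose the chain is rooted at an LP_REDIRECT item at offset 1
 * pointing to offset 2, and traversal collected
 *
 *		chainitems[] = {1 (redirect), 2 (DEAD), 3 (LIVE)}
 *
 * giving nchain = 3 and ndeadchain = 2. The third branch applies: the
 * root at offset 1 is redirected to offset 3, offset 2 is recorded
 * LP_UNUSED, and offset 3 is recorded as unchanged. Had offset 3 been
 * DEAD as well (ndeadchain == nchain), the root would instead have been
 * recorded LP_DEAD (or LP_UNUSED under mark_unused_now) and both
 * successors marked unused.
 */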
1177 : :
1178 : : /* Record lowest soon-prunable XID */
1179 : : static void
5881 tgl@sss.pgh.pa.us 1180 :CBC 3931059 : heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
1181 : : {
1182 : : /*
1183 : : * This should exactly match the PageSetPrunable macro. We can't store
1184 : : * directly into the page header yet, so we update working state.
1185 : : */
1186 [ - + ]: 3931059 : Assert(TransactionIdIsNormal(xid));
1187 [ + + + + ]: 7710089 : if (!TransactionIdIsValid(prstate->new_prune_xid) ||
1188 : 3779030 : TransactionIdPrecedes(xid, prstate->new_prune_xid))
1189 : 152913 : prstate->new_prune_xid = xid;
1190 : 3931059 : }
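/*
 * For comparison, the PageSetPrunable() macro this must mirror updates the
 * page header directly, roughly like this (paraphrased sketch, not a
 * verbatim quote of bufpage.h):
 *
 *		if (!TransactionIdIsValid(((PageHeader) page)->pd_prune_xid) ||
 *			TransactionIdPrecedes(xid, ((PageHeader) page)->pd_prune_xid))
 *			((PageHeader) page)->pd_prune_xid = xid;
 *
 * heap_prune_record_prunable() applies the same "keep the lowest valid
 * XID" rule to prstate->new_prune_xid instead.
 */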
1191 : :
1192 : : /* Record line pointer to be redirected */
1193 : : static void
1194 : 55700 : heap_prune_record_redirect(PruneState *prstate,
1195 : : OffsetNumber offnum, OffsetNumber rdoffnum,
1196 : : bool was_normal)
1197 : : {
13 heikki.linnakangas@i 1198 [ - + ]:GNC 55700 : Assert(!prstate->processed[offnum]);
1199 : 55700 : prstate->processed[offnum] = true;
1200 : :
1201 : : /*
1202 : : * Do not mark the redirect target here. It needs to be counted
1203 : : * separately as an unchanged tuple.
1204 : : */
1205 : :
5881 tgl@sss.pgh.pa.us 1206 [ - + ]:CBC 55700 : Assert(prstate->nredirected < MaxHeapTuplesPerPage);
1207 : 55700 : prstate->redirected[prstate->nredirected * 2] = offnum;
1208 : 55700 : prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
1209 : :
1210 : 55700 : prstate->nredirected++;
1211 : :
1212 : : /*
1213 : : * If the root entry had been a normal tuple, we are deleting it, so count
1214 : : * it in the result. But changing a redirect (even to DEAD state) doesn't
1215 : : * count.
1216 : : */
13 heikki.linnakangas@i 1217 [ + + ]:GNC 55700 : if (was_normal)
1218 : 49559 : prstate->ndeleted++;
1219 : :
11 1220 : 55700 : prstate->hastup = true;
6051 tgl@sss.pgh.pa.us 1221 :CBC 55700 : }
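/*
 * A sketch of how the paired redirected[] layout recorded above is later
 * consumed (hypothetical loop; the real application happens in
 * heap_page_prune_execute()):
 *
 *		for (int i = 0; i < nredirected; i++)
 *		{
 *			OffsetNumber fromoff = redirected[i * 2];
 *			OffsetNumber tooff = redirected[i * 2 + 1];
 *
 *			ItemIdSetRedirect(PageGetItemId(page, fromoff), tooff);
 *		}
 */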
1222 : :
1223 : : /* Record line pointer to be marked dead */
1224 : : static void
13 heikki.linnakangas@i 1225 :GNC 1296906 : heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
1226 : : bool was_normal)
1227 : : {
1228 [ - + ]: 1296906 : Assert(!prstate->processed[offnum]);
1229 : 1296906 : prstate->processed[offnum] = true;
1230 : :
5881 tgl@sss.pgh.pa.us 1231 [ - + ]:CBC 1296906 : Assert(prstate->ndead < MaxHeapTuplesPerPage);
1232 : 1296906 : prstate->nowdead[prstate->ndead] = offnum;
1233 : 1296906 : prstate->ndead++;
1234 : :
1235 : : /*
1236 : : * Deliberately delay unsetting all_visible until later during pruning.
1237 : : * Removable dead tuples shouldn't preclude freezing the page.
1238 : : */
1239 : :
1240 : : /* Record the dead offset for vacuum */
11 heikki.linnakangas@i 1241 :GNC 1296906 : prstate->deadoffsets[prstate->lpdead_items++] = offnum;
1242 : :
1243 : : /*
1244 : : * If the root entry had been a normal tuple, we are deleting it, so count
1245 : : * it in the result. But changing a redirect (even to DEAD state) doesn't
1246 : : * count.
1247 : : */
13 1248 [ + + ]: 1296906 : if (was_normal)
1249 : 1289292 : prstate->ndeleted++;
6051 tgl@sss.pgh.pa.us 1250 : 1296906 : }
1251 : :
1252 : : /*
1253 : : * Depending on whether or not the caller set mark_unused_now to true, record that a
1254 : : * line pointer should be marked LP_DEAD or LP_UNUSED. There are other cases in
1255 : : * which we will mark line pointers LP_UNUSED, but we will not mark line
1256 : : * pointers LP_DEAD if mark_unused_now is true.
1257 : : */
1258 : : static void
13 heikki.linnakangas@i 1259 : 1330633 : heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
1260 : : bool was_normal)
1261 : : {
1262 : : /*
1263 : : * If the caller set mark_unused_now to true, we can remove dead tuples
1264 : : * during pruning instead of marking their line pointers dead. Set this
1265 : : * tuple's line pointer LP_UNUSED. We hint that this option is less
1266 : : * likely.
1267 : : */
87 rhaas@postgresql.org 1268 [ + + ]: 1330633 : if (unlikely(prstate->mark_unused_now))
13 heikki.linnakangas@i 1269 : 33727 : heap_prune_record_unused(prstate, offnum, was_normal);
1270 : : else
1271 : 1296906 : heap_prune_record_dead(prstate, offnum, was_normal);
87 rhaas@postgresql.org 1272 :CBC 1330633 : }
1273 : :
1274 : : /* Record line pointer to be marked unused */
1275 : : static void
13 heikki.linnakangas@i 1276 :GNC 79583 : heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
1277 : : {
1278 [ - + ]: 79583 : Assert(!prstate->processed[offnum]);
1279 : 79583 : prstate->processed[offnum] = true;
1280 : :
5881 tgl@sss.pgh.pa.us 1281 [ - + ]:CBC 79583 : Assert(prstate->nunused < MaxHeapTuplesPerPage);
1282 : 79583 : prstate->nowunused[prstate->nunused] = offnum;
1283 : 79583 : prstate->nunused++;
1284 : :
1285 : : /*
1286 : : * If the root entry had been a normal tuple, we are deleting it, so count
1287 : : * it in the result. But changing a redirect (even to DEAD state) doesn't
1288 : : * count.
1289 : : */
13 heikki.linnakangas@i 1290 [ + + ]:GNC 79583 : if (was_normal)
1291 : 77743 : prstate->ndeleted++;
5881 tgl@sss.pgh.pa.us 1292 : 79583 : }
1293 : :
1294 : : /*
1295 : : * Record an unused line pointer that is left unchanged.
1296 : : */
1297 : : static void
11 heikki.linnakangas@i 1298 : 231137 : heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum)
1299 : : {
1300 [ - + ]: 231137 : Assert(!prstate->processed[offnum]);
1301 : 231137 : prstate->processed[offnum] = true;
1302 : 231137 : }
1303 : :
1304 : : /*
1305 : :  * Record an LP_NORMAL line pointer that is left unchanged. We consider
1306 : :  * freezing it, and update bookkeeping of tuple counts and page visibility.
1307 : : */
1308 : : static void
1309 : 16610513 : heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum)
1310 : : {
1311 : : HeapTupleHeader htup;
1312 : :
1313 [ - + ]: 16610513 : Assert(!prstate->processed[offnum]);
1314 : 16610513 : prstate->processed[offnum] = true;
1315 : :
1316 : 16610513 : prstate->hastup = true; /* the page is not empty */
1317 : :
1318 : : /*
1319 : : * The criteria for counting a tuple as live in this block need to match
1320 : : * what analyze.c's acquire_sample_rows() does, otherwise VACUUM and
1321 : : * ANALYZE may produce wildly different reltuples values, e.g. when there
1322 : : * are many recently-dead tuples.
1323 : : *
1324 : : * The logic here is a bit simpler than acquire_sample_rows(), as VACUUM
1325 : : * can't run inside a transaction block, which makes some cases impossible
1326 : : * (e.g. in-progress insert from the same transaction).
1327 : : *
1328 : :  * HEAPTUPLE_DEAD tuples are handled by the other heap_prune_record_*()
1329 : : * subroutines. They don't count dead items like acquire_sample_rows()
1330 : : * does, because we assume that all dead items will become LP_UNUSED
1331 : : * before VACUUM finishes. This difference is only superficial. VACUUM
1332 : : * effectively agrees with ANALYZE about DEAD items, in the end. VACUUM
1333 : : * won't remember LP_DEAD items, but only because they're not supposed to
1334 : : * be left behind when it is done. (Cases where we bypass index vacuuming
1335 : : * will violate this optimistic assumption, but the overall impact of that
1336 : : * should be negligible.)
1337 : : */
1338 : 16610513 : htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, offnum));
1339 : :
1340 [ + + + + : 16610513 : switch (prstate->htsv[offnum])
- ]
1341 : : {
1342 : 12621160 : case HEAPTUPLE_LIVE:
1343 : :
1344 : : /*
1345 : : * Count it as live. Not only is this natural, but it's also what
1346 : : * acquire_sample_rows() does.
1347 : : */
1348 : 12621160 : prstate->live_tuples++;
1349 : :
1350 : : /*
1351 : : * Is the tuple definitely visible to all transactions?
1352 : : *
1353 : : * NB: Like with per-tuple hint bits, we can't set the
1354 : : * PD_ALL_VISIBLE flag if the inserter committed asynchronously.
1355 : : * See SetHintBits for more info. Check that the tuple is hinted
1356 : : * xmin-committed because of that.
1357 : : */
1358 [ + + ]: 12621160 : if (prstate->all_visible)
1359 : : {
1360 : : TransactionId xmin;
1361 : :
1362 [ + + ]: 9655412 : if (!HeapTupleHeaderXminCommitted(htup))
1363 : : {
1364 : 94 : prstate->all_visible = false;
1365 : 94 : break;
1366 : : }
1367 : :
1368 : : /*
1369 : : * The inserter definitely committed. But is it old enough
1370 : : * that everyone sees it as committed? A FrozenTransactionId
1371 : :  * is seen as committed by everyone. Otherwise, we check if
1372 : : * there is a snapshot that considers this xid to still be
1373 : : * running, and if so, we don't consider the page all-visible.
1374 : : */
1375 [ + + ]: 9655318 : xmin = HeapTupleHeaderGetXmin(htup);
1376 : :
1377 : : /*
1378 : : * For now always use prstate->cutoffs for this test, because
1379 : : * we only update 'all_visible' when freezing is requested. We
1380 : : * could use GlobalVisTestIsRemovableXid instead, if a
1381 : : * non-freezing caller wanted to set the VM bit.
1382 : : */
1383 [ - + ]: 9655318 : Assert(prstate->cutoffs);
1384 [ + + ]: 9655318 : if (!TransactionIdPrecedes(xmin, prstate->cutoffs->OldestXmin))
1385 : : {
1386 : 2392 : prstate->all_visible = false;
1387 : 2392 : break;
1388 : : }
1389 : :
1390 : : /* Track newest xmin on page. */
1391 [ + + + + ]: 9652926 : if (TransactionIdFollows(xmin, prstate->visibility_cutoff_xid) &&
1392 : : TransactionIdIsNormal(xmin))
1393 : 86143 : prstate->visibility_cutoff_xid = xmin;
1394 : : }
1395 : 12618674 : break;
1396 : :
1397 : 270125 : case HEAPTUPLE_RECENTLY_DEAD:
1398 : 270125 : prstate->recently_dead_tuples++;
1399 : 270125 : prstate->all_visible = false;
1400 : :
1401 : : /*
1402 : : * This tuple will soon become DEAD. Update the hint field so
1403 : : * that the page is reconsidered for pruning in future.
1404 : : */
1405 : 270125 : heap_prune_record_prunable(prstate,
1406 [ + - - + : 270125 : HeapTupleHeaderGetUpdateXid(htup));
- - ]
1407 : 270125 : break;
1408 : :
1409 : 58294 : case HEAPTUPLE_INSERT_IN_PROGRESS:
1410 : :
1411 : : /*
1412 : : * We do not count these rows as live, because we expect the
1413 : : * inserting transaction to update the counters at commit, and we
1414 : : * assume that will happen only after we report our results. This
1415 : : * assumption is a bit shaky, but it is what acquire_sample_rows()
1416 : : * does, so be consistent.
1417 : : */
1418 : 58294 : prstate->all_visible = false;
1419 : :
1420 : : /*
1421 : : * If we wanted to optimize for aborts, we might consider marking
1422 : : * the page prunable when we see INSERT_IN_PROGRESS. But we
1423 : : * don't. See related decisions about when to mark the page
1424 : : * prunable in heapam.c.
1425 : : */
1426 : 58294 : break;
1427 : :
1428 : 3660934 : case HEAPTUPLE_DELETE_IN_PROGRESS:
1429 : :
1430 : : /*
1431 : : * This an expected case during concurrent vacuum. Count such
1432 : : * rows as live. As above, we assume the deleting transaction
1433 : : * will commit and update the counters after we report.
1434 : : */
1435 : 3660934 : prstate->live_tuples++;
1436 : 3660934 : prstate->all_visible = false;
1437 : :
1438 : : /*
1439 : : * This tuple may soon become DEAD. Update the hint field so that
1440 : : * the page is reconsidered for pruning in future.
1441 : : */
1442 : 3660934 : heap_prune_record_prunable(prstate,
1443 [ + - - + : 3660934 : HeapTupleHeaderGetUpdateXid(htup));
- - ]
1444 : 3660934 : break;
1445 : :
11 heikki.linnakangas@i 1446 :UNC 0 : default:
1447 : :
1448 : : /*
1449 : : * DEAD tuples should've been passed to heap_prune_record_dead()
1450 : : * or heap_prune_record_unused() instead.
1451 : : */
1452 [ # # ]: 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result %d",
1453 : : prstate->htsv[offnum]);
1454 : : break;
1455 : : }
1456 : :
1457 : : /* Consider freezing any normal tuples which will not be removed */
11 heikki.linnakangas@i 1458 [ + + ]:GNC 16610513 : if (prstate->freeze)
1459 : : {
1460 : : bool totally_frozen;
1461 : :
1462 [ + + ]: 15306115 : if ((heap_prepare_freeze_tuple(htup,
1463 : 15306115 : prstate->cutoffs,
1464 : : &prstate->pagefrz,
1465 : 15306115 : &prstate->frozen[prstate->nfrozen],
1466 : : &totally_frozen)))
1467 : : {
1468 : : /* Save prepared freeze plan for later */
1469 : 1906873 : prstate->frozen[prstate->nfrozen++].offset = offnum;
1470 : : }
1471 : :
1472 : : /*
1473 : : * If any tuple isn't either totally frozen already or eligible to
1474 : : * become totally frozen (according to its freeze plan), then the page
1475 : : * definitely cannot be set all-frozen in the visibility map later on.
1476 : : */
1477 [ + + ]: 15306115 : if (!totally_frozen)
1478 : 4223189 : prstate->all_frozen = false;
1479 : : }
11 heikki.linnakangas@i 1480 :CBC 16610513 : }
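
To make the counting rules above easy to see at a glance, here is the same policy reduced to a standalone sketch; TupleStatus and Counters are simplified stand-ins for HTSV_Result and the PruneState fields, not PostgreSQL types, and the xmin-based all_visible check for LIVE tuples is omitted:

#include <stdbool.h>

/* Simplified stand-ins; illustrative only, not the PostgreSQL types */
typedef enum
{
	TUPLE_LIVE,
	TUPLE_RECENTLY_DEAD,
	TUPLE_INSERT_IN_PROGRESS,
	TUPLE_DELETE_IN_PROGRESS
} TupleStatus;

typedef struct
{
	int			live_tuples;
	int			recently_dead_tuples;
	bool		all_visible;
} Counters;

/*
 * Same policy as the switch above: LIVE and DELETE_IN_PROGRESS count as
 * live (the deleter is assumed to update the stats itself if it commits
 * after we report); INSERT_IN_PROGRESS counts as neither live nor dead.
 * The three non-LIVE cases always clear all_visible; the additional
 * xmin-based check that LIVE tuples get is left out of this sketch.
 */
static void
count_tuple(Counters *c, TupleStatus status)
{
	switch (status)
	{
		case TUPLE_LIVE:
			c->live_tuples++;
			break;
		case TUPLE_RECENTLY_DEAD:
			c->recently_dead_tuples++;
			c->all_visible = false;
			break;
		case TUPLE_INSERT_IN_PROGRESS:
			c->all_visible = false;
			break;
		case TUPLE_DELETE_IN_PROGRESS:
			c->live_tuples++;
			c->all_visible = false;
			break;
	}
}
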
1481 : :
1482 : :
1483 : : /*
1484 : :  * Record a line pointer that was already LP_DEAD and is left unchanged.
1485 : : */
1486 : : static void
11 heikki.linnakangas@i 1487 :GNC 886328 : heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum)
1488 : : {
1489 [ - + ]: 886328 : Assert(!prstate->processed[offnum]);
1490 : 886328 : prstate->processed[offnum] = true;
1491 : :
1492 : : /*
1493 : : * Deliberately don't set hastup for LP_DEAD items. We make the soft
1494 : : * assumption that any LP_DEAD items encountered here will become
1495 : : * LP_UNUSED later on, before count_nondeletable_pages is reached. If we
1496 : : * don't make this assumption then rel truncation will only happen every
1497 : : * other VACUUM, at most. Besides, VACUUM must treat
1498 : : * hastup/nonempty_pages as provisional no matter how LP_DEAD items are
1499 : : * handled (handled here, or handled later on).
1500 : : *
1501 : : * Similarly, don't unset all_visible until later, at the end of
1502 : : * heap_page_prune_and_freeze(). This will allow us to attempt to freeze
1503 : : * the page after pruning. As long as we unset it before updating the
1504 : : * visibility map, this will be correct.
1505 : : */
1506 : :
1507 : : /* Record the dead offset for vacuum */
1508 : 886328 : prstate->deadoffsets[prstate->lpdead_items++] = offnum;
1509 : 886328 : }
1510 : :
1511 : : /*
1512 : : * Record LP_REDIRECT that is left unchanged.
1513 : : */
1514 : : static void
1515 : 219132 : heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum)
1516 : : {
1517 : : /*
1518 : : * A redirect line pointer doesn't count as a live tuple.
1519 : : *
1520 : : * If we leave a redirect line pointer in place, there will be another
1521 : : * tuple on the page that it points to. We will do the bookkeeping for
1522 : : * that separately. So we have nothing to do here, except remember that
1523 : : * we processed this item.
1524 : : */
13 1525 [ - + ]: 219132 : Assert(!prstate->processed[offnum]);
1526 : 219132 : prstate->processed[offnum] = true;
1527 : 219132 : }
1528 : :
1529 : : /*
1530 : : * Perform the actual page changes needed by heap_page_prune_and_freeze().
1531 : : *
1532 : : * If 'lp_truncate_only' is set, we are merely marking LP_DEAD line pointers
1533 : : * as unused, not redirecting or removing anything else. The
1534 : : * PageRepairFragmentation() call is skipped in that case.
1535 : : *
1536 : : * If 'lp_truncate_only' is not set, the caller must hold a cleanup lock on
1537 : : * the buffer. If it is set, an ordinary exclusive lock suffices.
1538 : : */
1539 : : void
20 1540 : 57749 : heap_page_prune_execute(Buffer buffer, bool lp_truncate_only,
1541 : : OffsetNumber *redirected, int nredirected,
1542 : : OffsetNumber *nowdead, int ndead,
1543 : : OffsetNumber *nowunused, int nunused)
1544 : : {
2916 kgrittn@postgresql.o 1545 :CBC 57749 : Page page = (Page) BufferGetPage(buffer);
1546 : : OffsetNumber *offnum;
1547 : : HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
1548 : :
1549 : : /* Shouldn't be called unless there's something to do */
1104 pg@bowt.ie 1550 [ + + + + : 57749 : Assert(nredirected > 0 || ndead > 0 || nunused > 0);
- + ]
1551 : :
1552 : : /* If 'lp_truncate_only', we can only remove already-dead line pointers */
20 heikki.linnakangas@i 1553 [ + + + - :GNC 57749 : Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
- + ]
1554 : :
1555 : : /* Update all redirected line pointers */
5881 tgl@sss.pgh.pa.us 1556 :CBC 57749 : offnum = redirected;
892 pg@bowt.ie 1557 [ + + ]: 129582 : for (int i = 0; i < nredirected; i++)
1558 : : {
5881 tgl@sss.pgh.pa.us 1559 : 71833 : OffsetNumber fromoff = *offnum++;
1560 : 71833 : OffsetNumber tooff = *offnum++;
1561 : 71833 : ItemId fromlp = PageGetItemId(page, fromoff);
1562 : : ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
1563 : :
1564 : : #ifdef USE_ASSERT_CHECKING
1565 : :
1566 : : /*
1567 : : * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1568 : : * must be the first item from a HOT chain. If the item has tuple
1569 : : * storage then it can't be a heap-only tuple. Otherwise we are just
1570 : : * maintaining an existing LP_REDIRECT from an existing HOT chain that
1571 : : * has been pruned at least once before now.
1572 : : */
892 pg@bowt.ie 1573 [ + + ]: 71833 : if (!ItemIdIsRedirected(fromlp))
1574 : : {
1575 [ + - - + ]: 65417 : Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
1576 : :
1577 : 65417 : htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1578 [ - + ]: 65417 : Assert(!HeapTupleHeaderIsHeapOnly(htup));
1579 : : }
1580 : : else
1581 : : {
1582 : : /* We shouldn't need to redundantly set the redirect */
1583 [ - + ]: 6416 : Assert(ItemIdGetRedirect(fromlp) != tooff);
1584 : : }
1585 : :
1586 : : /*
1587 : : * The item that we're about to set as an LP_REDIRECT (the 'from'
1588 : : * item) will point to an existing item (the 'to' item) that is
1589 : : * already a heap-only tuple. There can be at most one LP_REDIRECT
1590 : : * item per HOT chain.
1591 : : *
1592 : :  * We need to keep around an LP_REDIRECT item (after the original
1593 : : * non-heap-only root tuple gets pruned away) so that it's always
1594 : : * possible for VACUUM to easily figure out what TID to delete from
1595 : : * indexes when an entire HOT chain becomes dead. A heap-only tuple
1596 : : * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1597 : : * tuple can.
1598 : : *
1599 : : * This check may miss problems, e.g. the target of a redirect could
1600 : : * be marked as unused subsequently. The page_verify_redirects() check
1601 : : * below will catch such problems.
1602 : : */
1603 : 71833 : tolp = PageGetItemId(page, tooff);
1604 [ + - - + ]: 71833 : Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
1605 : 71833 : htup = (HeapTupleHeader) PageGetItem(page, tolp);
1606 [ - + ]: 71833 : Assert(HeapTupleHeaderIsHeapOnly(htup));
1607 : : #endif
1608 : :
5179 tgl@sss.pgh.pa.us 1609 : 71833 : ItemIdSetRedirect(fromlp, tooff);
1610 : : }
1611 : :
1612 : : /* Update all now-dead line pointers */
5881 1613 : 57749 : offnum = nowdead;
892 pg@bowt.ie 1614 [ + + ]: 1671244 : for (int i = 0; i < ndead; i++)
1615 : : {
5881 tgl@sss.pgh.pa.us 1616 : 1613495 : OffsetNumber off = *offnum++;
1617 : 1613495 : ItemId lp = PageGetItemId(page, off);
1618 : :
1619 : : #ifdef USE_ASSERT_CHECKING
1620 : :
1621 : : /*
1622 : : * An LP_DEAD line pointer must be left behind when the original item
1623 : : * (which is dead to everybody) could still be referenced by a TID in
1624 : : * an index. This should never be necessary with any individual
1625 : : * heap-only tuple item, though. (It's not clear how much of a problem
1626 : : * that would be, but there is no reason to allow it.)
1627 : : */
892 pg@bowt.ie 1628 [ + + ]: 1613495 : if (ItemIdHasStorage(lp))
1629 : : {
1630 [ - + ]: 1604829 : Assert(ItemIdIsNormal(lp));
1631 : 1604829 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1632 [ - + ]: 1604829 : Assert(!HeapTupleHeaderIsHeapOnly(htup));
1633 : : }
1634 : : else
1635 : : {
1636 : : /* Whole HOT chain becomes dead */
1637 [ - + ]: 8666 : Assert(ItemIdIsRedirected(lp));
1638 : : }
1639 : : #endif
1640 : :
5881 tgl@sss.pgh.pa.us 1641 : 1613495 : ItemIdSetDead(lp);
1642 : : }
1643 : :
1644 : : /* Update all now-unused line pointers */
1645 : 57749 : offnum = nowunused;
892 pg@bowt.ie 1646 [ + + ]: 389318 : for (int i = 0; i < nunused; i++)
1647 : : {
5881 tgl@sss.pgh.pa.us 1648 : 331569 : OffsetNumber off = *offnum++;
1649 : 331569 : ItemId lp = PageGetItemId(page, off);
1650 : :
1651 : : #ifdef USE_ASSERT_CHECKING
1652 : :
20 heikki.linnakangas@i 1653 [ + + ]:GNC 331569 : if (lp_truncate_only)
1654 : : {
1655 : : /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1656 [ + - - + ]: 230520 : Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
1657 : : }
1658 : : else
1659 : : {
1660 : : /*
1661 : : * When heap_page_prune_and_freeze() was called, mark_unused_now
1662 : : * may have been passed as true, which allows would-be LP_DEAD
1663 : : * items to be made LP_UNUSED instead. This is only possible if
1664 : : * the relation has no indexes. If there are any dead items, then
1665 : : * mark_unused_now was not true and every item being marked
1666 : : * LP_UNUSED must refer to a heap-only tuple.
1667 : : */
1668 [ + + ]: 101049 : if (ndead > 0)
1669 : : {
1670 [ + - - + ]: 41111 : Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
1671 : 41111 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1672 [ - + ]: 41111 : Assert(HeapTupleHeaderIsHeapOnly(htup));
1673 : : }
1674 : : else
1675 [ - + ]: 59938 : Assert(ItemIdIsUsed(lp));
1676 : : }
1677 : :
1678 : : #endif
1679 : :
5881 tgl@sss.pgh.pa.us 1680 :CBC 331569 : ItemIdSetUnused(lp);
1681 : : }
1682 : :
20 heikki.linnakangas@i 1683 [ + + ]:GNC 57749 : if (lp_truncate_only)
1684 : 3485 : PageTruncateLinePointerArray(page);
1685 : : else
1686 : : {
1687 : : /*
1688 : : * Finally, repair any fragmentation, and update the page's hint bit
1689 : : * about whether it has free pointers.
1690 : : */
1691 : 54264 : PageRepairFragmentation(page);
1692 : :
1693 : : /*
1694 : : * Now that the page has been modified, assert that redirect items
1695 : : * still point to valid targets.
1696 : : */
1697 : 54264 : page_verify_redirects(page);
1698 : : }
874 andres@anarazel.de 1699 :CBC 57749 : }
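
Note that 'redirected' is a flat array holding (from, to) offset pairs, consumed two entries at a time by the redirection loop above; this is also why log_heap_prune_and_freeze() registers it as sizeof(OffsetNumber[2]) * nredirected. A toy standalone sketch of that pair layout (not the PostgreSQL page API):

#include <stdint.h>
#include <stdio.h>

typedef uint16_t OffsetNumber;	/* stand-in for the PostgreSQL typedef */

int
main(void)
{
	/* two redirects, 1 -> 4 and 7 -> 9, stored as a flat pair array */
	OffsetNumber redirected[] = {1, 4, 7, 9};
	int			nredirected = 2;
	OffsetNumber *offnum = redirected;

	for (int i = 0; i < nredirected; i++)
	{
		OffsetNumber fromoff = *offnum++;	/* first entry of the pair */
		OffsetNumber tooff = *offnum++; 	/* second entry of the pair */

		printf("redirect item %d to item %d\n", (int) fromoff, (int) tooff);
	}
	return 0;
}
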
1700 : :
1701 : :
1702 : : /*
1703 : : * If built with assertions, verify that all LP_REDIRECT items point to a
1704 : : * valid item.
1705 : : *
1706 : :  * One way that bugs related to HOT pruning show up is redirect items pointing
1707 : :  * to removed tuples. It's not trivial to reliably check that marking an item
1708 : : * unused will not orphan a redirect item during heap_prune_chain() /
1709 : : * heap_page_prune_execute(), so we additionally check the whole page after
1710 : : * pruning. Without this check such bugs would typically only cause asserts
1711 : : * later, potentially well after the corruption has been introduced.
1712 : : *
1713 : : * Also check comments in heap_page_prune_execute()'s redirection loop.
1714 : : */
1715 : : static void
1716 : 54264 : page_verify_redirects(Page page)
1717 : : {
1718 : : #ifdef USE_ASSERT_CHECKING
1719 : : OffsetNumber offnum;
1720 : : OffsetNumber maxoff;
1721 : :
1722 : 54264 : maxoff = PageGetMaxOffsetNumber(page);
1723 : 54264 : for (offnum = FirstOffsetNumber;
1724 [ + + ]: 4676050 : offnum <= maxoff;
1725 : 4621786 : offnum = OffsetNumberNext(offnum))
1726 : : {
1727 : 4621786 : ItemId itemid = PageGetItemId(page, offnum);
1728 : : OffsetNumber targoff;
1729 : : ItemId targitem;
1730 : : HeapTupleHeader htup;
1731 : :
1732 [ + + ]: 4621786 : if (!ItemIdIsRedirected(itemid))
1733 : 4447441 : continue;
1734 : :
1735 : 174345 : targoff = ItemIdGetRedirect(itemid);
1736 : 174345 : targitem = PageGetItemId(page, targoff);
1737 : :
1738 [ - + ]: 174345 : Assert(ItemIdIsUsed(targitem));
1739 [ - + ]: 174345 : Assert(ItemIdIsNormal(targitem));
1740 [ - + ]: 174345 : Assert(ItemIdHasStorage(targitem));
1741 : 174345 : htup = (HeapTupleHeader) PageGetItem(page, targitem);
1742 [ - + ]: 174345 : Assert(HeapTupleHeaderIsHeapOnly(htup));
1743 : : }
1744 : : #endif
6051 tgl@sss.pgh.pa.us 1745 : 54264 : }
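
For instance, if pruning wrongly marked the target of a redirect as unused, every later chain traversal would land on an empty slot. A toy model of the invariant being asserted above (hypothetical types, standalone):

#include <assert.h>

typedef enum
{
	UNUSED,
	NORMAL,
	REDIRECT
} ItemState;

typedef struct
{
	ItemState	state;
	int			target;			/* valid only when state == REDIRECT */
} ToyItem;

/* The invariant: every redirect points at a normal (heap-only) item. */
static void
toy_verify_redirects(const ToyItem *items, int nitems)
{
	for (int i = 0; i < nitems; i++)
		if (items[i].state == REDIRECT)
			assert(items[items[i].target].state == NORMAL);
}

int
main(void)
{
	ToyItem		page[3] = {
		{REDIRECT, 2}, {UNUSED, 0}, {NORMAL, 0},
	};

	toy_verify_redirects(page, 3);	/* passes: redirect 0 -> normal 2 */
	page[2].state = UNUSED; 		/* simulate an orphaned redirect */
	/* toy_verify_redirects(page, 3) would now fail the assertion */
	return 0;
}
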
1746 : :
1747 : :
1748 : : /*
1749 : : * For all items in this page, find their respective root line pointers.
1750 : : * If item k is part of a HOT-chain with root at item j, then we set
1751 : : * root_offsets[k - 1] = j.
1752 : : *
1753 : : * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
1754 : : * Unused entries are filled with InvalidOffsetNumber (zero).
1755 : : *
1756 : : * The function must be called with at least share lock on the buffer, to
1757 : : * prevent concurrent prune operations.
1758 : : *
1759 : : * Note: The information collected here is valid only as long as the caller
1760 : :  * holds a pin on the buffer. Once the pin is released, a tuple might be
1761 : :  * pruned and reused by a completely unrelated tuple.
1762 : : */
1763 : : void
1764 : 101050 : heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
1765 : : {
1766 : : OffsetNumber offnum,
1767 : : maxoff;
1768 : :
1340 alvherre@alvh.no-ip. 1769 [ + - - + : 101050 : MemSet(root_offsets, InvalidOffsetNumber,
- - - - -
- ]
1770 : : MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1771 : :
6051 tgl@sss.pgh.pa.us 1772 : 101050 : maxoff = PageGetMaxOffsetNumber(page);
5815 bruce@momjian.us 1773 [ + + ]: 8421812 : for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1774 : : {
5995 1775 : 8320762 : ItemId lp = PageGetItemId(page, offnum);
1776 : : HeapTupleHeader htup;
1777 : : OffsetNumber nextoffnum;
1778 : : TransactionId priorXmax;
1779 : :
1780 : : /* skip unused and dead items */
6051 tgl@sss.pgh.pa.us 1781 [ + + + + ]: 8320762 : if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1782 : 12511 : continue;
1783 : :
1784 [ + + ]: 8308251 : if (ItemIdIsNormal(lp))
1785 : : {
1786 : 8304237 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1787 : :
1788 : : /*
1789 : : * Check if this tuple is part of a HOT-chain rooted at some other
1790 : : * tuple. If so, skip it for now; we'll process it when we find
1791 : : * its root.
1792 : : */
1793 [ + + ]: 8304237 : if (HeapTupleHeaderIsHeapOnly(htup))
1794 : 4311 : continue;
1795 : :
1796 : : /*
1797 : : * This is either a plain tuple or the root of a HOT-chain.
1798 : : * Remember it in the mapping.
1799 : : */
1800 : 8299926 : root_offsets[offnum - 1] = offnum;
1801 : :
1802 : : /* If it's not the start of a HOT-chain, we're done with it */
1803 [ + + + - : 8299926 : if (!HeapTupleHeaderIsHotUpdated(htup))
- + ]
1804 : 8299699 : continue;
1805 : :
1806 : : /* Set up to scan the HOT-chain */
1807 : 227 : nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
4099 alvherre@alvh.no-ip. 1808 [ + - - + : 227 : priorXmax = HeapTupleHeaderGetUpdateXid(htup);
- - ]
1809 : : }
1810 : : else
1811 : : {
1812 : : /* Must be a redirect item. We do not set its root_offsets entry */
6051 tgl@sss.pgh.pa.us 1813 [ - + ]: 4014 : Assert(ItemIdIsRedirected(lp));
1814 : : /* Set up to scan the HOT-chain */
1815 : 4014 : nextoffnum = ItemIdGetRedirect(lp);
1816 : 4014 : priorXmax = InvalidTransactionId;
1817 : : }
1818 : :
1819 : : /*
1820 : : * Now follow the HOT-chain and collect other tuples in the chain.
1821 : : *
1822 : :  * Note: Even though this is a nested loop, the complexity of the
1823 : :  * function is O(N) because each tuple in the page is visited at
1824 : :  * most twice: once in the outer loop and once while chasing its
1825 : :  * HOT chain.
1826 : : */
1827 : : for (;;)
1828 : : {
1829 : : /* Sanity check (pure paranoia) */
935 pg@bowt.ie 1830 [ - + ]: 4311 : if (offnum < FirstOffsetNumber)
935 pg@bowt.ie 1831 :UBC 0 : break;
1832 : :
1833 : : /*
1834 : : * An offset past the end of page's line pointer array is possible
1835 : : * when the array was truncated
1836 : : */
935 pg@bowt.ie 1837 [ - + ]:CBC 4311 : if (offnum > maxoff)
1103 pg@bowt.ie 1838 :UBC 0 : break;
1839 : :
6051 tgl@sss.pgh.pa.us 1840 :CBC 4311 : lp = PageGetItemId(page, nextoffnum);
1841 : :
1842 : : /* Check for broken chains */
1843 [ - + ]: 4311 : if (!ItemIdIsNormal(lp))
6051 tgl@sss.pgh.pa.us 1844 :UBC 0 : break;
1845 : :
6051 tgl@sss.pgh.pa.us 1846 :CBC 4311 : htup = (HeapTupleHeader) PageGetItem(page, lp);
1847 : :
1848 [ + + - + ]: 4608 : if (TransactionIdIsValid(priorXmax) &&
2355 alvherre@alvh.no-ip. 1849 [ + - ]: 297 : !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
6051 tgl@sss.pgh.pa.us 1850 :UBC 0 : break;
1851 : :
1852 : : /* Remember the root line pointer for this item */
6051 tgl@sss.pgh.pa.us 1853 :CBC 4311 : root_offsets[nextoffnum - 1] = offnum;
1854 : :
1855 : : /* Advance to next chain member, if any */
1856 [ + + + - : 4311 : if (!HeapTupleHeaderIsHotUpdated(htup))
+ - ]
1857 : : break;
1858 : :
1859 : : /* HOT implies it can't have moved to different partition */
2199 andres@anarazel.de 1860 [ - + ]: 70 : Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1861 : :
6051 tgl@sss.pgh.pa.us 1862 : 70 : nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
4099 alvherre@alvh.no-ip. 1863 [ + - - + : 70 : priorXmax = HeapTupleHeaderGetUpdateXid(htup);
- - ]
1864 : : }
1865 : : }
6051 tgl@sss.pgh.pa.us 1866 : 101050 : }
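
As a worked example of the mapping: on a page where item 1 is an LP_REDIRECT to heap-only tuple 2, tuple 2 was HOT-updated to heap-only tuple 3, and item 4 is a plain tuple, the function leaves root_offsets = {0, 1, 1, 4}. The standalone sketch below reproduces that walk over a toy next-pointer array (a deliberate simplification of the real line pointer format, with the xmin cross-checks omitted):

#include <stdio.h>

#define NITEMS	4
#define INVALID	0

int
main(void)
{
	/*
	 * next[k] is the offset that item k points to, or 0 when the chain
	 * ends there. Item 1 redirects to 2; tuple 2 was HOT-updated to 3.
	 */
	int			next[NITEMS + 1] = {0, 2, 3, 0, 0};
	int			is_redirect[NITEMS + 1] = {0, 1, 0, 0, 0};
	int			is_heap_only[NITEMS + 1] = {0, 0, 1, 1, 0};
	int			root_offsets[NITEMS] = {INVALID, INVALID, INVALID, INVALID};

	for (int off = 1; off <= NITEMS; off++)
	{
		if (is_heap_only[off])
			continue;			/* reached when we walk from its root */

		if (!is_redirect[off])
			root_offsets[off - 1] = off;	/* plain tuple or chain root */

		/* follow the chain, recording the root for each member */
		for (int member = next[off]; member != 0; member = next[member])
			root_offsets[member - 1] = off;
	}

	for (int i = 0; i < NITEMS; i++)
		printf("root_offsets[%d] = %d\n", i, root_offsets[i]);
	return 0;
}
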
1867 : :
1868 : :
1869 : : /*
1870 : :  * Compare the fields that describe the actions required to freeze a tuple
1871 : :  * with the caller's open plan. If everything matches then the frz tuple's
1872 : :  * plan is equivalent to the caller's plan.
1873 : : */
1874 : : static inline bool
20 heikki.linnakangas@i 1875 :GNC 608128 : heap_log_freeze_eq(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
1876 : : {
1877 [ + + ]: 608128 : if (plan->xmax == frz->xmax &&
1878 [ + + ]: 606829 : plan->t_infomask2 == frz->t_infomask2 &&
1879 [ + + ]: 606123 : plan->t_infomask == frz->t_infomask &&
1880 [ + - ]: 604292 : plan->frzflags == frz->frzflags)
1881 : 604292 : return true;
1882 : :
1883 : : /* Caller must call heap_log_freeze_new_plan again for frz */
1884 : 3836 : return false;
1885 : : }
1886 : :
1887 : : /*
1888 : : * Comparator used to deduplicate XLOG_HEAP2_FREEZE_PAGE freeze plans
1889 : : */
1890 : : static int
1891 : 936656 : heap_log_freeze_cmp(const void *arg1, const void *arg2)
1892 : : {
1893 : 936656 : HeapTupleFreeze *frz1 = (HeapTupleFreeze *) arg1;
1894 : 936656 : HeapTupleFreeze *frz2 = (HeapTupleFreeze *) arg2;
1895 : :
1896 [ + + ]: 936656 : if (frz1->xmax < frz2->xmax)
1897 : 13832 : return -1;
1898 [ + + ]: 922824 : else if (frz1->xmax > frz2->xmax)
1899 : 15101 : return 1;
1900 : :
1901 [ + + ]: 907723 : if (frz1->t_infomask2 < frz2->t_infomask2)
1902 : 2888 : return -1;
1903 [ + + ]: 904835 : else if (frz1->t_infomask2 > frz2->t_infomask2)
1904 : 4584 : return 1;
1905 : :
1906 [ + + ]: 900251 : if (frz1->t_infomask < frz2->t_infomask)
1907 : 8142 : return -1;
1908 [ + + ]: 892109 : else if (frz1->t_infomask > frz2->t_infomask)
1909 : 11008 : return 1;
1910 : :
1911 [ - + ]: 881101 : if (frz1->frzflags < frz2->frzflags)
20 heikki.linnakangas@i 1912 :UNC 0 : return -1;
20 heikki.linnakangas@i 1913 [ - + ]:GNC 881101 : else if (frz1->frzflags > frz2->frzflags)
20 heikki.linnakangas@i 1914 :UNC 0 : return 1;
1915 : :
1916 : : /*
1917 : : * heap_log_freeze_eq would consider these tuple-wise plans to be equal.
1918 : : * (So the tuples will share a single canonical freeze plan.)
1919 : : *
1920 : : * We tiebreak on page offset number to keep each freeze plan's page
1921 : : * offset number array individually sorted. (Unnecessary, but be tidy.)
1922 : : */
20 heikki.linnakangas@i 1923 [ + + ]:GNC 881101 : if (frz1->offset < frz2->offset)
1924 : 707463 : return -1;
1925 [ + - ]: 173638 : else if (frz1->offset > frz2->offset)
1926 : 173638 : return 1;
1927 : :
20 heikki.linnakangas@i 1928 :UNC 0 : Assert(false);
1929 : : return 0;
1930 : : }
1931 : :
1932 : : /*
1933 : :  * Start a new plan, initialized from one tuple's freeze actions. During
1934 : :  * REDO, at least one tuple's freezing steps will be described by this plan.
1935 : : */
1936 : : static inline void
20 heikki.linnakangas@i 1937 :GNC 18759 : heap_log_freeze_new_plan(xlhp_freeze_plan *plan, HeapTupleFreeze *frz)
1938 : : {
1939 : 18759 : plan->xmax = frz->xmax;
1940 : 18759 : plan->t_infomask2 = frz->t_infomask2;
1941 : 18759 : plan->t_infomask = frz->t_infomask;
1942 : 18759 : plan->frzflags = frz->frzflags;
1943 : 18759 : plan->ntuples = 1; /* for now */
1944 : 18759 : }
1945 : :
1946 : : /*
1947 : : * Deduplicate tuple-based freeze plans so that each distinct set of
1948 : : * processing steps is only stored once in XLOG_HEAP2_FREEZE_PAGE records.
1949 : : * Called during original execution of freezing (for logged relations).
1950 : : *
1951 : :  * The return value is the number of plans set in *plans_out. Also writes
1952 : :  * an array of offset numbers into the *offsets_out output argument
1953 : : * (actually there is one array per freeze plan, but that's not of immediate
1954 : : * concern to our caller).
1955 : : */
1956 : : static int
1957 : 14923 : heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
1958 : : xlhp_freeze_plan *plans_out,
1959 : : OffsetNumber *offsets_out)
1960 : : {
1961 : 14923 : int nplans = 0;
1962 : :
1963 : : /* Sort tuple-based freeze plans in the order required to deduplicate */
1964 : 14923 : qsort(tuples, ntuples, sizeof(HeapTupleFreeze), heap_log_freeze_cmp);
1965 : :
1966 [ + + ]: 637974 : for (int i = 0; i < ntuples; i++)
1967 : : {
1968 : 623051 : HeapTupleFreeze *frz = tuples + i;
1969 : :
1970 [ + + ]: 623051 : if (i == 0)
1971 : : {
1972 : : /* New canonical freeze plan starting with first tup */
1973 : 14923 : heap_log_freeze_new_plan(plans_out, frz);
1974 : 14923 : nplans++;
1975 : : }
1976 [ + + ]: 608128 : else if (heap_log_freeze_eq(plans_out, frz))
1977 : : {
1978 : : /* tup matches open canonical plan -- include tup in it */
1979 [ - + ]: 604292 : Assert(offsets_out[i - 1] < frz->offset);
1980 : 604292 : plans_out->ntuples++;
1981 : : }
1982 : : else
1983 : : {
1984 : : /* Tup doesn't match current plan -- done with it now */
1985 : 3836 : plans_out++;
1986 : :
1987 : : /* New canonical freeze plan starting with this tup */
1988 : 3836 : heap_log_freeze_new_plan(plans_out, frz);
1989 : 3836 : nplans++;
1990 : : }
1991 : :
1992 : : /*
1993 : : * Save page offset number in dedicated buffer in passing.
1994 : : *
1995 : : * REDO routine relies on the record's offset numbers array grouping
1996 : : * offset numbers by freeze plan. The sort order within each grouping
1997 : : * is ascending offset number order, just to keep things tidy.
1998 : : */
1999 : 623051 : offsets_out[i] = frz->offset;
2000 : : }
2001 : :
2002 [ + - - + ]: 14923 : Assert(nplans > 0 && nplans <= ntuples);
2003 : :
2004 : 14923 : return nplans;
2005 : : }
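
heap_log_freeze_plan() is the classic sort-then-group deduplication pattern: heap_log_freeze_cmp() makes equal plans adjacent (with the unique page offset as the final tiebreaker), and one linear pass starts a new plan whenever the key changes. A self-contained sketch of the same pattern with a reduced two-field key (Frz and frz_cmp are illustrative; the real key also covers t_infomask and frzflags):

#include <stdio.h>
#include <stdlib.h>

typedef struct
{
	unsigned int xmax;			/* simplified plan key, field 1 */
	unsigned short infomask;	/* simplified plan key, field 2 */
	unsigned short offset;		/* tiebreaker, unique per tuple */
} Frz;

static int
frz_cmp(const void *a, const void *b)
{
	const Frz  *f1 = (const Frz *) a;
	const Frz  *f2 = (const Frz *) b;

	if (f1->xmax != f2->xmax)
		return f1->xmax < f2->xmax ? -1 : 1;
	if (f1->infomask != f2->infomask)
		return f1->infomask < f2->infomask ? -1 : 1;
	return f1->offset < f2->offset ? -1 : 1;	/* offsets never collide */
}

int
main(void)
{
	Frz			tuples[] = {
		{720, 0x2000, 3}, {610, 0x2000, 1}, {720, 0x2000, 2},
	};
	int			ntuples = 3;
	int			nplans = 0;

	qsort(tuples, ntuples, sizeof(Frz), frz_cmp);

	for (int i = 0; i < ntuples; i++)
	{
		if (i == 0 ||
			tuples[i].xmax != tuples[i - 1].xmax ||
			tuples[i].infomask != tuples[i - 1].infomask)
			nplans++;			/* key changed: start a new plan */
	}

	printf("%d tuples deduplicated into %d plans\n", ntuples, nplans);
	return 0;
}
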
2006 : :
2007 : : /*
2008 : : * Write an XLOG_HEAP2_PRUNE_FREEZE WAL record
2009 : : *
2010 : : * This is used for several different page maintenance operations:
2011 : : *
2012 : : * - Page pruning, in VACUUM's 1st pass or on access: Some items are
2013 : : * redirected, some marked dead, and some removed altogether.
2014 : : *
2015 : : * - Freezing: Items are marked as 'frozen'.
2016 : : *
2017 : : * - Vacuum, 2nd pass: Items that are already LP_DEAD are marked as unused.
2018 : : *
2019 : : * They have enough commonalities that we use a single WAL record for them
2020 : : * all.
2021 : : *
2022 : : * If replaying the record requires a cleanup lock, pass cleanup_lock = true.
2023 : : * Replaying 'redirected' or 'dead' items always requires a cleanup lock, but
2024 : : * replaying 'unused' items depends on whether they were all previously marked
2025 : : * as dead.
2026 : : *
2027 : : * Note: This function scribbles on the 'frozen' array.
2028 : : *
2029 : :  * Note: This is called in a critical section, so be careful what you do here.
2030 : : */
2031 : : void
2032 : 70645 : log_heap_prune_and_freeze(Relation relation, Buffer buffer,
2033 : : TransactionId conflict_xid,
2034 : : bool cleanup_lock,
2035 : : PruneReason reason,
2036 : : HeapTupleFreeze *frozen, int nfrozen,
2037 : : OffsetNumber *redirected, int nredirected,
2038 : : OffsetNumber *dead, int ndead,
2039 : : OffsetNumber *unused, int nunused)
2040 : : {
2041 : : xl_heap_prune xlrec;
2042 : : XLogRecPtr recptr;
2043 : : uint8 info;
2044 : :
2045 : : /* The following local variables hold data registered in the WAL record: */
2046 : : xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
2047 : : xlhp_freeze_plans freeze_plans;
2048 : : xlhp_prune_items redirect_items;
2049 : : xlhp_prune_items dead_items;
2050 : : xlhp_prune_items unused_items;
2051 : : OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
2052 : :
2053 : 70645 : xlrec.flags = 0;
2054 : :
2055 : : /*
2056 : : * Prepare data for the buffer. The arrays are not actually in the
2057 : : * buffer, but we pretend that they are. When XLogInsert stores a full
2058 : : * page image, the arrays can be omitted.
2059 : : */
2060 : 70645 : XLogBeginInsert();
2061 : 70645 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2062 [ + + ]: 70645 : if (nfrozen > 0)
2063 : : {
2064 : : int nplans;
2065 : :
2066 : 14923 : xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
2067 : :
2068 : : /*
2069 : :  * Prepare the deduplicated representation for use in the WAL record.
2070 : :  * This destructively sorts the frozen tuples array in place.
2071 : : */
2072 : 14923 : nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2073 : :
2074 : 14923 : freeze_plans.nplans = nplans;
2075 : 14923 : XLogRegisterBufData(0, (char *) &freeze_plans,
2076 : : offsetof(xlhp_freeze_plans, plans));
2077 : 14923 : XLogRegisterBufData(0, (char *) plans,
2078 : : sizeof(xlhp_freeze_plan) * nplans);
2079 : : }
2080 [ + + ]: 70645 : if (nredirected > 0)
2081 : : {
2082 : 13126 : xlrec.flags |= XLHP_HAS_REDIRECTIONS;
2083 : :
2084 : 13126 : redirect_items.ntargets = nredirected;
2085 : 13126 : XLogRegisterBufData(0, (char *) &redirect_items,
2086 : : offsetof(xlhp_prune_items, data));
2087 : 13126 : XLogRegisterBufData(0, (char *) redirected,
2088 : : sizeof(OffsetNumber[2]) * nredirected);
2089 : : }
2090 [ + + ]: 70645 : if (ndead > 0)
2091 : : {
2092 : 35011 : xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2093 : :
2094 : 35011 : dead_items.ntargets = ndead;
2095 : 35011 : XLogRegisterBufData(0, (char *) &dead_items,
2096 : : offsetof(xlhp_prune_items, data));
2097 : 35011 : XLogRegisterBufData(0, (char *) dead,
2098 : : sizeof(OffsetNumber) * ndead);
2099 : : }
2100 [ + + ]: 70645 : if (nunused > 0)
2101 : : {
2102 : 21277 : xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
2103 : :
2104 : 21277 : unused_items.ntargets = nunused;
2105 : 21277 : XLogRegisterBufData(0, (char *) &unused_items,
2106 : : offsetof(xlhp_prune_items, data));
2107 : 21277 : XLogRegisterBufData(0, (char *) unused,
2108 : : sizeof(OffsetNumber) * nunused);
2109 : : }
2110 [ + + ]: 70645 : if (nfrozen > 0)
2111 : 14923 : XLogRegisterBufData(0, (char *) frz_offsets,
2112 : : sizeof(OffsetNumber) * nfrozen);
2113 : :
2114 : : /*
2115 : :  * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
2116 : :  * flags above.
2117 : : */
2118 [ + + + - : 70645 : if (RelationIsAccessibleInLogicalDecoding(relation))
- + - - -
- + + + +
- + - - +
- ]
2119 : 582 : xlrec.flags |= XLHP_IS_CATALOG_REL;
2120 [ + + ]: 70645 : if (TransactionIdIsValid(conflict_xid))
2121 : 54664 : xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
2122 [ + + ]: 70645 : if (cleanup_lock)
2123 : 58724 : xlrec.flags |= XLHP_CLEANUP_LOCK;
2124 : : else
2125 : : {
2126 [ + - - + ]: 11921 : Assert(nredirected == 0 && ndead == 0);
2127 : : /* also, any items in 'unused' must've been LP_DEAD previously */
2128 : : }
2129 : 70645 : XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
2130 [ + + ]: 70645 : if (TransactionIdIsValid(conflict_xid))
2131 : 54664 : XLogRegisterData((char *) &conflict_xid, sizeof(TransactionId));
2132 : :
2133 [ + + + - ]: 70645 : switch (reason)
2134 : : {
2135 : 34660 : case PRUNE_ON_ACCESS:
2136 : 34660 : info = XLOG_HEAP2_PRUNE_ON_ACCESS;
2137 : 34660 : break;
2138 : 24064 : case PRUNE_VACUUM_SCAN:
2139 : 24064 : info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
2140 : 24064 : break;
2141 : 11921 : case PRUNE_VACUUM_CLEANUP:
2142 : 11921 : info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
2143 : 11921 : break;
20 heikki.linnakangas@i 2144 :UNC 0 : default:
2145 [ # # ]: 0 : elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2146 : : break;
2147 : : }
20 heikki.linnakangas@i 2148 :GNC 70645 : recptr = XLogInsert(RM_HEAP2_ID, info);
2149 : :
2150 : 70645 : PageSetLSN(BufferGetPage(buffer), recptr);
2151 : 70645 : }
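
Because the record body is variable-length, a reader has to walk the buffer-0 payload in exactly the order the sub-records were registered above, using xlrec.flags to learn which are present. The sketch below merely prints that order for a given flag mask; the flag names and values are illustrative stand-ins for the XLHP_* constants, and real redo code obtains the payload via XLogRecGetBlockData():

#include <stdio.h>

/* Illustrative stand-ins for the XLHP_HAS_* flag bits (values assumed) */
#define HAS_FREEZE_PLANS		0x01
#define HAS_REDIRECTIONS		0x02
#define HAS_DEAD_ITEMS			0x04
#define HAS_NOW_UNUSED_ITEMS	0x08

/*
 * The buffer-0 payload is parsed in the same order it was registered:
 * freeze plans, redirect pairs, dead items, unused items, and finally
 * the per-plan freeze offset arrays.
 */
static void
describe_payload(unsigned int flags)
{
	if (flags & HAS_FREEZE_PLANS)
		printf("xlhp_freeze_plans header + plan array\n");
	if (flags & HAS_REDIRECTIONS)
		printf("xlhp_prune_items header + (from, to) offset pairs\n");
	if (flags & HAS_DEAD_ITEMS)
		printf("xlhp_prune_items header + dead offsets\n");
	if (flags & HAS_NOW_UNUSED_ITEMS)
		printf("xlhp_prune_items header + unused offsets\n");
	if (flags & HAS_FREEZE_PLANS)
		printf("freeze plan offset arrays, grouped by plan\n");
}

int
main(void)
{
	/* e.g. a vacuum-scan record that both prunes and freezes */
	describe_payload(HAS_FREEZE_PLANS | HAS_REDIRECTIONS | HAS_DEAD_ITEMS);
	return 0;
}
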