Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_visibility.c
4 : * display visibility map information and page-level visibility bits
5 : *
6 : * Copyright (c) 2016-2023, PostgreSQL Global Development Group
7 : *
8 : * contrib/pg_visibility/pg_visibility.c
9 : *-------------------------------------------------------------------------
10 : */
11 : #include "postgres.h"
12 :
13 : #include "access/heapam.h"
14 : #include "access/htup_details.h"
15 : #include "access/visibilitymap.h"
16 : #include "access/xloginsert.h"
17 : #include "catalog/pg_type.h"
18 : #include "catalog/storage_xlog.h"
19 : #include "funcapi.h"
20 : #include "miscadmin.h"
21 : #include "storage/bufmgr.h"
22 : #include "storage/procarray.h"
23 : #include "storage/smgr.h"
24 : #include "utils/rel.h"
25 : #include "utils/snapmgr.h"
26 :
/* Module magic block for this loadable extension library. */
PG_MODULE_MAGIC;
28 :
29 : typedef struct vbits
30 : {
31 : BlockNumber next;
32 : BlockNumber count;
33 : uint8 bits[FLEXIBLE_ARRAY_MEMBER];
34 : } vbits;
35 :
36 : typedef struct corrupt_items
37 : {
38 : BlockNumber next;
39 : BlockNumber count;
40 : ItemPointer tids;
41 : } corrupt_items;
42 :
43 1 : PG_FUNCTION_INFO_V1(pg_visibility_map);
44 2 : PG_FUNCTION_INFO_V1(pg_visibility_map_rel);
45 2 : PG_FUNCTION_INFO_V1(pg_visibility);
46 2 : PG_FUNCTION_INFO_V1(pg_visibility_rel);
47 2 : PG_FUNCTION_INFO_V1(pg_visibility_map_summary);
2489 48 2 : PG_FUNCTION_INFO_V1(pg_check_frozen);
49 1 : PG_FUNCTION_INFO_V1(pg_check_visible);
2487 50 2 : PG_FUNCTION_INFO_V1(pg_truncate_visibility_map);
51 :
52 : static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);
53 : static vbits *collect_visibility_data(Oid relid, bool include_pd);
54 : static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible,
55 : bool all_frozen);
56 : static void record_corrupt_item(corrupt_items *items, ItemPointer tid);
57 : static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin,
58 : Buffer buffer);
59 : static void check_relation_relkind(Relation rel);
60 :
61 : /*
62 : * Visibility map information for a single block of a relation.
63 : *
64 : * Note: the VM code will silently return zeroes for pages past the end
65 : * of the map, so we allow probes up to MaxBlockNumber regardless of the
66 : * actual relation size.
67 : */
68 : Datum
2588 rhaas 69 UBC 0 : pg_visibility_map(PG_FUNCTION_ARGS)
70 : {
71 0 : Oid relid = PG_GETARG_OID(0);
72 0 : int64 blkno = PG_GETARG_INT64(1);
73 : int32 mapbits;
74 : Relation rel;
75 0 : Buffer vmbuffer = InvalidBuffer;
76 : TupleDesc tupdesc;
77 : Datum values[2];
267 peter 78 UNC 0 : bool nulls[2] = {0};
79 :
2588 rhaas 80 UBC 0 : rel = relation_open(relid, AccessShareLock);
81 :
82 : /* Only some relkinds have a visibility map */
2222 sfrost 83 0 : check_relation_relkind(rel);
84 :
2588 rhaas 85 0 : if (blkno < 0 || blkno > MaxBlockNumber)
86 0 : ereport(ERROR,
87 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
88 : errmsg("invalid block number")));
89 :
90 0 : tupdesc = pg_visibility_tupdesc(false, false);
2588 rhaas 91 EUB :
2588 rhaas 92 UBC 0 : mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
93 0 : if (vmbuffer != InvalidBuffer)
94 0 : ReleaseBuffer(vmbuffer);
95 0 : values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
2588 rhaas 96 UIC 0 : values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
2588 rhaas 97 EUB :
2588 rhaas 98 UIC 0 : relation_close(rel, AccessShareLock);
2588 rhaas 99 EUB :
2588 rhaas 100 UIC 0 : PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
101 : }
102 :
103 : /*
104 : * Visibility map information for a single block of a relation, plus the
105 : * page-level information for the same block.
106 : */
2588 rhaas 107 ECB : Datum
2588 rhaas 108 GIC 6 : pg_visibility(PG_FUNCTION_ARGS)
2588 rhaas 109 ECB : {
2588 rhaas 110 CBC 6 : Oid relid = PG_GETARG_OID(0);
2588 rhaas 111 GIC 6 : int64 blkno = PG_GETARG_INT64(1);
112 : int32 mapbits;
2588 rhaas 113 ECB : Relation rel;
2588 rhaas 114 GIC 6 : Buffer vmbuffer = InvalidBuffer;
115 : Buffer buffer;
116 : Page page;
117 : TupleDesc tupdesc;
2588 rhaas 118 ECB : Datum values[3];
267 peter 119 GNC 6 : bool nulls[3] = {0};
2588 rhaas 120 ECB :
2588 rhaas 121 GIC 6 : rel = relation_open(relid, AccessShareLock);
122 :
2222 sfrost 123 ECB : /* Only some relkinds have a visibility map */
2222 sfrost 124 GIC 6 : check_relation_relkind(rel);
2222 sfrost 125 ECB :
2588 rhaas 126 GBC 1 : if (blkno < 0 || blkno > MaxBlockNumber)
2588 rhaas 127 UIC 0 : ereport(ERROR,
128 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
129 : errmsg("invalid block number")));
2588 rhaas 130 ECB :
2588 rhaas 131 GIC 1 : tupdesc = pg_visibility_tupdesc(false, true);
2588 rhaas 132 ECB :
2588 rhaas 133 CBC 1 : mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
134 1 : if (vmbuffer != InvalidBuffer)
135 1 : ReleaseBuffer(vmbuffer);
2588 rhaas 136 GIC 1 : values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
137 1 : values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
2588 rhaas 138 ECB :
139 : /* Here we have to explicitly check rel size ... */
2381 tgl 140 CBC 1 : if (blkno < RelationGetNumberOfBlocks(rel))
2381 tgl 141 ECB : {
2381 tgl 142 GIC 1 : buffer = ReadBuffer(rel, blkno);
2381 tgl 143 CBC 1 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
2588 rhaas 144 ECB :
2381 tgl 145 GIC 1 : page = BufferGetPage(buffer);
2381 tgl 146 CBC 1 : values[2] = BoolGetDatum(PageIsAllVisible(page));
147 :
2381 tgl 148 GIC 1 : UnlockReleaseBuffer(buffer);
149 : }
150 : else
2381 tgl 151 EUB : {
152 : /* As with the vismap, silently return 0 for pages past EOF */
2381 tgl 153 UIC 0 : values[2] = BoolGetDatum(false);
2381 tgl 154 ECB : }
155 :
2588 rhaas 156 CBC 1 : relation_close(rel, AccessShareLock);
157 :
2588 rhaas 158 GIC 1 : PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
159 : }
160 :
161 : /*
162 : * Visibility map information for every block in a relation.
2588 rhaas 163 ECB : */
164 : Datum
2588 rhaas 165 GIC 21 : pg_visibility_map_rel(PG_FUNCTION_ARGS)
166 : {
167 : FuncCallContext *funcctx;
2588 rhaas 168 ECB : vbits *info;
169 :
2588 rhaas 170 CBC 21 : if (SRF_IS_FIRSTCALL())
171 : {
2588 rhaas 172 GIC 11 : Oid relid = PG_GETARG_OID(0);
2495 rhaas 173 ECB : MemoryContext oldcontext;
2588 174 :
2588 rhaas 175 CBC 11 : funcctx = SRF_FIRSTCALL_INIT();
2588 rhaas 176 GIC 11 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2588 rhaas 177 CBC 11 : funcctx->tuple_desc = pg_visibility_tupdesc(true, false);
2222 sfrost 178 ECB : /* collect_visibility_data will verify the relkind */
2588 rhaas 179 GIC 11 : funcctx->user_fctx = collect_visibility_data(relid, false);
180 4 : MemoryContextSwitchTo(oldcontext);
2588 rhaas 181 ECB : }
182 :
2588 rhaas 183 GIC 14 : funcctx = SRF_PERCALL_SETUP();
2588 rhaas 184 CBC 14 : info = (vbits *) funcctx->user_fctx;
185 :
2588 rhaas 186 GIC 14 : if (info->next < info->count)
2588 rhaas 187 ECB : {
188 : Datum values[3];
267 peter 189 GNC 10 : bool nulls[3] = {0};
2588 rhaas 190 ECB : HeapTuple tuple;
191 :
2588 rhaas 192 CBC 10 : values[0] = Int64GetDatum(info->next);
2588 rhaas 193 GIC 10 : values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
2588 rhaas 194 CBC 10 : values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
195 10 : info->next++;
196 :
2588 rhaas 197 GIC 10 : tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
2588 rhaas 198 CBC 10 : SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
199 : }
200 :
2588 rhaas 201 GIC 4 : SRF_RETURN_DONE(funcctx);
202 : }
203 :
204 : /*
205 : * Visibility map information for every block in a relation, plus the page
2588 rhaas 206 ECB : * level information for each block.
207 : */
208 : Datum
2588 rhaas 209 GIC 9 : pg_visibility_rel(PG_FUNCTION_ARGS)
210 : {
2588 rhaas 211 ECB : FuncCallContext *funcctx;
212 : vbits *info;
213 :
2588 rhaas 214 GIC 9 : if (SRF_IS_FIRSTCALL())
215 : {
2588 rhaas 216 CBC 6 : Oid relid = PG_GETARG_OID(0);
2495 rhaas 217 ECB : MemoryContext oldcontext;
2588 218 :
2588 rhaas 219 GIC 6 : funcctx = SRF_FIRSTCALL_INIT();
2588 rhaas 220 CBC 6 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
221 6 : funcctx->tuple_desc = pg_visibility_tupdesc(true, true);
222 : /* collect_visibility_data will verify the relkind */
2588 rhaas 223 GIC 6 : funcctx->user_fctx = collect_visibility_data(relid, true);
2588 rhaas 224 CBC 6 : MemoryContextSwitchTo(oldcontext);
2588 rhaas 225 ECB : }
226 :
2588 rhaas 227 CBC 9 : funcctx = SRF_PERCALL_SETUP();
2588 rhaas 228 GIC 9 : info = (vbits *) funcctx->user_fctx;
229 :
2588 rhaas 230 CBC 9 : if (info->next < info->count)
231 : {
232 : Datum values[4];
267 peter 233 GNC 3 : bool nulls[4] = {0};
2588 rhaas 234 ECB : HeapTuple tuple;
235 :
2588 rhaas 236 CBC 3 : values[0] = Int64GetDatum(info->next);
2588 rhaas 237 GIC 3 : values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
2588 rhaas 238 CBC 3 : values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
239 3 : values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0);
2588 rhaas 240 GIC 3 : info->next++;
241 :
2588 rhaas 242 CBC 3 : tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
2588 rhaas 243 GIC 3 : SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
244 : }
245 :
246 6 : SRF_RETURN_DONE(funcctx);
247 : }
248 :
249 : /*
2588 rhaas 250 ECB : * Count the number of all-visible and all-frozen pages in the visibility
251 : * map for a particular relation.
252 : */
253 : Datum
2588 rhaas 254 GIC 6 : pg_visibility_map_summary(PG_FUNCTION_ARGS)
255 : {
2588 rhaas 256 CBC 6 : Oid relid = PG_GETARG_OID(0);
2588 rhaas 257 ECB : Relation rel;
2495 258 : BlockNumber nblocks;
259 : BlockNumber blkno;
2588 rhaas 260 GIC 6 : Buffer vmbuffer = InvalidBuffer;
2588 rhaas 261 CBC 6 : int64 all_visible = 0;
2588 rhaas 262 GIC 6 : int64 all_frozen = 0;
2588 rhaas 263 ECB : TupleDesc tupdesc;
264 : Datum values[2];
267 peter 265 GNC 6 : bool nulls[2] = {0};
2588 rhaas 266 ECB :
2588 rhaas 267 GIC 6 : rel = relation_open(relid, AccessShareLock);
2222 sfrost 268 ECB :
269 : /* Only some relkinds have a visibility map */
2222 sfrost 270 CBC 6 : check_relation_relkind(rel);
271 :
2588 rhaas 272 GIC 1 : nblocks = RelationGetNumberOfBlocks(rel);
273 :
274 2 : for (blkno = 0; blkno < nblocks; ++blkno)
2588 rhaas 275 ECB : {
276 : int32 mapbits;
277 :
278 : /* Make sure we are interruptible. */
2588 rhaas 279 CBC 1 : CHECK_FOR_INTERRUPTS();
2588 rhaas 280 ECB :
281 : /* Get map info. */
2588 rhaas 282 GBC 1 : mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
2588 rhaas 283 GIC 1 : if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
284 1 : ++all_visible;
285 1 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
2588 rhaas 286 LBC 0 : ++all_frozen;
2588 rhaas 287 ECB : }
288 :
289 : /* Clean up. */
2588 rhaas 290 CBC 1 : if (vmbuffer != InvalidBuffer)
2588 rhaas 291 GBC 1 : ReleaseBuffer(vmbuffer);
2588 rhaas 292 GIC 1 : relation_close(rel, AccessShareLock);
2588 rhaas 293 ECB :
109 michael 294 GNC 1 : if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
109 michael 295 UNC 0 : elog(ERROR, "return type must be a row type");
296 :
2588 rhaas 297 GIC 1 : values[0] = Int64GetDatum(all_visible);
298 1 : values[1] = Int64GetDatum(all_frozen);
299 :
300 1 : PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
301 : }
2588 rhaas 302 ECB :
303 : /*
304 : * Return the TIDs of non-frozen tuples present in pages marked all-frozen
305 : * in the visibility map. We hope no one will ever find any, but there could
306 : * be bugs, database corruption, etc.
2489 307 : */
308 : Datum
2489 rhaas 309 CBC 9 : pg_check_frozen(PG_FUNCTION_ARGS)
310 : {
311 : FuncCallContext *funcctx;
2489 rhaas 312 ECB : corrupt_items *items;
313 :
2489 rhaas 314 GIC 9 : if (SRF_IS_FIRSTCALL())
2489 rhaas 315 ECB : {
2489 rhaas 316 CBC 9 : Oid relid = PG_GETARG_OID(0);
317 : MemoryContext oldcontext;
318 :
319 9 : funcctx = SRF_FIRSTCALL_INIT();
320 9 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
321 : /* collect_corrupt_items will verify the relkind */
322 9 : funcctx->user_fctx = collect_corrupt_items(relid, false, true);
2489 rhaas 323 GBC 4 : MemoryContextSwitchTo(oldcontext);
324 : }
2489 rhaas 325 ECB :
2489 rhaas 326 GIC 4 : funcctx = SRF_PERCALL_SETUP();
327 4 : items = (corrupt_items *) funcctx->user_fctx;
328 :
329 4 : if (items->next < items->count)
2489 rhaas 330 UIC 0 : SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
331 :
2489 rhaas 332 GIC 4 : SRF_RETURN_DONE(funcctx);
333 : }
2489 rhaas 334 EUB :
335 : /*
336 : * Return the TIDs of not-all-visible tuples in pages marked all-visible
337 : * in the visibility map. We hope no one will ever find any, but there could
338 : * be bugs, database corruption, etc.
339 : */
340 : Datum
2489 rhaas 341 UBC 0 : pg_check_visible(PG_FUNCTION_ARGS)
342 : {
343 : FuncCallContext *funcctx;
2489 rhaas 344 EUB : corrupt_items *items;
345 :
2489 rhaas 346 UIC 0 : if (SRF_IS_FIRSTCALL())
2489 rhaas 347 EUB : {
2489 rhaas 348 UBC 0 : Oid relid = PG_GETARG_OID(0);
349 : MemoryContext oldcontext;
350 :
351 0 : funcctx = SRF_FIRSTCALL_INIT();
352 0 : oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
353 : /* collect_corrupt_items will verify the relkind */
354 0 : funcctx->user_fctx = collect_corrupt_items(relid, true, false);
355 0 : MemoryContextSwitchTo(oldcontext);
356 : }
2489 rhaas 357 EUB :
2489 rhaas 358 UIC 0 : funcctx = SRF_PERCALL_SETUP();
359 0 : items = (corrupt_items *) funcctx->user_fctx;
360 :
361 0 : if (items->next < items->count)
362 0 : SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
363 :
364 0 : SRF_RETURN_DONE(funcctx);
365 : }
366 :
367 : /*
368 : * Remove the visibility map fork for a relation. If there turn out to be
2487 rhaas 369 ECB : * any bugs in the visibility map code that require rebuilding the VM, this
370 : * provides users with a way to do it that is cleaner than shutting down the
371 : * server and removing files by hand.
372 : *
373 : * This is a cut-down version of RelationTruncate.
374 : */
375 : Datum
2487 rhaas 376 CBC 6 : pg_truncate_visibility_map(PG_FUNCTION_ARGS)
377 : {
2487 rhaas 378 GIC 6 : Oid relid = PG_GETARG_OID(0);
2487 rhaas 379 ECB : Relation rel;
380 : ForkNumber fork;
381 : BlockNumber block;
382 :
2487 rhaas 383 GIC 6 : rel = relation_open(relid, AccessExclusiveLock);
2487 rhaas 384 ECB :
2222 sfrost 385 : /* Only some relkinds have a visibility map */
2222 sfrost 386 GIC 6 : check_relation_relkind(rel);
2487 rhaas 387 ECB :
636 tgl 388 : /* Forcibly reset cached file size */
636 tgl 389 GIC 1 : RelationGetSmgr(rel)->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = InvalidBlockNumber;
390 :
1293 fujii 391 CBC 1 : block = visibilitymap_prepare_truncate(rel, 0);
1293 fujii 392 GIC 1 : if (BlockNumberIsValid(block))
393 : {
394 1 : fork = VISIBILITYMAP_FORKNUM;
636 tgl 395 CBC 1 : smgrtruncate(RelationGetSmgr(rel), &fork, 1, &block);
1293 fujii 396 ECB : }
2487 rhaas 397 :
2487 rhaas 398 GIC 1 : if (RelationNeedsWAL(rel))
2487 rhaas 399 ECB : {
400 : xl_smgr_truncate xlrec;
401 :
2487 rhaas 402 CBC 1 : xlrec.blkno = 0;
277 rhaas 403 GNC 1 : xlrec.rlocator = rel->rd_locator;
2487 rhaas 404 GIC 1 : xlrec.flags = SMGR_TRUNCATE_VM;
405 :
406 1 : XLogBeginInsert();
407 1 : XLogRegisterData((char *) &xlrec, sizeof(xlrec));
408 :
409 1 : XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
410 : }
411 :
412 : /*
413 : * Release the lock right away, not at commit time.
414 : *
415 : * It would be a problem to release the lock prior to commit if this
416 : * truncate operation sends any transactional invalidation messages. Other
417 : * backends would potentially be able to lock the relation without
418 : * processing them in the window of time between when we release the lock
419 : * here and when we sent the messages at our eventual commit. However,
420 : * we're currently only sending a non-transactional smgr invalidation,
421 : * which will have been posted to shared memory immediately from within
422 : * smgr_truncate. Therefore, there should be no race here.
423 : *
424 : * The reason why it's desirable to release the lock early here is because
2487 rhaas 425 ECB : * of the possibility that someone will need to use this to blow away many
426 : * visibility map forks at once. If we can't release the lock until
427 : * commit time, the transaction doing this will accumulate
428 : * AccessExclusiveLocks on all of those relations at the same time, which
429 : * is undesirable. However, if this turns out to be unsafe we may have no
430 : * choice...
431 : */
2487 rhaas 432 GIC 1 : relation_close(rel, AccessExclusiveLock);
433 :
434 : /* Nothing to return. */
435 1 : PG_RETURN_VOID();
2487 rhaas 436 ECB : }
437 :
438 : /*
2588 439 : * Helper function to construct whichever TupleDesc we need for a particular
440 : * call.
441 : */
442 : static TupleDesc
2588 rhaas 443 CBC 18 : pg_visibility_tupdesc(bool include_blkno, bool include_pd)
2588 rhaas 444 ECB : {
445 : TupleDesc tupdesc;
2588 rhaas 446 CBC 18 : AttrNumber maxattr = 2;
447 18 : AttrNumber a = 0;
2588 rhaas 448 ECB :
2588 rhaas 449 CBC 18 : if (include_blkno)
450 17 : ++maxattr;
451 18 : if (include_pd)
452 7 : ++maxattr;
1601 andres 453 18 : tupdesc = CreateTemplateTupleDesc(maxattr);
2588 rhaas 454 GIC 18 : if (include_blkno)
2588 rhaas 455 CBC 17 : TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
2588 rhaas 456 GIC 18 : TupleDescInitEntry(tupdesc, ++a, "all_visible", BOOLOID, -1, 0);
457 18 : TupleDescInitEntry(tupdesc, ++a, "all_frozen", BOOLOID, -1, 0);
458 18 : if (include_pd)
459 7 : TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
460 18 : Assert(a == maxattr);
461 :
462 18 : return BlessTupleDesc(tupdesc);
463 : }
464 :
2588 rhaas 465 ECB : /*
466 : * Collect visibility data about a relation.
467 : *
468 : * Checks relkind of relid and will throw an error if the relation does not
469 : * have a VM.
470 : */
471 : static vbits *
2588 rhaas 472 CBC 17 : collect_visibility_data(Oid relid, bool include_pd)
473 : {
2588 rhaas 474 ECB : Relation rel;
475 : BlockNumber nblocks;
476 : vbits *info;
2495 477 : BlockNumber blkno;
2588 rhaas 478 GIC 17 : Buffer vmbuffer = InvalidBuffer;
2495 rhaas 479 CBC 17 : BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
2588 rhaas 480 ECB :
2588 rhaas 481 CBC 17 : rel = relation_open(relid, AccessShareLock);
2588 rhaas 482 ECB :
483 : /* Only some relkinds have a visibility map */
2222 sfrost 484 CBC 15 : check_relation_relkind(rel);
485 :
2588 rhaas 486 GIC 10 : nblocks = RelationGetNumberOfBlocks(rel);
2118 tgl 487 10 : info = palloc0(offsetof(vbits, bits) + nblocks);
2588 rhaas 488 10 : info->next = 0;
2588 rhaas 489 CBC 10 : info->count = nblocks;
490 :
2588 rhaas 491 GIC 23 : for (blkno = 0; blkno < nblocks; ++blkno)
2588 rhaas 492 ECB : {
493 : int32 mapbits;
494 :
495 : /* Make sure we are interruptible. */
2588 rhaas 496 CBC 13 : CHECK_FOR_INTERRUPTS();
497 :
498 : /* Get map info. */
2588 rhaas 499 GIC 13 : mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
500 13 : if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
501 9 : info->bits[blkno] |= (1 << 0);
502 13 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
2588 rhaas 503 CBC 6 : info->bits[blkno] |= (1 << 1);
504 :
505 : /*
506 : * Page-level data requires reading every block, so only get it if the
507 : * caller needs it. Use a buffer access strategy, too, to prevent
2588 rhaas 508 ECB : * cache-trashing.
509 : */
2588 rhaas 510 CBC 13 : if (include_pd)
511 : {
2588 rhaas 512 ECB : Buffer buffer;
513 : Page page;
514 :
2588 rhaas 515 GIC 3 : buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2588 rhaas 516 ECB : bstrategy);
2588 rhaas 517 GIC 3 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
518 :
2545 kgrittn 519 3 : page = BufferGetPage(buffer);
2588 rhaas 520 3 : if (PageIsAllVisible(page))
2588 rhaas 521 CBC 2 : info->bits[blkno] |= (1 << 2);
2588 rhaas 522 ECB :
2588 rhaas 523 CBC 3 : UnlockReleaseBuffer(buffer);
524 : }
2588 rhaas 525 ECB : }
526 :
527 : /* Clean up. */
2588 rhaas 528 GIC 10 : if (vmbuffer != InvalidBuffer)
529 7 : ReleaseBuffer(vmbuffer);
530 10 : relation_close(rel, AccessShareLock);
531 :
532 10 : return info;
533 : }
534 :
535 : /*
536 : * Returns a list of items whose visibility map information does not match
537 : * the status of the tuples on the page.
538 : *
539 : * If all_visible is passed as true, this will include all items which are
540 : * on pages marked as all-visible in the visibility map but which do not
541 : * seem to in fact be all-visible.
542 : *
2489 rhaas 543 ECB : * If all_frozen is passed as true, this will include all items which are
544 : * on pages marked as all-frozen but which do not seem to in fact be frozen.
545 : *
546 : * Checks relkind of relid and will throw an error if the relation does not
547 : * have a VM.
548 : */
549 : static corrupt_items *
2489 rhaas 550 CBC 9 : collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
2489 rhaas 551 ECB : {
552 : Relation rel;
553 : BlockNumber nblocks;
554 : corrupt_items *items;
555 : BlockNumber blkno;
2489 rhaas 556 CBC 9 : Buffer vmbuffer = InvalidBuffer;
2489 rhaas 557 GIC 9 : BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
2489 rhaas 558 CBC 9 : TransactionId OldestXmin = InvalidTransactionId;
2489 rhaas 559 EUB :
2489 rhaas 560 GIC 9 : rel = relation_open(relid, AccessShareLock);
2489 rhaas 561 ECB :
562 : /* Only some relkinds have a visibility map */
2222 sfrost 563 GIC 9 : check_relation_relkind(rel);
564 :
970 andres 565 4 : if (all_visible)
970 andres 566 UIC 0 : OldestXmin = GetOldestNonRemovableTransactionId(rel);
567 :
2489 rhaas 568 GIC 4 : nblocks = RelationGetNumberOfBlocks(rel);
569 :
570 : /*
2489 rhaas 571 ECB : * Guess an initial array size. We don't expect many corrupted tuples, so
572 : * start with a small array. This function uses the "next" field to track
573 : * the next offset where we can store an item (which is the same thing as
574 : * the number of items found so far) and the "count" field to track the
575 : * number of entries allocated. We'll repurpose these fields before
576 : * returning.
577 : */
2489 rhaas 578 GIC 4 : items = palloc0(sizeof(corrupt_items));
2489 rhaas 579 CBC 4 : items->next = 0;
580 4 : items->count = 64;
2489 rhaas 581 GIC 4 : items->tids = palloc(items->count * sizeof(ItemPointerData));
582 :
583 : /* Loop over every block in the relation. */
584 14 : for (blkno = 0; blkno < nblocks; ++blkno)
585 : {
586 10 : bool check_frozen = false;
2489 rhaas 587 CBC 10 : bool check_visible = false;
588 : Buffer buffer;
589 : Page page;
2489 rhaas 590 ECB : OffsetNumber offnum,
591 : maxoff;
592 :
2489 rhaas 593 EUB : /* Make sure we are interruptible. */
2489 rhaas 594 CBC 10 : CHECK_FOR_INTERRUPTS();
2489 rhaas 595 ECB :
596 : /* Use the visibility map to decide whether to check this page. */
2489 rhaas 597 GIC 10 : if (all_frozen && VM_ALL_FROZEN(rel, blkno, &vmbuffer))
2489 rhaas 598 CBC 6 : check_frozen = true;
2489 rhaas 599 GIC 10 : if (all_visible && VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
2489 rhaas 600 LBC 0 : check_visible = true;
2489 rhaas 601 GIC 10 : if (!check_visible && !check_frozen)
2489 rhaas 602 CBC 4 : continue;
2489 rhaas 603 ECB :
604 : /* Read and lock the page. */
2489 rhaas 605 GIC 6 : buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
606 : bstrategy);
607 6 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
608 :
2489 rhaas 609 CBC 6 : page = BufferGetPage(buffer);
2489 rhaas 610 GBC 6 : maxoff = PageGetMaxOffsetNumber(page);
2489 rhaas 611 ECB :
2489 rhaas 612 EUB : /*
2489 rhaas 613 ECB : * The visibility map bits might have changed while we were acquiring
614 : * the page lock. Recheck to avoid returning spurious results.
2489 rhaas 615 EUB : */
2489 rhaas 616 GBC 6 : if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
2489 rhaas 617 UIC 0 : check_frozen = false;
2489 rhaas 618 GIC 6 : if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
2489 rhaas 619 UIC 0 : check_visible = false;
2489 rhaas 620 CBC 6 : if (!check_visible && !check_frozen)
2489 rhaas 621 ECB : {
2489 rhaas 622 LBC 0 : UnlockReleaseBuffer(buffer);
2489 rhaas 623 UIC 0 : continue;
624 : }
625 :
626 : /* Iterate over each tuple on the page. */
2489 rhaas 627 CBC 6 : for (offnum = FirstOffsetNumber;
2489 rhaas 628 GIC 27 : offnum <= maxoff;
629 21 : offnum = OffsetNumberNext(offnum))
2489 rhaas 630 ECB : {
2489 rhaas 631 EUB : HeapTupleData tuple;
632 : ItemId itemid;
633 :
2489 rhaas 634 CBC 21 : itemid = PageGetItemId(page, offnum);
635 :
2489 rhaas 636 EUB : /* Unused or redirect line pointers are of no interest. */
2489 rhaas 637 GBC 21 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
2489 rhaas 638 UBC 0 : continue;
639 :
640 : /* Dead line pointers are neither all-visible nor frozen. */
2489 rhaas 641 GIC 21 : if (ItemIdIsDead(itemid))
2489 rhaas 642 ECB : {
2381 tgl 643 LBC 0 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
644 0 : record_corrupt_item(items, &tuple.t_self);
2489 rhaas 645 0 : continue;
646 : }
647 :
648 : /* Initialize a HeapTupleData structure for checks below. */
2381 tgl 649 GIC 21 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
2489 rhaas 650 21 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2489 rhaas 651 CBC 21 : tuple.t_len = ItemIdGetLength(itemid);
2489 rhaas 652 GBC 21 : tuple.t_tableOid = relid;
653 :
654 : /*
655 : * If we're checking whether the page is all-visible, we expect
656 : * the tuple to be all-visible.
657 : */
2489 rhaas 658 GIC 21 : if (check_visible &&
2489 rhaas 659 UIC 0 : !tuple_all_visible(&tuple, OldestXmin, buffer))
660 : {
661 : TransactionId RecomputedOldestXmin;
662 :
663 : /*
664 : * Time has passed since we computed OldestXmin, so it's
665 : * possible that this tuple is all-visible in reality even
666 : * though it doesn't appear so based on our
667 : * previously-computed value. Let's compute a new value so we
668 : * can be certain whether there is a problem.
669 : *
670 : * From a concurrency point of view, it sort of sucks to
2489 rhaas 671 EUB : * retake ProcArrayLock here while we're holding the buffer
672 : * exclusively locked, but it should be safe against
970 andres 673 : * deadlocks, because surely
674 : * GetOldestNonRemovableTransactionId() should never take a
675 : * buffer lock. And this shouldn't happen often, so it's worth
676 : * being careful so as to avoid false positives.
2489 rhaas 677 : */
970 andres 678 UBC 0 : RecomputedOldestXmin = GetOldestNonRemovableTransactionId(rel);
2489 rhaas 679 EUB :
2489 rhaas 680 UIC 0 : if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
2381 tgl 681 0 : record_corrupt_item(items, &tuple.t_self);
682 : else
683 : {
2489 rhaas 684 0 : OldestXmin = RecomputedOldestXmin;
685 0 : if (!tuple_all_visible(&tuple, OldestXmin, buffer))
2381 tgl 686 0 : record_corrupt_item(items, &tuple.t_self);
2489 rhaas 687 ECB : }
688 : }
689 :
2489 rhaas 690 EUB : /*
691 : * If we're checking whether the page is all-frozen, we expect the
692 : * tuple to be in a state where it will never need freezing.
693 : */
2489 rhaas 694 CBC 21 : if (check_frozen)
695 : {
2489 rhaas 696 GIC 21 : if (heap_tuple_needs_eventual_freeze(tuple.t_data))
2381 tgl 697 UIC 0 : record_corrupt_item(items, &tuple.t_self);
2489 rhaas 698 ECB : }
699 : }
700 :
2489 rhaas 701 GIC 6 : UnlockReleaseBuffer(buffer);
702 : }
703 :
704 : /* Clean up. */
705 4 : if (vmbuffer != InvalidBuffer)
706 4 : ReleaseBuffer(vmbuffer);
707 4 : relation_close(rel, AccessShareLock);
2489 rhaas 708 ECB :
709 : /*
710 : * Before returning, repurpose the fields to match caller's expectations.
711 : * next is now the next item that should be read (rather than written) and
712 : * count is now the number of items we wrote (rather than the number we
713 : * allocated).
714 : */
2489 rhaas 715 GIC 4 : items->count = items->next;
716 4 : items->next = 0;
717 :
2489 rhaas 718 GBC 4 : return items;
719 : }
720 :
2489 rhaas 721 EUB : /*
722 : * Remember one corrupt item.
723 : */
724 : static void
2489 rhaas 725 UBC 0 : record_corrupt_item(corrupt_items *items, ItemPointer tid)
726 : {
727 : /* enlarge output array if needed. */
728 0 : if (items->next >= items->count)
2489 rhaas 729 EUB : {
2489 rhaas 730 UIC 0 : items->count *= 2;
731 0 : items->tids = repalloc(items->tids,
732 0 : items->count * sizeof(ItemPointerData));
733 : }
734 : /* and add the new item */
735 0 : items->tids[items->next++] = *tid;
2489 rhaas 736 UBC 0 : }
737 :
738 : /*
739 : * Check whether a tuple is all-visible relative to a given OldestXmin value.
740 : * The buffer should contain the tuple and should be locked and pinned.
2489 rhaas 741 EUB : */
742 : static bool
2489 rhaas 743 UBC 0 : tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
744 : {
745 : HTSV_Result state;
746 : TransactionId xmin;
747 :
2489 rhaas 748 UIC 0 : state = HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer);
749 0 : if (state != HEAPTUPLE_LIVE)
750 0 : return false; /* all-visible implies live */
751 :
2489 rhaas 752 EUB : /*
753 : * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
754 : * all-visible unless every tuple is hinted committed. However, those hint
755 : * bits could be lost after a crash, so we can't be certain that they'll
756 : * be set here. So just check the xmin.
757 : */
758 :
2489 rhaas 759 UIC 0 : xmin = HeapTupleHeaderGetXmin(tup->t_data);
760 0 : if (!TransactionIdPrecedes(xmin, OldestXmin))
761 0 : return false; /* xmin not old enough for all to see */
762 :
763 0 : return true;
2489 rhaas 764 ECB : }
765 :
2222 sfrost 766 : /*
767 : * check_relation_relkind - convenience routine to check that relation
768 : * is of the relkind supported by the callers
769 : */
770 : static void
2222 sfrost 771 GIC 42 : check_relation_relkind(Relation rel)
2222 sfrost 772 ECB : {
492 peter 773 GIC 42 : if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
2222 sfrost 774 25 : ereport(ERROR,
775 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
776 : errmsg("relation \"%s\" is of wrong relation kind",
777 : RelationGetRelationName(rel)),
778 : errdetail_relkind_not_supported(rel->rd_rel->relkind)));
779 17 : }
|