Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * heap_surgery.c
4 : * Functions to perform surgery on a damaged heap table.
5 : *
6 : * Copyright (c) 2020-2023, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * contrib/pg_surgery/heap_surgery.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "postgres.h"
14 :
15 : #include "access/heapam.h"
16 : #include "access/visibilitymap.h"
17 : #include "access/xloginsert.h"
18 : #include "catalog/pg_am_d.h"
19 : #include "catalog/pg_proc_d.h"
20 : #include "miscadmin.h"
21 : #include "storage/bufmgr.h"
22 : #include "utils/acl.h"
23 : #include "utils/array.h"
24 : #include "utils/rel.h"
25 :
941 rhaas 26 GIC 1 : PG_MODULE_MAGIC;
941 rhaas 27 ECB :
28 : /* Options to forcefully change the state of a heap tuple. */
/* heap_force_common() takes one of these values to select its operation. */
29 : typedef enum HeapTupleForceOption
30 : {
31 : HEAP_FORCE_KILL,	/* mark the targeted line pointers dead */
32 : HEAP_FORCE_FREEZE	/* reset the targeted tuples' visibility fields to frozen */
33 : } HeapTupleForceOption;
34 :
941 rhaas 35 GIC 2 : PG_FUNCTION_INFO_V1(heap_force_kill);
941 rhaas 36 CBC 2 : PG_FUNCTION_INFO_V1(heap_force_freeze);
941 rhaas 37 ECB :
38 : static int32 tidcmp(const void *a, const void *b);
39 : static Datum heap_force_common(FunctionCallInfo fcinfo,
40 : HeapTupleForceOption heap_force_opt);
41 : static void sanity_check_tid_array(ArrayType *ta, int *ntids);
42 : static BlockNumber find_tids_one_page(ItemPointer tids, int ntids,
43 : OffsetNumber *next_start_ptr);
44 :
45 : /*-------------------------------------------------------------------------
46 : * heap_force_kill()
47 : *
48 : * Force kill the tuple(s) pointed to by the item pointer(s) stored in the
49 : * given TID array.
50 : *
51 : * Usage: SELECT heap_force_kill(regclass, tid[]);
52 : *-------------------------------------------------------------------------
53 : */
54 : Datum
941 rhaas 55 GIC 9 : heap_force_kill(PG_FUNCTION_ARGS)
941 rhaas 56 ECB : {
941 rhaas 57 GIC 9 : PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_KILL));
941 rhaas 58 ECB : }
59 :
60 : /*-------------------------------------------------------------------------
61 : * heap_force_freeze()
62 : *
63 : * Force freeze the tuple(s) pointed to by the item pointer(s) stored in the
64 : * given TID array.
65 : *
66 : * Usage: SELECT heap_force_freeze(regclass, tid[]);
67 : *-------------------------------------------------------------------------
68 : */
69 : Datum
941 rhaas 70 GIC 7 : heap_force_freeze(PG_FUNCTION_ARGS)
941 rhaas 71 ECB : {
941 rhaas 72 GIC 7 : PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_FREEZE));
941 rhaas 73 ECB : }
74 :
75 : /*-------------------------------------------------------------------------
76 : * heap_force_common()
77 : *
78 : * Common code for heap_force_kill and heap_force_freeze
79 : *-------------------------------------------------------------------------
80 : */
81 : static Datum
941 rhaas 82 GIC 16 : heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
941 rhaas 83 ECB : {
941 rhaas 84 GIC 16 : Oid relid = PG_GETARG_OID(0);
941 rhaas 85 CBC 16 : ArrayType *ta = PG_GETARG_ARRAYTYPE_P_COPY(1);
941 rhaas 86 ECB : ItemPointer tids;
87 : int ntids,
88 : nblocks;
89 : Relation rel;
90 : OffsetNumber curr_start_ptr,
91 : next_start_ptr;
92 : bool include_this_tid[MaxHeapTuplesPerPage];
93 :
941 rhaas 94 GIC 16 : if (RecoveryInProgress())
941 rhaas 95 LBC 0 : ereport(ERROR,
941 rhaas 96 EUB : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
97 : errmsg("recovery is in progress"),
98 : errhint("heap surgery functions cannot be executed during recovery.")));
99 :
100 : /* Check inputs. */
941 rhaas 101 GIC 16 : sanity_check_tid_array(ta, &ntids);
941 rhaas 102 ECB :
941 rhaas 103 GIC 14 : rel = relation_open(relid, RowExclusiveLock);
941 rhaas 104 ECB :
105 : /*
106 : * Check target relation.
107 : */
492 peter 108 GIC 14 : if (!RELKIND_HAS_TABLE_AM(rel->rd_rel->relkind))
640 peter 109 CBC 2 : ereport(ERROR,
640 peter 110 ECB : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
111 : errmsg("cannot operate on relation \"%s\"",
112 : RelationGetRelationName(rel)),
113 : errdetail_relkind_not_supported(rel->rd_rel->relkind)));
114 :
640 peter 115 GIC 12 : if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
640 peter 116 LBC 0 : ereport(ERROR,
640 peter 117 EUB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
118 : errmsg("only heap AM is supported")));
119 :
120 : /* Must be owner of the table or superuser. */
147 peter 121 GNC 12 : if (!object_ownercheck(RelationRelationId, RelationGetRelid(rel), GetUserId()))
640 peter 122 LBC 0 : aclcheck_error(ACLCHECK_NOT_OWNER,
640 peter 123 UBC 0 : get_relkind_objtype(rel->rd_rel->relkind),
124 0 : RelationGetRelationName(rel));
941 rhaas 125 EUB :
941 rhaas 126 GIC 12 : tids = ((ItemPointer) ARR_DATA_PTR(ta));
941 rhaas 127 ECB :
128 : /*
129 : * If there is more than one TID in the array, sort them so that we can
130 : * easily fetch all the TIDs belonging to one particular page from the
131 : * array.
132 : */
941 rhaas 133 GIC 12 : if (ntids > 1)
61 peter 134 GNC 2 : qsort(tids, ntids, sizeof(ItemPointerData), tidcmp);
941 rhaas 135 ECB :
941 rhaas 136 GIC 12 : curr_start_ptr = next_start_ptr = 0;
941 rhaas 137 CBC 12 : nblocks = RelationGetNumberOfBlocks(rel);
941 rhaas 138 ECB :
139 : /*
140 : * Loop, performing the necessary actions for each block.
141 : */
941 rhaas 142 GIC 24 : while (next_start_ptr != ntids)
941 rhaas 143 ECB : {
144 : Buffer buf;
941 rhaas 145 GIC 12 : Buffer vmbuf = InvalidBuffer;
941 rhaas 146 ECB : Page page;
147 : BlockNumber blkno;
148 : OffsetNumber curoff;
149 : OffsetNumber maxoffset;
150 : int i;
941 rhaas 151 GIC 12 : bool did_modify_page = false;
941 rhaas 152 CBC 12 : bool did_modify_vm = false;
941 rhaas 153 ECB :
941 rhaas 154 GIC 12 : CHECK_FOR_INTERRUPTS();
941 rhaas 155 ECB :
156 : /*
157 : * Find all the TIDs belonging to one particular page starting from
158 : * next_start_ptr and process them one by one.
159 : */
941 rhaas 160 GIC 12 : blkno = find_tids_one_page(tids, ntids, &next_start_ptr);
941 rhaas 161 ECB :
162 : /* Check whether the block number is valid. */
941 rhaas 163 GIC 12 : if (blkno >= nblocks)
941 rhaas 164 ECB : {
165 : /* Update the current_start_ptr before moving to the next page. */
941 rhaas 166 GIC 1 : curr_start_ptr = next_start_ptr;
941 rhaas 167 ECB :
941 rhaas 168 GIC 1 : ereport(NOTICE,
941 rhaas 169 ECB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
170 : errmsg("skipping block %u for relation \"%s\" because the block number is out of range",
171 : blkno, RelationGetRelationName(rel))));
941 rhaas 172 GIC 1 : continue;
941 rhaas 173 ECB : }
174 :
941 rhaas 175 GIC 11 : buf = ReadBuffer(rel, blkno);
941 rhaas 176 CBC 11 : LockBufferForCleanup(buf);
941 rhaas 177 ECB :
941 rhaas 178 GIC 11 : page = BufferGetPage(buf);
941 rhaas 179 ECB :
941 rhaas 180 GIC 11 : maxoffset = PageGetMaxOffsetNumber(page);
941 rhaas 181 ECB :
182 : /*
183 : * Figure out which TIDs we are going to process and which ones we are
184 : * going to skip.
185 : */
941 rhaas 186 GIC 11 : memset(include_this_tid, 0, sizeof(include_this_tid));
941 rhaas 187 CBC 24 : for (i = curr_start_ptr; i < next_start_ptr; i++)
941 rhaas 188 ECB : {
941 rhaas 189 GIC 13 : OffsetNumber offno = ItemPointerGetOffsetNumberNoCheck(&tids[i]);
941 rhaas 190 ECB : ItemId itemid;
191 :
192 : /* Check whether the offset number is valid. */
941 rhaas 193 GIC 13 : if (offno == InvalidOffsetNumber || offno > maxoffset)
941 rhaas 194 ECB : {
941 rhaas 195 GIC 2 : ereport(NOTICE,
941 rhaas 196 ECB : errmsg("skipping tid (%u, %u) for relation \"%s\" because the item number is out of range",
197 : blkno, offno, RelationGetRelationName(rel)));
941 rhaas 198 GIC 2 : continue;
941 rhaas 199 ECB : }
200 :
941 rhaas 201 GIC 11 : itemid = PageGetItemId(page, offno);
941 rhaas 202 ECB :
203 : /* Only accept an item ID that is used. */
941 rhaas 204 GIC 11 : if (ItemIdIsRedirected(itemid))
941 rhaas 205 ECB : {
941 rhaas 206 GIC 1 : ereport(NOTICE,
941 rhaas 207 ECB : errmsg("skipping tid (%u, %u) for relation \"%s\" because it redirects to item %u",
208 : blkno, offno, RelationGetRelationName(rel),
209 : ItemIdGetRedirect(itemid)));
941 rhaas 210 GIC 1 : continue;
941 rhaas 211 ECB : }
941 rhaas 212 GIC 10 : else if (ItemIdIsDead(itemid))
941 rhaas 213 ECB : {
941 rhaas 214 GIC 2 : ereport(NOTICE,
941 rhaas 215 ECB : (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked dead",
216 : blkno, offno, RelationGetRelationName(rel))));
941 rhaas 217 GIC 2 : continue;
941 rhaas 218 ECB : }
941 rhaas 219 GIC 8 : else if (!ItemIdIsUsed(itemid))
941 rhaas 220 ECB : {
941 rhaas 221 GIC 1 : ereport(NOTICE,
941 rhaas 222 ECB : (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked unused",
223 : blkno, offno, RelationGetRelationName(rel))));
941 rhaas 224 GIC 1 : continue;
941 rhaas 225 ECB : }
226 :
227 : /* Mark it for processing. */
941 rhaas 228 GIC 7 : Assert(offno < MaxHeapTuplesPerPage);
941 rhaas 229 CBC 7 : include_this_tid[offno] = true;
941 rhaas 230 ECB : }
231 :
232 : /*
233 : * Before entering the critical section, pin the visibility map page
234 : * if it appears to be necessary.
235 : */
941 rhaas 236 GIC 11 : if (heap_force_opt == HEAP_FORCE_KILL && PageIsAllVisible(page))
941 rhaas 237 CBC 3 : visibilitymap_pin(rel, blkno, &vmbuf);
941 rhaas 238 ECB :
239 : /* No ereport(ERROR) from here until all the changes are logged. */
941 rhaas 240 GIC 11 : START_CRIT_SECTION();
941 rhaas 241 ECB :
941 rhaas 242 GIC 55 : for (curoff = FirstOffsetNumber; curoff <= maxoffset;
941 rhaas 243 CBC 44 : curoff = OffsetNumberNext(curoff))
941 rhaas 244 ECB : {
245 : ItemId itemid;
246 :
941 rhaas 247 GIC 44 : if (!include_this_tid[curoff])
941 rhaas 248 CBC 37 : continue;
941 rhaas 249 ECB :
941 rhaas 250 GIC 7 : itemid = PageGetItemId(page, curoff);
941 rhaas 251 CBC 7 : Assert(ItemIdIsNormal(itemid));
941 rhaas 252 ECB :
941 rhaas 253 GIC 7 : did_modify_page = true;
941 rhaas 254 ECB :
941 rhaas 255 GIC 7 : if (heap_force_opt == HEAP_FORCE_KILL)
941 rhaas 256 ECB : {
941 rhaas 257 GIC 3 : ItemIdSetDead(itemid);
941 rhaas 258 ECB :
259 : /*
260 : * If the page is marked all-visible, we must clear
261 : * PD_ALL_VISIBLE flag on the page header and an all-visible
262 : * bit on the visibility map corresponding to the page.
263 : */
941 rhaas 264 GIC 3 : if (PageIsAllVisible(page))
941 rhaas 265 ECB : {
941 rhaas 266 GIC 1 : PageClearAllVisible(page);
941 rhaas 267 CBC 1 : visibilitymap_clear(rel, blkno, vmbuf,
941 rhaas 268 ECB : VISIBILITYMAP_VALID_BITS);
941 rhaas 269 GIC 1 : did_modify_vm = true;
941 rhaas 270 ECB : }
271 : }
272 : else
273 : {
274 : HeapTupleHeader htup;
275 :
941 rhaas 276 GIC 4 : Assert(heap_force_opt == HEAP_FORCE_FREEZE);
941 rhaas 277 ECB :
941 rhaas 278 GIC 4 : htup = (HeapTupleHeader) PageGetItem(page, itemid);
941 rhaas 279 ECB :
280 : /*
281 : * Reset all visibility-related fields of the tuple. This
282 : * logic should mimic heap_execute_freeze_tuple(), but we
283 : * choose to reset xmin and ctid just to be sure that no
284 : * potentially-garbled data is left behind.
285 : */
941 rhaas 286 GIC 4 : ItemPointerSet(&htup->t_ctid, blkno, curoff);
941 rhaas 287 CBC 4 : HeapTupleHeaderSetXmin(htup, FrozenTransactionId);
288 4 : HeapTupleHeaderSetXmax(htup, InvalidTransactionId);
289 4 : if (htup->t_infomask & HEAP_MOVED)
941 rhaas 290 ECB : {
941 rhaas 291 UIC 0 : if (htup->t_infomask & HEAP_MOVED_OFF)
941 rhaas 292 UBC 0 : HeapTupleHeaderSetXvac(htup, InvalidTransactionId);
941 rhaas 293 EUB : else
941 rhaas 294 UIC 0 : HeapTupleHeaderSetXvac(htup, FrozenTransactionId);
941 rhaas 295 EUB : }
296 :
297 : /*
298 : * Clear all the visibility-related bits of this tuple and
299 : * mark it as frozen. Also, get rid of HOT_UPDATED and
300 : * KEYS_UPDATES bits.
301 : */
941 rhaas 302 GIC 4 : htup->t_infomask &= ~HEAP_XACT_MASK;
941 rhaas 303 CBC 4 : htup->t_infomask |= (HEAP_XMIN_FROZEN | HEAP_XMAX_INVALID);
304 4 : htup->t_infomask2 &= ~HEAP_HOT_UPDATED;
305 4 : htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
941 rhaas 306 ECB : }
307 : }
308 :
309 : /*
310 : * If the page was modified, only then, we mark the buffer dirty or do
311 : * the WAL logging.
312 : */
941 rhaas 313 GIC 11 : if (did_modify_page)
941 rhaas 314 ECB : {
315 : /* Mark buffer dirty before we write WAL. */
941 rhaas 316 GIC 6 : MarkBufferDirty(buf);
941 rhaas 317 ECB :
318 : /* XLOG stuff */
941 rhaas 319 GIC 6 : if (RelationNeedsWAL(rel))
941 rhaas 320 CBC 2 : log_newpage_buffer(buf, true);
941 rhaas 321 ECB : }
322 :
323 : /* WAL log the VM page if it was modified. */
941 rhaas 324 GIC 11 : if (did_modify_vm && RelationNeedsWAL(rel))
941 rhaas 325 LBC 0 : log_newpage_buffer(vmbuf, false);
941 rhaas 326 EUB :
941 rhaas 327 GIC 11 : END_CRIT_SECTION();
941 rhaas 328 ECB :
941 rhaas 329 GIC 11 : UnlockReleaseBuffer(buf);
941 rhaas 330 ECB :
941 rhaas 331 GIC 11 : if (vmbuf != InvalidBuffer)
941 rhaas 332 CBC 3 : ReleaseBuffer(vmbuf);
941 rhaas 333 ECB :
334 : /* Update the current_start_ptr before moving to the next page. */
941 rhaas 335 GIC 11 : curr_start_ptr = next_start_ptr;
941 rhaas 336 ECB : }
337 :
941 rhaas 338 GIC 12 : relation_close(rel, RowExclusiveLock);
941 rhaas 339 ECB :
941 rhaas 340 GIC 12 : pfree(ta);
941 rhaas 341 ECB :
941 rhaas 342 GIC 12 : PG_RETURN_VOID();
941 rhaas 343 ECB : }
344 :
345 : /*-------------------------------------------------------------------------
346 : * tidcmp()
347 : *
348 : * Compare two item pointers, return -1, 0, or +1.
349 : *
350 : * See ItemPointerCompare for details.
351 : * ------------------------------------------------------------------------
352 : */
353 : static int32
941 rhaas 354 GIC 3 : tidcmp(const void *a, const void *b)
941 rhaas 355 ECB : {
941 rhaas 356 GIC 3 : ItemPointer iptr1 = ((const ItemPointer) a);
941 rhaas 357 CBC 3 : ItemPointer iptr2 = ((const ItemPointer) b);
941 rhaas 358 ECB :
941 rhaas 359 GIC 3 : return ItemPointerCompare(iptr1, iptr2);
941 rhaas 360 ECB : }
361 :
362 : /*-------------------------------------------------------------------------
363 : * sanity_check_tid_array()
364 : *
365 : * Perform sanity checks on the given tid array, and set *ntids to the
366 : * number of items in the array.
367 : * ------------------------------------------------------------------------
368 : */
369 : static void
941 rhaas 370 GIC 16 : sanity_check_tid_array(ArrayType *ta, int *ntids)
941 rhaas 371 ECB : {
941 rhaas 372 GIC 16 : if (ARR_HASNULL(ta) && array_contains_nulls(ta))
941 rhaas 373 CBC 1 : ereport(ERROR,
941 rhaas 374 ECB : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
375 : errmsg("array must not contain nulls")));
376 :
941 rhaas 377 GIC 15 : if (ARR_NDIM(ta) > 1)
941 rhaas 378 CBC 1 : ereport(ERROR,
941 rhaas 379 ECB : (errcode(ERRCODE_DATA_EXCEPTION),
380 : errmsg("argument must be empty or one-dimensional array")));
381 :
941 rhaas 382 GIC 14 : *ntids = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
941 rhaas 383 CBC 14 : }
941 rhaas 384 ECB :
385 : /*-------------------------------------------------------------------------
386 : * find_tids_one_page()
387 : *
388 : * Find all the tids residing in the same page as tids[next_start_ptr], and
389 : * update next_start_ptr so that it points to the first tid in the next page.
390 : *
391 : * NOTE: The input tids[] array must be sorted.
392 : * ------------------------------------------------------------------------
393 : */
394 : static BlockNumber
941 rhaas 395 GIC 12 : find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
941 rhaas 396 ECB : {
397 : int i;
398 : BlockNumber prev_blkno,
399 : blkno;
400 :
941 rhaas 401 GIC 12 : prev_blkno = blkno = InvalidBlockNumber;
941 rhaas 402 ECB :
941 rhaas 403 GIC 26 : for (i = *next_start_ptr; i < ntids; i++)
941 rhaas 404 ECB : {
941 rhaas 405 GIC 15 : ItemPointerData tid = tids[i];
941 rhaas 406 ECB :
941 rhaas 407 GIC 15 : blkno = ItemPointerGetBlockNumberNoCheck(&tid);
941 rhaas 408 ECB :
941 rhaas 409 GIC 15 : if (i == *next_start_ptr)
941 rhaas 410 CBC 12 : prev_blkno = blkno;
941 rhaas 411 ECB :
941 rhaas 412 GIC 15 : if (prev_blkno != blkno)
941 rhaas 413 CBC 1 : break;
941 rhaas 414 ECB : }
415 :
941 rhaas 416 GIC 12 : *next_start_ptr = i;
941 rhaas 417 CBC 12 : return prev_blkno;
941 rhaas 418 ECB : }
|