/*-------------------------------------------------------------------------
 *
 * heapam_handler.c
 *      heap table access method code
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      src/backend/access/heap/heapam_handler.c
 *
 *
 * NOTES
 *      This file wires up the lower-level heapam.c et al. routines with the
 *      tableam abstraction.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/heapam.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/syncscan.h"
#include "access/tableam.h"
#include "access/tsmapi.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/index.h"
#include "catalog/storage.h"
#include "catalog/storage_xlog.h"
#include "commands/progress.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "storage/procarray.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/rel.h"

static void reform_and_rewrite_tuple(HeapTuple tuple,
                                     Relation OldHeap, Relation NewHeap,
                                     Datum *values, bool *isnull, RewriteState rwstate);

static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
                                   HeapTuple tuple,
                                   OffsetNumber tupoffset);

static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);

static const TableAmRoutine heapam_methods;


/* ------------------------------------------------------------------------
 * Slot related callbacks for heap AM
 * ------------------------------------------------------------------------
 */

static const TupleTableSlotOps *
heapam_slot_callbacks(Relation relation)
{
    return &TTSOpsBufferHeapTuple;
}
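
/*
 * Usage sketch: callers normally reach this callback through the tableam
 * wrappers rather than calling it directly, e.g.
 *
 *      TupleTableSlot *slot = table_slot_create(relation, NULL);
 *
 * table_slot_create() consults the AM's slot_callbacks, so for heap
 * relations it always yields a TTSOpsBufferHeapTuple slot.
 */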


/* ------------------------------------------------------------------------
 * Index Scan Callbacks for heap AM
 * ------------------------------------------------------------------------
 */

static IndexFetchTableData *
heapam_index_fetch_begin(Relation rel)
{
    IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));

    hscan->xs_base.rel = rel;
    hscan->xs_cbuf = InvalidBuffer;

    return &hscan->xs_base;
}

static void
heapam_index_fetch_reset(IndexFetchTableData *scan)
{
    IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;

    if (BufferIsValid(hscan->xs_cbuf))
    {
        ReleaseBuffer(hscan->xs_cbuf);
        hscan->xs_cbuf = InvalidBuffer;
    }
}

static void
heapam_index_fetch_end(IndexFetchTableData *scan)
{
    IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;

    heapam_index_fetch_reset(scan);

    pfree(hscan);
}
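
/*
 * Together, the three functions above implement the index-fetch lifecycle:
 * begin() allocates the per-scan state, reset() releases the currently
 * pinned buffer (if any) so the scan can be repositioned, and end() does a
 * final reset and frees the state.
 */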

static bool
heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
                         ItemPointer tid,
                         Snapshot snapshot,
                         TupleTableSlot *slot,
                         bool *call_again, bool *all_dead)
{
    IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
    BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
    bool        got_heap_tuple;

    Assert(TTS_IS_BUFFERTUPLE(slot));

    /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
    if (!*call_again)
    {
        /* Switch to correct buffer if we don't have it already */
        Buffer      prev_buf = hscan->xs_cbuf;

        hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
                                              hscan->xs_base.rel,
                                              ItemPointerGetBlockNumber(tid));

        /*
         * Prune page, but only if we weren't already on this page
         */
        if (prev_buf != hscan->xs_cbuf)
            heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
    }

    /* Obtain share-lock on the buffer so we can examine visibility */
    LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
    got_heap_tuple = heap_hot_search_buffer(tid,
                                            hscan->xs_base.rel,
                                            hscan->xs_cbuf,
                                            snapshot,
                                            &bslot->base.tupdata,
                                            all_dead,
                                            !*call_again);
    bslot->base.tupdata.t_self = *tid;
    LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);

    if (got_heap_tuple)
    {
        /*
         * Only in a non-MVCC snapshot can more than one member of the HOT
         * chain be visible.
         */
        *call_again = !IsMVCCSnapshot(snapshot);

        slot->tts_tableOid = RelationGetRelid(scan->rel);
        ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
    }
    else
    {
        /* We've reached the end of the HOT chain. */
        *call_again = false;
    }

    return got_heap_tuple;
}
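
/*
 * Contract implied by the code above: *call_again is an in/out flag.  It
 * comes back true only under a non-MVCC snapshot, telling the caller to
 * invoke the function again with the same TID to visit the next member of
 * the HOT chain.  *all_dead, when the caller asks for it, is filled in by
 * heap_hot_search_buffer() so the index AM can mark the entry killed once
 * the whole chain is dead to everyone.
 */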


/* ------------------------------------------------------------------------
 * Callbacks for non-modifying operations on individual tuples for heap AM
 * ------------------------------------------------------------------------
 */

static bool
heapam_fetch_row_version(Relation relation,
                         ItemPointer tid,
                         Snapshot snapshot,
                         TupleTableSlot *slot)
{
    BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
    Buffer      buffer;

    Assert(TTS_IS_BUFFERTUPLE(slot));

    bslot->base.tupdata.t_self = *tid;
    if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
    {
        /* store in slot, transferring existing pin */
        ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
        slot->tts_tableOid = RelationGetRelid(relation);

        return true;
    }

    return false;
}
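
/*
 * Note that heap_fetch() is called with its last argument (keep_buf) set
 * to false, so no buffer pin survives a failed fetch; contrast
 * heapam_tuple_lock() below, which passes true because it may need to
 * examine the tuple header even when the snapshot test fails.
 */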

static bool
heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
{
    HeapScanDesc hscan = (HeapScanDesc) scan;

    return ItemPointerIsValid(tid) &&
        ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
}

static bool
heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
                                Snapshot snapshot)
{
    BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
    bool        res;

    Assert(TTS_IS_BUFFERTUPLE(slot));
    Assert(BufferIsValid(bslot->buffer));

    /*
     * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
     * Caller should be holding pin, but not lock.
     */
    LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
    res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
                                       bslot->buffer);
    LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);

    return res;
}


/* ----------------------------------------------------------------------------
 *  Functions for manipulations of physical tuples for heap AM.
 * ----------------------------------------------------------------------------
 */

static void
heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
                    int options, BulkInsertState bistate)
{
    bool        shouldFree = true;
    HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);

    /* Update the tuple with table oid */
    slot->tts_tableOid = RelationGetRelid(relation);
    tuple->t_tableOid = slot->tts_tableOid;

    /* Perform the insertion, and copy the resulting ItemPointer */
    heap_insert(relation, tuple, cid, options, bistate);
    ItemPointerCopy(&tuple->t_self, &slot->tts_tid);

    if (shouldFree)
        pfree(tuple);
}
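
/*
 * ExecFetchSlotHeapTuple(slot, true, &shouldFree) materializes the slot's
 * contents as a heap tuple; shouldFree comes back true when that required
 * making a copy, which is why the tuple is pfree'd after the insertion has
 * stored the resulting TID back into the slot.
 */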

static void
heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
                                CommandId cid, int options,
                                BulkInsertState bistate, uint32 specToken)
{
    bool        shouldFree = true;
    HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);

    /* Update the tuple with table oid */
    slot->tts_tableOid = RelationGetRelid(relation);
    tuple->t_tableOid = slot->tts_tableOid;

    HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
    options |= HEAP_INSERT_SPECULATIVE;

    /* Perform the insertion, and copy the resulting ItemPointer */
    heap_insert(relation, tuple, cid, options, bistate);
    ItemPointerCopy(&tuple->t_self, &slot->tts_tid);

    if (shouldFree)
        pfree(tuple);
}

static void
heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
                                  uint32 specToken, bool succeeded)
{
    bool        shouldFree = true;
    HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);

    /* adjust the tuple's state accordingly */
    if (succeeded)
        heap_finish_speculative(relation, &slot->tts_tid);
    else
        heap_abort_speculative(relation, &slot->tts_tid);

    if (shouldFree)
        pfree(tuple);
}
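
/*
 * Speculative insertion (used by INSERT ... ON CONFLICT) is a two-step
 * protocol: heapam_tuple_insert_speculative() stamps the tuple with the
 * caller's token and inserts it, and heapam_tuple_complete_speculative()
 * later either confirms the insertion (heap_finish_speculative) or kills
 * the tuple again (heap_abort_speculative) if a conflict was detected in
 * the meantime.
 */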

static TM_Result
heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
                    Snapshot snapshot, Snapshot crosscheck, bool wait,
                    TM_FailureData *tmfd, bool changingPart)
{
    /*
     * Currently, deletion of index tuples is deferred to VACUUM.  If a
     * table AM were to clean up dead tuples itself, that would also be the
     * time to remove the corresponding index tuples.
     */
    return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
}
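
/*
 * Note that the snapshot argument is not forwarded to heap_delete(): by
 * the time the executor asks the AM to delete a tuple it has already
 * located a visible version, so only the crosscheck snapshot is needed
 * here.
 */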


static TM_Result
heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
                    CommandId cid, Snapshot snapshot, Snapshot crosscheck,
                    bool wait, TM_FailureData *tmfd,
                    LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
{
    bool        shouldFree = true;
    HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
    TM_Result   result;

    /* Update the tuple with table oid */
    slot->tts_tableOid = RelationGetRelid(relation);
    tuple->t_tableOid = slot->tts_tableOid;

    result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
                         tmfd, lockmode, update_indexes);
    ItemPointerCopy(&tuple->t_self, &slot->tts_tid);

    /*
     * Decide whether new index entries are needed for the tuple
     *
     * Note: heap_update returns the tid (location) of the new tuple in the
     * t_self field.
     *
     * If the update is not HOT, we must update all indexes.  If the update
     * is HOT, it could be that we updated summarized columns, so we either
     * update only summarized indexes, or none at all.
     */
    if (result != TM_Ok)
    {
        Assert(*update_indexes == TU_None);
        *update_indexes = TU_None;
    }
    else if (!HeapTupleIsHeapOnly(tuple))
        Assert(*update_indexes == TU_All);
    else
        Assert((*update_indexes == TU_Summarizing) ||
               (*update_indexes == TU_None));

    if (shouldFree)
        pfree(tuple);

    return result;
}
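
/*
 * How a caller might consume update_indexes (an illustrative sketch, not a
 * quote of any particular caller): after a TM_Ok result, TU_All means
 * insert entries into all indexes, TU_Summarizing means touch only
 * summarizing indexes (e.g. BRIN), and TU_None means no index work at all,
 * as for a fully HOT update.
 */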

static TM_Result
heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
                  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
                  LockWaitPolicy wait_policy, uint8 flags,
                  TM_FailureData *tmfd)
{
    BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
    TM_Result   result;
    Buffer      buffer;
    HeapTuple   tuple = &bslot->base.tupdata;
    bool        follow_updates;

    follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
    tmfd->traversed = false;

    Assert(TTS_IS_BUFFERTUPLE(slot));

tuple_lock_retry:
    tuple->t_self = *tid;
    result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
                             follow_updates, &buffer, tmfd);

    if (result == TM_Updated &&
        (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
    {
        /* Should not encounter speculative tuple on recheck */
        Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));

        ReleaseBuffer(buffer);

        if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
        {
            SnapshotData SnapshotDirty;
            TransactionId priorXmax;

            /* it was updated, so look at the updated version */
            *tid = tmfd->ctid;
            /* updated row should have xmin matching this xmax */
            priorXmax = tmfd->xmax;

            /* signal that a tuple later in the chain is getting locked */
            tmfd->traversed = true;

            /*
             * fetch target tuple
             *
             * Loop here to deal with updated or busy tuples
             */
            InitDirtySnapshot(SnapshotDirty);
            for (;;)
            {
                if (ItemPointerIndicatesMovedPartitions(tid))
                    ereport(ERROR,
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                             errmsg("tuple to be locked was already moved to another partition due to concurrent update")));

                tuple->t_self = *tid;
                if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer, true))
                {
                    /*
                     * If xmin isn't what we're expecting, the slot must have
                     * been recycled and reused for an unrelated tuple.  This
                     * implies that the latest version of the row was deleted,
                     * so we need do nothing.  (Should be safe to examine xmin
                     * without getting buffer's content lock.  We assume
                     * reading a TransactionId to be atomic, and Xmin never
                     * changes in an existing tuple, except to invalid or
                     * frozen, and neither of those can match priorXmax.)
                     */
                    if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
                                             priorXmax))
                    {
                        ReleaseBuffer(buffer);
                        return TM_Deleted;
                    }

                    /* otherwise xmin should not be dirty... */
                    if (TransactionIdIsValid(SnapshotDirty.xmin))
                        ereport(ERROR,
                                (errcode(ERRCODE_DATA_CORRUPTED),
                                 errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
                                                 SnapshotDirty.xmin,
                                                 ItemPointerGetBlockNumber(&tuple->t_self),
                                                 ItemPointerGetOffsetNumber(&tuple->t_self),
                                                 RelationGetRelationName(relation))));

                    /*
                     * If the tuple is being updated by another transaction,
                     * we have to wait for its commit/abort, or die trying.
                     */
                    if (TransactionIdIsValid(SnapshotDirty.xmax))
                    {
                        ReleaseBuffer(buffer);
                        switch (wait_policy)
                        {
                            case LockWaitBlock:
                                XactLockTableWait(SnapshotDirty.xmax,
                                                  relation, &tuple->t_self,
                                                  XLTW_FetchUpdated);
                                break;
                            case LockWaitSkip:
                                if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
                                    /* skip instead of waiting */
                                    return TM_WouldBlock;
                                break;
                            case LockWaitError:
                                if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
                                    ereport(ERROR,
                                            (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
                                             errmsg("could not obtain lock on row in relation \"%s\"",
                                                    RelationGetRelationName(relation))));
                                break;
                        }
                        continue;       /* loop back to repeat heap_fetch */
                    }

                    /*
                     * If tuple was inserted by our own transaction, we have
                     * to check cmin against cid: cmin >= current CID means
                     * our command cannot see the tuple, so we should ignore
                     * it.  Otherwise heap_lock_tuple() will throw an error,
                     * and so would any later attempt to update or delete the
                     * tuple.  (We need not check cmax because
                     * HeapTupleSatisfiesDirty will consider a tuple deleted
                     * by our transaction dead, regardless of cmax.)  We just
                     * checked that priorXmax == xmin, so we can test that
                     * variable instead of doing HeapTupleHeaderGetXmin again.
                     */
                    if (TransactionIdIsCurrentTransactionId(priorXmax) &&
                        HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
                    {
                        tmfd->xmax = priorXmax;

                        /*
                         * Cmin is the problematic value, so store that.  See
                         * above.
                         */
                        tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
                        ReleaseBuffer(buffer);
                        return TM_SelfModified;
                    }

                    /*
                     * This is a live tuple, so try to lock it again.
                     */
                    ReleaseBuffer(buffer);
                    goto tuple_lock_retry;
                }

                /*
                 * If the referenced slot was actually empty, the latest
                 * version of the row must have been deleted, so we need do
                 * nothing.
                 */
                if (tuple->t_data == NULL)
                {
                    Assert(!BufferIsValid(buffer));
                    return TM_Deleted;
                }

                /*
                 * As above, if xmin isn't what we're expecting, do nothing.
                 */
                if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
                                         priorXmax))
                {
                    ReleaseBuffer(buffer);
                    return TM_Deleted;
                }

                /*
                 * If we get here, the tuple was found but failed
                 * SnapshotDirty.  Assuming the xmin is either a committed
                 * xact or our own xact (as it certainly should be if we're
                 * trying to modify the tuple), this must mean that the row
                 * was updated or deleted by either a committed xact or our
                 * own xact.  If it was deleted, we can ignore it; if it was
                 * updated then chain up to the next version and repeat the
                 * whole process.
                 *
                 * As above, it should be safe to examine xmax and t_ctid
                 * without the buffer content lock, because they can't be
                 * changing.  We'd better hold a buffer pin though.
                 */
                if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
                {
                    /* deleted, so forget about it */
                    ReleaseBuffer(buffer);
                    return TM_Deleted;
                }

                /* updated, so look at the updated row */
                *tid = tuple->t_data->t_ctid;
                /* updated row should have xmin matching this xmax */
                priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
                ReleaseBuffer(buffer);
                /* loop back to fetch next in chain */
            }
        }
        else
        {
            /* tuple was deleted, so give up */
            return TM_Deleted;
        }
    }

    slot->tts_tableOid = RelationGetRelid(relation);
    tuple->t_tableOid = slot->tts_tableOid;

    /* store in slot, transferring existing pin */
    ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);

    return result;
}
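
/*
 * The TUPLE_LOCK_FLAG_FIND_LAST_VERSION branch above is what lets callers
 * such as EvalPlanQual-style rechecks chase an updated row to its latest
 * version under a dirty snapshot, instead of failing as soon as the
 * originally named tuple version turns out to be stale.
 */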


/* ------------------------------------------------------------------------
 * DDL related callbacks for heap AM.
 * ------------------------------------------------------------------------
 */

static void
heapam_relation_set_new_filelocator(Relation rel,
                                    const RelFileLocator *newrlocator,
                                    char persistence,
                                    TransactionId *freezeXid,
                                    MultiXactId *minmulti)
{
    SMgrRelation srel;

    /*
     * Initialize to the minimum XID that could put tuples in the table.  We
     * know that no xacts older than RecentXmin are still running, so that
     * will do.
     */
    *freezeXid = RecentXmin;

    /*
     * Similarly, initialize the minimum Multixact to the first value that
     * could possibly be stored in tuples in the table.  Running transactions
     * could reuse values from their local cache, so we are careful to
     * consider all currently running multis.
     *
     * XXX this could be refined further, but is it worth the hassle?
     */
    *minmulti = GetOldestMultiXactId();

    srel = RelationCreateStorage(*newrlocator, persistence, true);

    /*
     * If required, set up an init fork for an unlogged table so that it can
     * be correctly reinitialized on restart.  An immediate sync is required
     * even if the page has been logged, because the write did not go through
     * shared_buffers and therefore a concurrent checkpoint may have moved the
     * redo pointer past our xlog record.  Recovery may as well remove it
     * while replaying, for example, XLOG_DBASE_CREATE* or XLOG_TBLSPC_CREATE
     * record.  Therefore, logging is necessary even if wal_level=minimal.
     */
    if (persistence == RELPERSISTENCE_UNLOGGED)
    {
        Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
               rel->rd_rel->relkind == RELKIND_MATVIEW ||
               rel->rd_rel->relkind == RELKIND_TOASTVALUE);
        smgrcreate(srel, INIT_FORKNUM, false);
        log_smgrcreate(newrlocator, INIT_FORKNUM);
        smgrimmedsync(srel, INIT_FORKNUM);
    }

    smgrclose(srel);
}

static void
heapam_relation_nontransactional_truncate(Relation rel)
{
    RelationTruncate(rel, 0);
}

static void
heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
{
    SMgrRelation dstrel;

    dstrel = smgropen(*newrlocator, rel->rd_backend);

    /*
     * Since we copy the file directly without looking at the shared buffers,
     * we'd better first flush out any pages of the source relation that are
     * in shared buffers.  We assume no new changes will be made while we are
     * holding exclusive lock on the rel.
     */
    FlushRelationBuffers(rel);

    /*
     * Create and copy all forks of the relation, and schedule unlinking of
     * old physical files.
     *
     * NOTE: any conflict in relfilenumber value will be caught in
     * RelationCreateStorage().
     */
    RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true);

    /* copy main fork */
    RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
                        rel->rd_rel->relpersistence);

    /* copy those extra forks that exist */
    for (ForkNumber forkNum = MAIN_FORKNUM + 1;
         forkNum <= MAX_FORKNUM; forkNum++)
    {
        if (smgrexists(RelationGetSmgr(rel), forkNum))
        {
            smgrcreate(dstrel, forkNum, false);

            /*
             * WAL log creation if the relation is persistent, or this is the
             * init fork of an unlogged relation.
             */
            if (RelationIsPermanent(rel) ||
                (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
                 forkNum == INIT_FORKNUM))
                log_smgrcreate(newrlocator, forkNum);
            RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
                                rel->rd_rel->relpersistence);
        }
    }

    /* drop old relation, and close new one */
    RelationDropStorage(rel);
    smgrclose(dstrel);
}

static void
heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
                                 Relation OldIndex, bool use_sort,
                                 TransactionId OldestXmin,
                                 TransactionId *xid_cutoff,
                                 MultiXactId *multi_cutoff,
                                 double *num_tuples,
                                 double *tups_vacuumed,
                                 double *tups_recently_dead)
{
    RewriteState rwstate;
    IndexScanDesc indexScan;
    TableScanDesc tableScan;
    HeapScanDesc heapScan;
    bool        is_system_catalog;
    Tuplesortstate *tuplesort;
    TupleDesc   oldTupDesc = RelationGetDescr(OldHeap);
    TupleDesc   newTupDesc = RelationGetDescr(NewHeap);
    TupleTableSlot *slot;
    int         natts;
    Datum      *values;
    bool       *isnull;
    BufferHeapTupleTableSlot *hslot;
    BlockNumber prev_cblock = InvalidBlockNumber;

    /* Remember if it's a system catalog */
    is_system_catalog = IsSystemRelation(OldHeap);

    /*
     * Valid smgr_targblock implies something already wrote to the relation.
     * This may be harmless, but this function hasn't planned for it.
     */
    Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);

    /* Preallocate values/isnull arrays */
    natts = newTupDesc->natts;
    values = (Datum *) palloc(natts * sizeof(Datum));
    isnull = (bool *) palloc(natts * sizeof(bool));

    /* Initialize the rewrite operation */
    rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
                                 *multi_cutoff);

    /*
     * Set up sorting if wanted.  NewHeap is being passed to
     * tuplesort_begin_cluster(); it could have been OldHeap too.  It does
     * not really matter, as the goal is to have a heap relation being passed
     * to _bt_log_reuse_page() (which should not be called from this code
     * path).
     */
    if (use_sort)
        tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex, NewHeap,
                                            maintenance_work_mem,
                                            NULL, TUPLESORT_NONE);
    else
        tuplesort = NULL;

    /*
     * Prepare to scan the OldHeap.  To ensure we see recently-dead tuples
     * that still need to be copied, we scan with SnapshotAny and use
     * HeapTupleSatisfiesVacuum for the visibility test.
     */
    if (OldIndex != NULL && !use_sort)
    {
        const int   ci_index[] = {
            PROGRESS_CLUSTER_PHASE,
            PROGRESS_CLUSTER_INDEX_RELID
        };
        int64       ci_val[2];

        /* Set phase and OIDOldIndex to columns */
        ci_val[0] = PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP;
        ci_val[1] = RelationGetRelid(OldIndex);
        pgstat_progress_update_multi_param(2, ci_index, ci_val);

        tableScan = NULL;
        heapScan = NULL;
        indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
        index_rescan(indexScan, NULL, 0, NULL, 0);
    }
    else
    {
        /* In scan-and-sort mode and also VACUUM FULL, set phase */
        pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
                                     PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP);

        tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
        heapScan = (HeapScanDesc) tableScan;
        indexScan = NULL;

        /* Set total heap blocks */
        pgstat_progress_update_param(PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
                                     heapScan->rs_nblocks);
    }

    slot = table_slot_create(OldHeap, NULL);
    hslot = (BufferHeapTupleTableSlot *) slot;

    /*
     * Scan through the OldHeap, either in OldIndex order or sequentially;
     * copy each tuple into the NewHeap, or transiently to the tuplesort
     * module.  Note that we don't bother sorting dead tuples (they won't get
     * to the new table anyway).
     */
    for (;;)
    {
        HeapTuple   tuple;
        Buffer      buf;
        bool        isdead;

        CHECK_FOR_INTERRUPTS();

        if (indexScan != NULL)
        {
            if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
                break;

            /* Since we used no scan keys, should never need to recheck */
            if (indexScan->xs_recheck)
                elog(ERROR, "CLUSTER does not support lossy index conditions");
        }
        else
        {
            if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
            {
                /*
                 * If the last pages of the scan were empty, we would go to
                 * the next phase while heap_blks_scanned != heap_blks_total.
                 * Instead, to ensure that heap_blks_scanned is equivalent to
                 * total_heap_blks after the table scan phase, this parameter
                 * is manually updated to the correct value when the table
                 * scan finishes.
                 */
                pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
                                             heapScan->rs_nblocks);
                break;
            }

            /*
             * In scan-and-sort mode and also VACUUM FULL, set heap blocks
             * scanned
             *
             * Note that heapScan may start at an offset and wrap around, i.e.
             * rs_startblock may be >0, and rs_cblock may end with a number
             * below rs_startblock.  To prevent showing this wraparound to the
             * user, we offset rs_cblock by rs_startblock (modulo rs_nblocks).
             */
            if (prev_cblock != heapScan->rs_cblock)
            {
                pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
                                             (heapScan->rs_cblock +
                                              heapScan->rs_nblocks -
                                              heapScan->rs_startblock
                                              ) % heapScan->rs_nblocks + 1);
                prev_cblock = heapScan->rs_cblock;
            }
        }

        tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
        buf = hslot->buffer;

        LockBuffer(buf, BUFFER_LOCK_SHARE);

        switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
        {
            case HEAPTUPLE_DEAD:
                /* Definitely dead */
                isdead = true;
                break;
            case HEAPTUPLE_RECENTLY_DEAD:
                *tups_recently_dead += 1;
                /* fall through */
            case HEAPTUPLE_LIVE:
                /* Live or recently dead, must copy it */
                isdead = false;
                break;
            case HEAPTUPLE_INSERT_IN_PROGRESS:

                /*
                 * Since we hold exclusive lock on the relation, normally the
                 * only way to see this is if it was inserted earlier in our
                 * own transaction.  However, it can happen in system
                 * catalogs, since we tend to release write lock before commit
                 * there.  Give a warning if neither case applies; but in any
                 * case we had better copy it.
                 */
                if (!is_system_catalog &&
                    !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
                    elog(WARNING, "concurrent insert in progress within table \"%s\"",
                         RelationGetRelationName(OldHeap));
                /* treat as live */
                isdead = false;
                break;
            case HEAPTUPLE_DELETE_IN_PROGRESS:

                /*
                 * Similar situation to INSERT_IN_PROGRESS case.
                 */
                if (!is_system_catalog &&
                    !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
                    elog(WARNING, "concurrent delete in progress within table \"%s\"",
                         RelationGetRelationName(OldHeap));
                /* treat as recently dead */
                *tups_recently_dead += 1;
                isdead = false;
                break;
            default:
                elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                isdead = false; /* keep compiler quiet */
                break;
        }

        LockBuffer(buf, BUFFER_LOCK_UNLOCK);

        if (isdead)
        {
            *tups_vacuumed += 1;
            /* heap rewrite module still needs to see it... */
            if (rewrite_heap_dead_tuple(rwstate, tuple))
            {
                /* A previous recently-dead tuple is now known dead */
                *tups_vacuumed += 1;
                *tups_recently_dead -= 1;
            }
            continue;
        }

        *num_tuples += 1;
        if (tuplesort != NULL)
        {
            tuplesort_putheaptuple(tuplesort, tuple);

            /*
             * In scan-and-sort mode, report increase in number of tuples
             * scanned
             */
            pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
                                         *num_tuples);
        }
        else
        {
            const int   ct_index[] = {
                PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
                PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
            };
            int64       ct_val[2];

            reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
                                     values, isnull, rwstate);

            /*
             * In indexscan mode and also VACUUM FULL, report increase in
             * number of tuples scanned and written
             */
            ct_val[0] = *num_tuples;
            ct_val[1] = *num_tuples;
            pgstat_progress_update_multi_param(2, ct_index, ct_val);
        }
    }

    if (indexScan != NULL)
        index_endscan(indexScan);
    if (tableScan != NULL)
        table_endscan(tableScan);
    if (slot)
        ExecDropSingleTupleTableSlot(slot);

    /*
     * In scan-and-sort mode, complete the sort, then read out all live tuples
     * from the tuplestore and write them to the new relation.
     */
    if (tuplesort != NULL)
    {
        double      n_tuples = 0;

        /* Report that we are now sorting tuples */
        pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
                                     PROGRESS_CLUSTER_PHASE_SORT_TUPLES);

        tuplesort_performsort(tuplesort);

        /* Report that we are now writing new heap */
        pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
                                     PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP);

        for (;;)
        {
            HeapTuple   tuple;

            CHECK_FOR_INTERRUPTS();

            tuple = tuplesort_getheaptuple(tuplesort, true);
            if (tuple == NULL)
                break;

            n_tuples += 1;
            reform_and_rewrite_tuple(tuple,
                                     OldHeap, NewHeap,
                                     values, isnull,
                                     rwstate);
            /* Report n_tuples */
            pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
                                         n_tuples);
        }

        tuplesort_end(tuplesort);
    }

    /* Write out any remaining tuples, and fsync if needed */
    end_heap_rewrite(rwstate);

    /* Clean up */
    pfree(values);
    pfree(isnull);
}
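
/*
 * Summary of the two paths above: with a usable clustering index and no
 * sort, tuples are read in index order and written immediately; in the
 * sequential path (scan-and-sort mode, and also VACUUM FULL, which does
 * not sort), the table is read in physical order and, when sorting, the
 * tuples are spooled through tuplesort before being rewritten.
 */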

static bool
heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
                               BufferAccessStrategy bstrategy)
{
    HeapScanDesc hscan = (HeapScanDesc) scan;

    /*
     * We must maintain a pin on the target page's buffer to ensure that
     * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
     * under us.  Hence, pin the page until we are done looking at it.  We
     * also choose to hold sharelock on the buffer throughout --- we could
     * release and re-acquire sharelock for each tuple, but since we aren't
     * doing much work per tuple, the extra lock traffic is probably better
     * avoided.
     */
    hscan->rs_cblock = blockno;
    hscan->rs_cindex = FirstOffsetNumber;
    hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
                                        blockno, RBM_NORMAL, bstrategy);
    LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);

    /* in heap all blocks can contain tuples, so always return true */
    return true;
}
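
/*
 * The buffer pinned and share-locked above is intentionally left that way:
 * heapam_scan_analyze_next_tuple() below keeps iterating over the page
 * under that lock, and releases both pin and lock only when it has no more
 * tuples to return for the block.
 */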
1030 :
1031 : static bool
1032 10303246 : heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
1033 : double *liverows, double *deadrows,
1034 : TupleTableSlot *slot)
1035 : {
1036 10303246 : HeapScanDesc hscan = (HeapScanDesc) scan;
1471 andres 1037 ECB : Page targpage;
1038 : OffsetNumber maxoffset;
1039 : BufferHeapTupleTableSlot *hslot;
1040 :
1471 andres 1041 CBC 10303246 : Assert(TTS_IS_BUFFERTUPLE(slot));
1042 :
1471 andres 1043 GIC 10303246 : hslot = (BufferHeapTupleTableSlot *) slot;
1471 andres 1044 CBC 10303246 : targpage = BufferGetPage(hscan->rs_cbuf);
1471 andres 1045 GIC 10303246 : maxoffset = PageGetMaxOffsetNumber(targpage);
1046 :
1047 : /* Inner loop over all tuples on the selected page */
1471 andres 1048 CBC 10681263 : for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
1049 : {
1050 : ItemId itemid;
1471 andres 1051 GIC 10533905 : HeapTuple targtuple = &hslot->base.tupdata;
1471 andres 1052 CBC 10533905 : bool sample_it = false;
1053 :
1471 andres 1054 GIC 10533905 : itemid = PageGetItemId(targpage, hscan->rs_cindex);
1055 :
1056 : /*
1471 andres 1057 ECB : * We ignore unused and redirect line pointers. DEAD line pointers
1058 : * should be counted as dead, because we need vacuum to run to get rid
1059 : * of them. Note that this rule agrees with the way that
1060 : * heap_page_prune() counts things.
1061 : */
1471 andres 1062 GIC 10533905 : if (!ItemIdIsNormal(itemid))
1063 : {
1471 andres 1064 CBC 187102 : if (ItemIdIsDead(itemid))
1471 andres 1065 GIC 79966 : *deadrows += 1;
1066 187102 : continue;
1471 andres 1067 ECB : }
1068 :
1471 andres 1069 GIC 10346803 : ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
1471 andres 1070 ECB :
1471 andres 1071 GIC 10346803 : targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
1072 10346803 : targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
1073 10346803 : targtuple->t_len = ItemIdGetLength(itemid);
1074 :
1075 10346803 : switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
1076 : hscan->rs_cbuf))
1077 : {
1471 andres 1078 CBC 10016800 : case HEAPTUPLE_LIVE:
1471 andres 1079 GIC 10016800 : sample_it = true;
1471 andres 1080 CBC 10016800 : *liverows += 1;
1081 10016800 : break;
1471 andres 1082 ECB :
1471 andres 1083 GIC 189625 : case HEAPTUPLE_DEAD:
1084 : case HEAPTUPLE_RECENTLY_DEAD:
1471 andres 1085 ECB : /* Count dead and recently-dead rows */
1471 andres 1086 GIC 189625 : *deadrows += 1;
1471 andres 1087 CBC 189625 : break;
1471 andres 1088 ECB :
1471 andres 1089 CBC 139520 : case HEAPTUPLE_INSERT_IN_PROGRESS:
1090 :
1471 andres 1091 ECB : /*
1092 : * Insert-in-progress rows are not counted. We assume that
1093 : * when the inserting transaction commits or aborts, it will
1094 : * send a stats message to increment the proper count. This
1095 : * works right only if that transaction ends after we finish
1096 : * analyzing the table; if things happen in the other order,
1097 : * its stats update will be overwritten by ours. However, the
1098 : * error will be large only if the other transaction runs long
1099 : * enough to insert many tuples, so assuming it will finish
1100 : * after us is the safer option.
1101 : *
1102 : * A special case is that the inserting transaction might be
1103 : * our own. In this case we should count and sample the row,
1104 : * to accommodate users who load a table and analyze it in one
1105 : * transaction. (pgstat_report_analyze has to adjust the
1106 : * numbers we report to the cumulative stats system to make
1107 : * this come out right.)
1108 : */
1471 andres 1109 GIC 139520 : if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
1110 : {
1111 139088 : sample_it = true;
1112 139088 : *liverows += 1;
1113 : }
1114 139520 : break;
1115 :
1116 858 : case HEAPTUPLE_DELETE_IN_PROGRESS:
1117 :
1118 : /*
1119 : * We count and sample delete-in-progress rows the same as
1120 : * live ones, so that the stats counters come out right if the
1121 : * deleting transaction commits after us, per the same
1122 : * reasoning given above.
1123 : *
1124 : * If the delete was done by our own transaction, however, we
1471 andres 1125 ECB : * must count the row as dead to make pgstat_report_analyze's
1126 : * stats adjustments come out right. (Note: this works out
1127 : * properly when the row was both inserted and deleted in our
1128 : * xact.)
1129 : *
1130 : * The net effect of these choices is that we act as though an
1131 : * IN_PROGRESS transaction hasn't happened yet, except if it
1132 : * is our own transaction, which we assume has happened.
1133 : *
1134 : * This approach ensures that we behave sanely if we see both
1135 : * the pre-image and post-image rows for a row being updated
1136 : * by a concurrent transaction: we will sample the pre-image
1137 : * but not the post-image. We also get sane results if the
1138 : * concurrent transaction never commits.
1139 : */
1471 andres 1140 GIC 858 : if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
1391 1141 858 : *deadrows += 1;
1142 : else
1143 : {
1471 andres 1144 UIC 0 : sample_it = true;
1391 1145 0 : *liverows += 1;
1146 : }
1471 andres 1147 GIC 858 : break;
1148 :
1471 andres 1149 UIC 0 : default:
1150 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1151 : break;
1152 : }
1153 :
1471 andres 1154 GIC 10346803 : if (sample_it)
1155 : {
1471 andres 1156 CBC 10155888 : ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
1157 10155888 : hscan->rs_cindex++;
1158 :
1159 : /* note that we leave the buffer locked here! */
1471 andres 1160 GBC 10155888 : return true;
1471 andres 1161 EUB : }
1162 : }
1471 andres 1163 ECB :
1164 : /* Now release the lock and pin on the page */
1471 andres 1165 GBC 147358 : UnlockReleaseBuffer(hscan->rs_cbuf);
1166 147358 : hscan->rs_cbuf = InvalidBuffer;
1167 :
1168 : /* also prevent old slot contents from having pin on page */
1471 andres 1169 GIC 147358 : ExecClearTuple(slot);
1471 andres 1170 ECB :
1471 andres 1171 GIC 147358 : return false;
1471 andres 1172 ECB : }
1173 :
1174 : static double
1474 andres 1175 GIC 66563 : heapam_index_build_range_scan(Relation heapRelation,
1474 andres 1176 ECB : Relation indexRelation,
1177 : IndexInfo *indexInfo,
1178 : bool allow_sync,
1179 : bool anyvisible,
1180 : bool progress,
1181 : BlockNumber start_blockno,
1182 : BlockNumber numblocks,
1183 : IndexBuildCallback callback,
1184 : void *callback_state,
1185 : TableScanDesc scan)
1186 : {
1187 : HeapScanDesc hscan;
1188 : bool is_system_catalog;
1189 : bool checking_uniqueness;
1190 : HeapTuple heapTuple;
1191 : Datum values[INDEX_MAX_KEYS];
1192 : bool isnull[INDEX_MAX_KEYS];
1193 : double reltuples;
1194 : ExprState *predicate;
1195 : TupleTableSlot *slot;
1196 : EState *estate;
1197 : ExprContext *econtext;
1198 : Snapshot snapshot;
1474 andres 1199 GIC 66563 : bool need_unregister_snapshot = false;
1200 : TransactionId OldestXmin;
1418 tgl 1201 66563 : BlockNumber previous_blkno = InvalidBlockNumber;
1474 andres 1202 66563 : BlockNumber root_blkno = InvalidBlockNumber;
1203 : OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1204 :
1205 : /*
1206 : * sanity checks
1207 : */
1208 66563 : Assert(OidIsValid(indexRelation->rd_rel->relam));
1209 :
1210 : /* Remember if it's a system catalog */
1211 66563 : is_system_catalog = IsSystemRelation(heapRelation);
1212 :
1213 : /* See whether we're verifying uniqueness/exclusion properties */
1214 76455 : checking_uniqueness = (indexInfo->ii_Unique ||
1474 andres 1215 CBC 9892 : indexInfo->ii_ExclusionOps != NULL);
1216 :
1474 andres 1217 ECB : /*
1218 : * "Any visible" mode is not compatible with uniqueness checks; make sure
1219 : * only one of those is requested.
1220 : */
1474 andres 1221 GIC 66563 : Assert(!(anyvisible && checking_uniqueness));
1222 :
1223 : /*
1474 andres 1224 ECB : * Need an EState for evaluation of index expressions and partial-index
1225 : * predicates. Also a slot to hold the current tuple.
1226 : */
1474 andres 1227 CBC 66563 : estate = CreateExecutorState();
1474 andres 1228 GIC 66563 : econtext = GetPerTupleExprContext(estate);
1229 66563 : slot = table_slot_create(heapRelation, NULL);
1474 andres 1230 ECB :
1231 : /* Arrange for econtext's scan tuple to be the tuple under test */
1474 andres 1232 GIC 66563 : econtext->ecxt_scantuple = slot;
1233 :
1234 : /* Set up execution state for predicate, if any. */
1235 66563 : predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1236 :
1474 andres 1237 ECB : /*
1238 : * Prepare for scan of the base relation. In a normal index build, we use
1239 : * SnapshotAny because we must retrieve all tuples and do our own time
1240 : * qual checks (because we have to index RECENTLY_DEAD tuples). In a
1241 : * concurrent build, or during bootstrap, we take a regular MVCC snapshot
1242 : * and index whatever's live according to that.
1243 : */
1474 andres 1244 CBC 66563 : OldestXmin = InvalidTransactionId;
1474 andres 1245 ECB :
1246 : /* okay to ignore lazy VACUUMs here */
1474 andres 1247 GIC 66563 : if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
970 andres 1248 CBC 17440 : OldestXmin = GetOldestNonRemovableTransactionId(heapRelation);
1249 :
1474 andres 1250 GIC 66563 : if (!scan)
1474 andres 1251 ECB : {
1252 : /*
1253 : * Serial index build.
1254 : *
1255 : * Must begin our own heap scan in this case. We may also need to
1256 : * register a snapshot whose lifetime is under our direct control.
1257 : */
1474 andres 1258 GIC 66367 : if (!TransactionIdIsValid(OldestXmin))
1259 : {
1474 andres 1260 CBC 49079 : snapshot = RegisterSnapshot(GetTransactionSnapshot());
1474 andres 1261 GIC 49079 : need_unregister_snapshot = true;
1262 : }
1474 andres 1263 ECB : else
1474 andres 1264 CBC 17288 : snapshot = SnapshotAny;
1265 :
1266 66367 : scan = table_beginscan_strat(heapRelation, /* relation */
1267 : snapshot, /* snapshot */
1268 : 0, /* number of keys */
1269 : NULL, /* scan key */
1270 : true, /* buffer access strategy OK */
1271 : allow_sync); /* syncscan OK? */
1272 : }
1273 : else
1474 andres 1274 ECB : {
1275 : /*
1276 : * Parallel index build.
1277 : *
1278 : * Parallel case never registers/unregisters own snapshot. Snapshot
1279 : * is taken from parallel heap scan, and is SnapshotAny or an MVCC
1280 : * snapshot, based on same criteria as serial case.
1281 : */
1474 andres 1282 CBC 196 : Assert(!IsBootstrapProcessingMode());
1474 andres 1283 GIC 196 : Assert(allow_sync);
1284 196 : snapshot = scan->rs_snapshot;
1285 : }
1286 :
1287 66563 : hscan = (HeapScanDesc) scan;
1288 :
1289 : /*
1290 : * Must have called GetOldestNonRemovableTransactionId() if using
1291 : * SnapshotAny. Shouldn't have for an MVCC snapshot. (It's especially
1292 : * worth checking this for parallel builds, since ambuild routines that
1293 : * support parallel builds must work these details out for themselves.)
1294 : */
970 1295 66563 : Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
1296 66563 : Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
1297 : !TransactionIdIsValid(OldestXmin));
970 andres 1298 CBC 66563 : Assert(snapshot == SnapshotAny || !anyvisible);
970 andres 1299 ECB :
1468 alvherre 1300 : /* Publish number of blocks to scan */
1468 alvherre 1301 GIC 66563 : if (progress)
1302 : {
1418 tgl 1303 ECB : BlockNumber nblocks;
1304 :
1468 alvherre 1305 GIC 64971 : if (hscan->rs_base.rs_parallel != NULL)
1306 : {
1307 : ParallelBlockTableScanDesc pbscan;
1308 :
1309 71 : pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1310 71 : nblocks = pbscan->phs_nblocks;
1468 alvherre 1311 ECB : }
1312 : else
1468 alvherre 1313 GIC 64900 : nblocks = hscan->rs_nblocks;
1468 alvherre 1314 ECB :
1468 alvherre 1315 GIC 64971 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1316 : nblocks);
1468 alvherre 1317 ECB : }
1318 :
1319 : /* set our scan endpoints */
1474 andres 1320 GIC 66563 : if (!allow_sync)
1474 andres 1321 CBC 1738 : heap_setscanlimits(scan, start_blockno, numblocks);
1322 : else
1323 : {
1324 : /* syncscan can only be requested on whole relation */
1325 64825 : Assert(start_blockno == 0);
1326 64825 : Assert(numblocks == InvalidBlockNumber);
1327 : }
1328 :
1329 66563 : reltuples = 0;
1330 :
1474 andres 1331 ECB : /*
1332 : * Scan all tuples in the base relation.
1333 : */
1474 andres 1334 GIC 14096084 : while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1335 : {
1474 andres 1336 ECB : bool tupleIsAlive;
1337 :
1474 andres 1338 GIC 14029524 : CHECK_FOR_INTERRUPTS();
1339 :
1340 : /* Report scan progress, if asked to. */
1468 alvherre 1341 CBC 14029524 : if (progress)
1468 alvherre 1342 ECB : {
1418 tgl 1343 GIC 12751582 : BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
1344 :
1468 alvherre 1345 CBC 12751582 : if (blocks_done != previous_blkno)
1346 : {
1468 alvherre 1347 GIC 190470 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1348 : blocks_done);
1349 190470 : previous_blkno = blocks_done;
1468 alvherre 1350 ECB : }
1351 : }
1352 :
1353 : /*
1474 andres 1354 : * When dealing with a HOT-chain of updated tuples, we want to index
1355 : * the values of the live tuple (if any), but index it under the TID
1356 : * of the chain's root tuple. This approach is necessary to preserve
1357 : * the HOT-chain structure in the heap. So we need to be able to find
1358 : * the root item offset for every tuple that's in a HOT-chain. When
1359 : * first reaching a new page of the relation, call
1360 : * heap_get_root_tuples() to build a map of root item offsets on the
1361 : * page.
1362 : *
1363 : * It might look unsafe to use this information across buffer
1364 : * lock/unlock. However, we hold ShareLock on the table so no
1365 : * ordinary insert/update/delete should occur; and we hold pin on the
1366 : * buffer continuously while visiting the page, so no pruning
1367 : * operation can occur either.
1368 : *
1369 : * In cases with only ShareUpdateExclusiveLock on the table, it's
1370 : * possible for some HOT tuples to appear that we didn't know about
1371 : * when we first read the page. To handle that case, we re-obtain the
1372 : * list of root offsets when a HOT tuple points to a root item that we
1373 : * don't know about.
1374 : *
1375 : * Also, although our opinions about tuple liveness could change while
1376 : * we scan the page (due to concurrent transaction commits/aborts),
1377 : * the chain root locations won't, so this info doesn't need to be
1378 : * rebuilt after waiting for another transaction.
1379 : *
1380 : * Note the implied assumption that there is no more than one live
1381 : * tuple per HOT-chain --- else we could create more than one index
1382 : * entry pointing to the same root tuple.
1383 : */
1474 andres 1384 GIC 14029524 : if (hscan->rs_cblock != root_blkno)
1385 : {
1386 203661 : Page page = BufferGetPage(hscan->rs_cbuf);
1387 :
1388 203661 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1389 203661 : heap_get_root_tuples(page, root_offsets);
1390 203661 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1391 :
1392 203661 : root_blkno = hscan->rs_cblock;
1393 : }
1394 :
1395 14029524 : if (snapshot == SnapshotAny)
1396 : {
1397 : /* do our own time qual check */
1398 : bool indexIt;
1399 : TransactionId xwait;
1474 andres 1400 ECB :
1474 andres 1401 GIC 7002533 : recheck:
1474 andres 1402 ECB :
1403 : /*
1404 : * We could possibly get away with not locking the buffer here,
1405 : * since caller should hold ShareLock on the relation, but let's
1406 : * be conservative about it. (This remark is still correct even
1407 : * with HOT-pruning: our pin on the buffer prevents pruning.)
1408 : */
1474 andres 1409 GIC 7002533 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1410 :
1474 andres 1411 ECB : /*
1412 : * The criteria for counting a tuple as live in this block need to
1413 : * match what analyze.c's heapam_scan_analyze_next_tuple() does,
1414 : * otherwise CREATE INDEX and ANALYZE may produce wildly different
1415 : * reltuples values, e.g. when there are many recently-dead
1416 : * tuples.
1417 : */
1474 andres 1418 GIC 7002533 : switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
1419 : hscan->rs_cbuf))
1420 : {
1421 334 : case HEAPTUPLE_DEAD:
1422 : /* Definitely dead, we can ignore it */
1423 334 : indexIt = false;
1424 334 : tupleIsAlive = false;
1474 andres 1425 CBC 334 : break;
1474 andres 1426 GIC 5116582 : case HEAPTUPLE_LIVE:
1427 : /* Normal case, index and unique-check it */
1428 5116582 : indexIt = true;
1429 5116582 : tupleIsAlive = true;
1430 : /* Count it as live, too */
1431 5116582 : reltuples += 1;
1432 5116582 : break;
1433 116716 : case HEAPTUPLE_RECENTLY_DEAD:
1474 andres 1434 ECB :
1435 : /*
1436 : * If tuple is recently deleted then we must index it
1437 : * anyway to preserve MVCC semantics. (Pre-existing
1438 : * transactions could try to use the index after we finish
1439 : * building it, and may need to see such tuples.)
1440 : *
1441 : * However, if it was HOT-updated then we must only index
1442 : * the live tuple at the end of the HOT-chain. Since this
1443 : * breaks semantics for pre-existing snapshots, mark the
1444 : * index as unusable for them.
1445 : *
1446 : * We don't count recently-dead tuples in reltuples, even
1471 1447 : * if we index them; see heapam_scan_analyze_next_tuple().
1474 1448 : */
1474 andres 1449 CBC 116716 : if (HeapTupleIsHotUpdated(heapTuple))
1450 : {
1474 andres 1451 GIC 22 : indexIt = false;
1452 : /* mark the index as unsafe for old snapshots */
1453 22 : indexInfo->ii_BrokenHotChain = true;
1454 : }
1455 : else
1456 116694 : indexIt = true;
1457 : /* In any case, exclude the tuple from unique-checking */
1458 116716 : tupleIsAlive = false;
1459 116716 : break;
1460 1768826 : case HEAPTUPLE_INSERT_IN_PROGRESS:
1461 :
1462 : /*
1463 : * In "anyvisible" mode, this tuple is visible and we
1464 : * don't need any further checks.
1474 andres 1465 ECB : */
1474 andres 1466 GIC 1768826 : if (anyvisible)
1474 andres 1467 ECB : {
1474 andres 1468 GIC 30736 : indexIt = true;
1474 andres 1469 CBC 30736 : tupleIsAlive = true;
1474 andres 1470 GIC 30736 : reltuples += 1;
1471 30736 : break;
1474 andres 1472 ECB : }
1473 :
1474 : /*
1475 : * Since caller should hold ShareLock or better, normally
1476 : * the only way to see this is if it was inserted earlier
1477 : * in our own transaction. However, it can happen in
1478 : * system catalogs, since we tend to release write lock
1479 : * before commit there. Give a warning if neither case
1480 : * applies.
1481 : */
1474 andres 1482 CBC 1738090 : xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
1474 andres 1483 GIC 1738090 : if (!TransactionIdIsCurrentTransactionId(xwait))
1474 andres 1484 ECB : {
1474 andres 1485 CBC 27 : if (!is_system_catalog)
1474 andres 1486 LBC 0 : elog(WARNING, "concurrent insert in progress within table \"%s\"",
1474 andres 1487 ECB : RelationGetRelationName(heapRelation));
1488 :
1489 : /*
1490 : * If we are performing uniqueness checks, indexing
1491 : * such a tuple could lead to a bogus uniqueness
1492 : * failure. In that case we wait for the inserting
1493 : * transaction to finish and check again.
1494 : */
1474 andres 1495 GIC 27 : if (checking_uniqueness)
1496 : {
1497 : /*
1474 andres 1498 ECB : * Must drop the lock on the buffer before we wait
1499 : */
1474 andres 1500 UIC 0 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1474 andres 1501 LBC 0 : XactLockTableWait(xwait, heapRelation,
1474 andres 1502 EUB : &heapTuple->t_self,
1503 : XLTW_InsertIndexUnique);
1474 andres 1504 UIC 0 : CHECK_FOR_INTERRUPTS();
1505 0 : goto recheck;
1506 : }
1507 : }
1508 : else
1509 : {
1510 : /*
1471 andres 1511 ECB : * For consistency with
1512 : * heapam_scan_analyze_next_tuple(), count
1513 : * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
1514 : * when inserted by our own transaction.
1515 : */
1474 andres 1516 GBC 1738063 : reltuples += 1;
1474 andres 1517 EUB : }
1518 :
1519 : /*
1520 : * We must index such tuples, since if the index build
1521 : * commits then they're good.
1522 : */
1474 andres 1523 GIC 1738090 : indexIt = true;
1524 1738090 : tupleIsAlive = true;
1525 1738090 : break;
1526 75 : case HEAPTUPLE_DELETE_IN_PROGRESS:
1527 :
1528 : /*
 1529 : * As with the INSERT_IN_PROGRESS case, this is unexpected
1530 : * unless it's our own deletion or a system catalog; but
1531 : * in anyvisible mode, this tuple is visible.
1474 andres 1532 ECB : */
1474 andres 1533 GIC 75 : if (anyvisible)
1534 : {
1474 andres 1535 UIC 0 : indexIt = true;
1536 0 : tupleIsAlive = false;
1537 0 : reltuples += 1;
1538 0 : break;
1474 andres 1539 ECB : }
1540 :
1474 andres 1541 CBC 75 : xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1542 75 : if (!TransactionIdIsCurrentTransactionId(xwait))
1543 : {
1474 andres 1544 GIC 36 : if (!is_system_catalog)
1474 andres 1545 UIC 0 : elog(WARNING, "concurrent delete in progress within table \"%s\"",
1546 : RelationGetRelationName(heapRelation));
1547 :
1548 : /*
1474 andres 1549 ECB : * If we are performing uniqueness checks, assuming
1550 : * the tuple is dead could lead to missing a
1474 andres 1551 EUB : * uniqueness violation. In that case we wait for the
1552 : * deleting transaction to finish and check again.
1553 : *
1554 : * Also, if it's a HOT-updated tuple, we should not
1555 : * index it but rather the live tuple at the end of
1556 : * the HOT-chain. However, the deleting transaction
1474 andres 1557 ECB : * could abort, possibly leaving this tuple as live
1558 : * after all, in which case it has to be indexed. The
1559 : * only way to know what to do is to wait for the
1560 : * deleting transaction to finish and check again.
1474 andres 1561 EUB : */
1474 andres 1562 GIC 36 : if (checking_uniqueness ||
1563 36 : HeapTupleIsHotUpdated(heapTuple))
1564 : {
1565 : /*
1566 : * Must drop the lock on the buffer before we wait
1567 : */
1474 andres 1568 UIC 0 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1569 0 : XactLockTableWait(xwait, heapRelation,
1570 : &heapTuple->t_self,
1571 : XLTW_InsertIndexUnique);
1572 0 : CHECK_FOR_INTERRUPTS();
1573 0 : goto recheck;
1574 : }
1575 :
1576 : /*
1577 : * Otherwise index it but don't check for uniqueness,
1474 andres 1578 ECB : * the same as a RECENTLY_DEAD tuple.
1579 : */
1474 andres 1580 GIC 36 : indexIt = true;
1581 :
1582 : /*
1583 : * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
1474 andres 1584 EUB : * if they were not deleted by the current
1471 1585 : * transaction. That's what
1586 : * heapam_scan_analyze_next_tuple() does, and we want
1587 : * the behavior to be consistent.
1474 1588 : */
1474 andres 1589 GBC 36 : reltuples += 1;
1590 : }
1474 andres 1591 GIC 39 : else if (HeapTupleIsHotUpdated(heapTuple))
1592 : {
1593 : /*
1594 : * It's a HOT-updated tuple deleted by our own xact.
1595 : * We can assume the deletion will commit (else the
1474 andres 1596 ECB : * index contents don't matter), so treat the same as
1597 : * RECENTLY_DEAD HOT-updated tuples.
1598 : */
1474 andres 1599 UIC 0 : indexIt = false;
1600 : /* mark the index as unsafe for old snapshots */
1601 0 : indexInfo->ii_BrokenHotChain = true;
1602 : }
1603 : else
1604 : {
1474 andres 1605 ECB : /*
1606 : * It's a regular tuple deleted by our own xact. Index
1607 : * it, but don't check for uniqueness nor count in
1608 : * reltuples, the same as a RECENTLY_DEAD tuple.
1609 : */
1474 andres 1610 GIC 39 : indexIt = true;
1611 : }
1612 : /* In any case, exclude the tuple from unique-checking */
1613 75 : tupleIsAlive = false;
1614 75 : break;
1474 andres 1615 UBC 0 : default:
1474 andres 1616 UIC 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1474 andres 1617 EUB : indexIt = tupleIsAlive = false; /* keep compiler quiet */
1618 : break;
1619 : }
1620 :
1474 andres 1621 GIC 7002533 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1622 :
1623 7002533 : if (!indexIt)
1624 356 : continue;
1625 : }
1474 andres 1626 ECB : else
1627 : {
1628 : /* heap_getnext did the time qual check */
1474 andres 1629 CBC 7026991 : tupleIsAlive = true;
1630 7026991 : reltuples += 1;
1474 andres 1631 EUB : }
1632 :
1474 andres 1633 GIC 14029168 : MemoryContextReset(econtext->ecxt_per_tuple_memory);
1634 :
1635 : /* Set up for predicate or expression evaluation */
1636 14029168 : ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
1474 andres 1637 ECB :
1638 : /*
1639 : * In a partial index, discard tuples that don't satisfy the
1640 : * predicate.
1641 : */
1474 andres 1642 GIC 14029168 : if (predicate != NULL)
1643 : {
1644 41274 : if (!ExecQual(predicate, econtext))
1474 andres 1645 CBC 11793 : continue;
1474 andres 1646 ECB : }
1647 :
1648 : /*
1649 : * For the current heap tuple, extract all the attributes we use in
1650 : * this index, and note which are null. This also performs evaluation
1651 : * of any expressions needed.
1652 : */
1474 andres 1653 GIC 14017375 : FormIndexDatum(indexInfo,
1654 : slot,
1655 : estate,
1656 : values,
1657 : isnull);
1474 andres 1658 ECB :
1659 : /*
1660 : * You'd think we should go ahead and build the index tuple here, but
1661 : * some index AMs want to do further processing on the data first. So
1662 : * pass the values[] and isnull[] arrays, instead.
1663 : */
1664 :
1474 andres 1665 GIC 14017372 : if (HeapTupleIsHeapOnly(heapTuple))
1666 : {
1667 : /*
1668 : * For a heap-only tuple, pretend its TID is that of the root. See
1474 andres 1669 ECB : * src/backend/access/heap/README.HOT for discussion.
1670 : */
1671 : ItemPointerData tid;
1672 : OffsetNumber offnum;
1673 :
1474 andres 1674 GIC 1589 : offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
1675 :
1676 : /*
1677 : * If a HOT tuple points to a root that we don't know about,
1678 : * obtain root items afresh. If that still fails, report it as
1679 : * corruption.
1680 : */
969 alvherre 1681 CBC 1589 : if (root_offsets[offnum - 1] == InvalidOffsetNumber)
1682 : {
697 tgl 1683 UIC 0 : Page page = BufferGetPage(hscan->rs_cbuf);
1684 :
969 alvherre 1685 0 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1686 0 : heap_get_root_tuples(page, root_offsets);
1687 0 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1688 : }
1689 :
1474 andres 1690 CBC 1589 : if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
1474 andres 1691 UIC 0 : ereport(ERROR,
1692 : (errcode(ERRCODE_DATA_CORRUPTED),
1693 : errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1694 : ItemPointerGetBlockNumber(&heapTuple->t_self),
1695 : offnum,
1696 : RelationGetRelationName(heapRelation))));
1474 andres 1697 ECB :
1248 andres 1698 GIC 1589 : ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
1248 andres 1699 GBC 1589 : root_offsets[offnum - 1]);
1700 :
1474 andres 1701 EUB : /* Call the AM's callback routine to process the tuple */
1248 andres 1702 GBC 1589 : callback(indexRelation, &tid, values, isnull, tupleIsAlive,
1474 andres 1703 EUB : callback_state);
1704 : }
1705 : else
1474 andres 1706 ECB : {
1474 andres 1707 EUB : /* Call the AM's callback routine to process the tuple */
1248 andres 1708 GIC 14015783 : callback(indexRelation, &heapTuple->t_self, values, isnull,
1709 : tupleIsAlive, callback_state);
1710 : }
1711 : }
1712 :
1713 : /* Report scan progress one last time. */
1468 alvherre 1714 CBC 66560 : if (progress)
1468 alvherre 1715 ECB : {
1716 : BlockNumber blks_done;
1717 :
1468 alvherre 1718 CBC 64968 : if (hscan->rs_base.rs_parallel != NULL)
1719 : {
1720 : ParallelBlockTableScanDesc pbscan;
1721 :
1468 alvherre 1722 GIC 71 : pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
1723 71 : blks_done = pbscan->phs_nblocks;
1468 alvherre 1724 ECB : }
1725 : else
1468 alvherre 1726 GIC 64897 : blks_done = hscan->rs_nblocks;
1727 :
1728 64968 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1729 : blks_done);
1468 alvherre 1730 ECB : }
1731 :
1474 andres 1732 GIC 66560 : table_endscan(scan);
1733 :
1474 andres 1734 ECB : /* we can now forget our snapshot, if set and registered by us */
1474 andres 1735 GIC 66560 : if (need_unregister_snapshot)
1736 49079 : UnregisterSnapshot(snapshot);
1737 :
1474 andres 1738 CBC 66560 : ExecDropSingleTupleTableSlot(slot);
1474 andres 1739 ECB :
1474 andres 1740 GIC 66560 : FreeExecutorState(estate);
1741 :
1474 andres 1742 ECB : /* These may have been pointing to the now-gone estate */
1474 andres 1743 GIC 66560 : indexInfo->ii_ExpressionsState = NIL;
1474 andres 1744 CBC 66560 : indexInfo->ii_PredicateState = NULL;
1745 :
1474 andres 1746 GIC 66560 : return reltuples;
1747 : }
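 /*
 * Editor's aside -- a hypothetical helper, not part of heapam: it condenses
 * the HeapTupleSatisfiesVacuum() dispatch in the SnapshotAny branch above
 * into its two per-tuple outputs.  The "anyvisible" mode, the
 * wait-and-recheck paths for in-progress tuples, and the own-xact
 * HOT-update special case are deliberately omitted; this only restates the
 * common outcomes for readability.
 */
static inline void
sketch_classify_for_build(HTSV_Result vacres, bool hot_updated,
						  bool *indexIt, bool *tupleIsAlive)
{
	switch (vacres)
	{
		case HEAPTUPLE_DEAD:
			*indexIt = false;	/* ignore entirely */
			*tupleIsAlive = false;
			break;
		case HEAPTUPLE_LIVE:
			*indexIt = true;	/* index it and allow unique-checking */
			*tupleIsAlive = true;
			break;
		case HEAPTUPLE_RECENTLY_DEAD:
			*indexIt = !hot_updated;	/* index chain tails only */
			*tupleIsAlive = false;		/* never unique-check */
			break;
		case HEAPTUPLE_INSERT_IN_PROGRESS:
			*indexIt = true;	/* normally our own insert; good if we commit */
			*tupleIsAlive = true;
			break;
		case HEAPTUPLE_DELETE_IN_PROGRESS:
			*indexIt = true;	/* the deletion could still abort */
			*tupleIsAlive = false;
			break;
	}
}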
1474 andres 1748 ECB :
1749 : static void
1474 andres 1750 GIC 270 : heapam_index_validate_scan(Relation heapRelation,
1474 andres 1751 ECB : Relation indexRelation,
1752 : IndexInfo *indexInfo,
1753 : Snapshot snapshot,
1471 1754 : ValidateIndexState *state)
1755 : {
1474 1756 : TableScanDesc scan;
1757 : HeapScanDesc hscan;
1758 : HeapTuple heapTuple;
1759 : Datum values[INDEX_MAX_KEYS];
1760 : bool isnull[INDEX_MAX_KEYS];
1761 : ExprState *predicate;
1762 : TupleTableSlot *slot;
1763 : EState *estate;
1764 : ExprContext *econtext;
1474 andres 1765 GIC 270 : BlockNumber root_blkno = InvalidBlockNumber;
1474 andres 1766 ECB : OffsetNumber root_offsets[MaxHeapTuplesPerPage];
1767 : bool in_index[MaxHeapTuplesPerPage];
1418 tgl 1768 GIC 270 : BlockNumber previous_blkno = InvalidBlockNumber;
1769 :
1770 : /* state variables for the merge */
1474 andres 1771 270 : ItemPointer indexcursor = NULL;
1772 : ItemPointerData decoded;
1773 270 : bool tuplesort_empty = false;
1774 :
1775 : /*
1776 : * sanity checks
1777 : */
1778 270 : Assert(OidIsValid(indexRelation->rd_rel->relam));
1779 :
1780 : /*
1474 andres 1781 ECB : * Need an EState for evaluation of index expressions and partial-index
1782 : * predicates. Also a slot to hold the current tuple.
1783 : */
1474 andres 1784 CBC 270 : estate = CreateExecutorState();
1474 andres 1785 GIC 270 : econtext = GetPerTupleExprContext(estate);
1786 270 : slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
1474 andres 1787 ECB : &TTSOpsHeapTuple);
1788 :
1789 : /* Arrange for econtext's scan tuple to be the tuple under test */
1474 andres 1790 GIC 270 : econtext->ecxt_scantuple = slot;
1791 :
1792 : /* Set up execution state for predicate, if any. */
1793 270 : predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
1474 andres 1794 ECB :
1795 : /*
1796 : * Prepare for scan of the base relation. We need just those tuples
1797 : * satisfying the passed-in reference snapshot. We must disable syncscan
1798 : * here, because it's critical that we read from block zero forward to
1799 : * match the sorted TIDs.
1800 : */
1474 andres 1801 CBC 270 : scan = table_beginscan_strat(heapRelation, /* relation */
1474 andres 1802 ECB : snapshot, /* snapshot */
1803 : 0, /* number of keys */
1804 : NULL, /* scan key */
1805 : true, /* buffer access strategy OK */
1806 : false); /* syncscan not OK */
1474 andres 1807 GIC 270 : hscan = (HeapScanDesc) scan;
1808 :
1468 alvherre 1809 CBC 270 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
1468 alvherre 1810 GIC 270 : hscan->rs_nblocks);
1811 :
1812 : /*
1813 : * Scan all tuples matching the snapshot.
1814 : */
1474 andres 1815 16419 : while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1816 : {
1474 andres 1817 CBC 16149 : ItemPointer heapcursor = &heapTuple->t_self;
1818 : ItemPointerData rootTuple;
1819 : OffsetNumber root_offnum;
1820 :
1474 andres 1821 GIC 16149 : CHECK_FOR_INTERRUPTS();
1822 :
1474 andres 1823 CBC 16149 : state->htups += 1;
1824 :
1468 alvherre 1825 16149 : if ((previous_blkno == InvalidBlockNumber) ||
1826 15974 : (hscan->rs_cblock != previous_blkno))
1827 : {
1468 alvherre 1828 GIC 380 : pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1829 380 : hscan->rs_cblock);
1830 380 : previous_blkno = hscan->rs_cblock;
1468 alvherre 1831 ECB : }
1832 :
1474 andres 1833 : /*
1834 : * As commented in table_index_build_scan, we should index heap-only
1835 : * tuples under the TIDs of their root tuples; so when we advance onto
1836 : * a new heap page, build a map of root item offsets on the page.
1837 : *
1838 : * This complicates merging against the tuplesort output: we will
1839 : * visit the live tuples in order by their offsets, but the root
1840 : * offsets that we need to compare against the index contents might be
1841 : * ordered differently. So we might have to "look back" within the
1842 : * tuplesort output, but only within the current page. We handle that
1843 : * by keeping a bool array in_index[] showing all the
1844 : * already-passed-over tuplesort output TIDs of the current page. We
1845 : * clear that array here, when advancing onto a new heap page.
1846 : */
1474 andres 1847 GIC 16149 : if (hscan->rs_cblock != root_blkno)
1848 : {
1849 380 : Page page = BufferGetPage(hscan->rs_cbuf);
1850 :
1851 380 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1852 380 : heap_get_root_tuples(page, root_offsets);
1853 380 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1854 :
1855 380 : memset(in_index, 0, sizeof(in_index));
1856 :
1857 380 : root_blkno = hscan->rs_cblock;
1858 : }
1859 :
1860 : /* Convert actual tuple TID to root TID */
1861 16149 : rootTuple = *heapcursor;
1862 16149 : root_offnum = ItemPointerGetOffsetNumber(heapcursor);
1474 andres 1863 ECB :
1474 andres 1864 GIC 16149 : if (HeapTupleIsHeapOnly(heapTuple))
1474 andres 1865 ECB : {
1474 andres 1866 GIC 4 : root_offnum = root_offsets[root_offnum - 1];
1474 andres 1867 CBC 4 : if (!OffsetNumberIsValid(root_offnum))
1474 andres 1868 LBC 0 : ereport(ERROR,
1474 andres 1869 ECB : (errcode(ERRCODE_DATA_CORRUPTED),
1870 : errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
1871 : ItemPointerGetBlockNumber(heapcursor),
1872 : ItemPointerGetOffsetNumber(heapcursor),
1873 : RelationGetRelationName(heapRelation))));
1474 andres 1874 GIC 4 : ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
1875 : }
1876 :
1474 andres 1877 ECB : /*
1878 : * "merge" by skipping through the index tuples until we find or pass
1879 : * the current root tuple.
1880 : */
1474 andres 1881 GIC 32251 : while (!tuplesort_empty &&
1474 andres 1882 CBC 32036 : (!indexcursor ||
1883 32036 : ItemPointerCompare(indexcursor, &rootTuple) < 0))
1474 andres 1884 EUB : {
1885 : Datum ts_val;
1886 : bool ts_isnull;
1887 :
1474 andres 1888 GIC 16102 : if (indexcursor)
1889 : {
1474 andres 1890 ECB : /*
1891 : * Remember index items seen earlier on the current heap page
1892 : */
1474 andres 1893 GIC 15927 : if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
1894 15722 : in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
1895 : }
1896 :
1474 andres 1897 CBC 16102 : tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
1898 : false, &ts_val, &ts_isnull,
163 drowley 1899 GNC 16102 : NULL);
1474 andres 1900 CBC 16102 : Assert(tuplesort_empty || !ts_isnull);
1474 andres 1901 GIC 16102 : if (!tuplesort_empty)
1902 : {
1903 16085 : itemptr_decode(&decoded, DatumGetInt64(ts_val));
1904 16085 : indexcursor = &decoded;
1474 andres 1905 ECB : }
1906 : else
1907 : {
1908 : /* Be tidy */
1474 andres 1909 CBC 17 : indexcursor = NULL;
1910 : }
1474 andres 1911 ECB : }
1912 :
1913 : /*
1914 : * If the tuplesort has overshot *and* we didn't see a match earlier,
1915 : * then this tuple is missing from the index, so insert it.
1916 : */
1474 andres 1917 GIC 32258 : if ((tuplesort_empty ||
1918 16109 : ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
1919 74 : !in_index[root_offnum - 1])
1920 : {
1474 andres 1921 CBC 70 : MemoryContextReset(econtext->ecxt_per_tuple_memory);
1922 :
1923 : /* Set up for predicate or expression evaluation */
1474 andres 1924 GIC 70 : ExecStoreHeapTuple(heapTuple, slot, false);
1925 :
1926 : /*
1927 : * In a partial index, discard tuples that don't satisfy the
1928 : * predicate.
1474 andres 1929 ECB : */
1474 andres 1930 CBC 70 : if (predicate != NULL)
1474 andres 1931 ECB : {
1474 andres 1932 GIC 24 : if (!ExecQual(predicate, econtext))
1474 andres 1933 CBC 24 : continue;
1934 : }
1935 :
1474 andres 1936 ECB : /*
1937 : * For the current heap tuple, extract all the attributes we use
1938 : * in this index, and note which are null. This also performs
1939 : * evaluation of any expressions needed.
1940 : */
1474 andres 1941 GIC 46 : FormIndexDatum(indexInfo,
1474 andres 1942 ECB : slot,
1943 : estate,
1944 : values,
1945 : isnull);
1946 :
1947 : /*
1948 : * You'd think we should go ahead and build the index tuple here,
1949 : * but some index AMs want to do further processing on the data
1950 : * first. So pass the values[] and isnull[] arrays, instead.
1951 : */
1952 :
1953 : /*
1954 : * If the tuple is already committed dead, you might think we
1955 : * could suppress uniqueness checking, but this is no longer true
1956 : * in the presence of HOT, because the insert is actually a proxy
1957 : * for a uniqueness check on the whole HOT-chain. That is, the
1958 : * tuple we have here could be dead because it was already
1959 : * HOT-updated, and if so the updating transaction will not have
1960 : * thought it should insert index entries. The index AM will
1961 : * check the whole HOT-chain and correctly detect a conflict if
1962 : * there is one.
1963 : */
1964 :
1474 andres 1965 GIC 46 : index_insert(indexRelation,
1966 : values,
1967 : isnull,
1968 : &rootTuple,
1969 : heapRelation,
1970 46 : indexInfo->ii_Unique ?
1971 : UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
1972 : false,
1973 : indexInfo);
1974 :
1975 46 : state->tups_inserted += 1;
1976 : }
1474 andres 1977 ECB : }
1978 :
1474 andres 1979 GIC 270 : table_endscan(scan);
1980 :
1981 270 : ExecDropSingleTupleTableSlot(slot);
1474 andres 1982 ECB :
1474 andres 1983 GIC 270 : FreeExecutorState(estate);
1984 :
1985 : /* These may have been pointing to the now-gone estate */
1986 270 : indexInfo->ii_ExpressionsState = NIL;
1474 andres 1987 CBC 270 : indexInfo->ii_PredicateState = NULL;
1474 andres 1988 GIC 270 : }
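 /*
 * Editor's aside: the tuplesort merged against above stores each index TID
 * as an int64 datum, so plain integer comparison agrees with
 * ItemPointerCompare() ordering.  A paraphrase of the encoding used by
 * itemptr_encode()/itemptr_decode() in access/genam.h -- block number in
 * bits 16..47, offset in bits 0..15:
 */
static inline int64
sketch_itemptr_encode(ItemPointer itemptr)
{
	return (int64) (((uint64) ItemPointerGetBlockNumber(itemptr) << 16) |
					(uint16) ItemPointerGetOffsetNumber(itemptr));
}

static inline void
sketch_itemptr_decode(ItemPointer itemptr, int64 encoded)
{
	ItemPointerSet(itemptr,
				   (BlockNumber) (encoded >> 16),
				   (OffsetNumber) (encoded & 0xffff));
}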
1989 :
1990 : /*
1468 alvherre 1991 ECB : * Return the number of blocks that have been read by this scan since
 1992 : * starting. This is meant for progress reporting rather than being fully
1993 : * accurate: in a parallel scan, workers can be concurrently reading blocks
1994 : * further ahead than what we report.
1995 : */
1996 : static BlockNumber
1468 alvherre 1997 GIC 12751582 : heapam_scan_get_blocks_done(HeapScanDesc hscan)
1468 alvherre 1998 ECB : {
1468 alvherre 1999 CBC 12751582 : ParallelBlockTableScanDesc bpscan = NULL;
1418 tgl 2000 ECB : BlockNumber startblock;
2001 : BlockNumber blocks_done;
2002 :
1468 alvherre 2003 GIC 12751582 : if (hscan->rs_base.rs_parallel != NULL)
2004 : {
2005 1013615 : bpscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
2006 1013615 : startblock = bpscan->phs_startblock;
2007 : }
2008 : else
1468 alvherre 2009 CBC 11737967 : startblock = hscan->rs_startblock;
2010 :
1468 alvherre 2011 ECB : /*
2012 : * Might have wrapped around the end of the relation, if startblock was
2013 : * not zero.
2014 : */
1468 alvherre 2015 CBC 12751582 : if (hscan->rs_cblock > startblock)
1468 alvherre 2016 GIC 11876578 : blocks_done = hscan->rs_cblock - startblock;
1468 alvherre 2017 ECB : else
2018 : {
2019 : BlockNumber nblocks;
2020 :
1468 alvherre 2021 CBC 875004 : nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
1468 alvherre 2022 GIC 875004 : blocks_done = nblocks - startblock +
2023 875004 : hscan->rs_cblock;
2024 : }
2025 :
2026 12751582 : return blocks_done;
1468 alvherre 2027 ECB : }
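 /*
 * Editor's worked example of the wraparound arithmetic above: in a
 * 100-block relation (rs_nblocks = 100), a synchronized scan that started
 * at block 90 and has wrapped around to rs_cblock = 5 reports
 * blocks_done = 100 - 90 + 5 = 15, while rs_cblock = 95 (before the wrap)
 * reports 95 - 90 = 5.
 */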
2028 :
2029 :
2030 : /* ------------------------------------------------------------------------
2031 : * Miscellaneous callbacks for the heap AM
2032 : * ------------------------------------------------------------------------
1423 andres 2033 : */
2034 :
1419 rhaas 2035 : /*
2036 : * Check to see whether the table needs a TOAST table. It does only if
2037 : * (1) there are any toastable attributes, and (2) the maximum length
2038 : * of a tuple could exceed TOAST_TUPLE_THRESHOLD. (We don't want to
2039 : * create a toast table for something like "f1 varchar(20)".)
2040 : */
2041 : static bool
1419 rhaas 2042 GIC 30583 : heapam_relation_needs_toast_table(Relation rel)
2043 : {
2044 30583 : int32 data_length = 0;
2045 30583 : bool maxlength_unknown = false;
2046 30583 : bool has_toastable_attrs = false;
2047 30583 : TupleDesc tupdesc = rel->rd_att;
2048 : int32 tuple_length;
2049 : int i;
2050 :
2051 190212 : for (i = 0; i < tupdesc->natts; i++)
2052 : {
2053 159629 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
1419 rhaas 2054 ECB :
1419 rhaas 2055 GIC 159629 : if (att->attisdropped)
1419 rhaas 2056 CBC 480 : continue;
2057 159149 : data_length = att_align_nominal(data_length, att->attalign);
2058 159149 : if (att->attlen > 0)
1419 rhaas 2059 ECB : {
2060 : /* Fixed-length types are never toastable */
1419 rhaas 2061 GIC 113551 : data_length += att->attlen;
2062 : }
1419 rhaas 2063 ECB : else
2064 : {
1419 rhaas 2065 CBC 45598 : int32 maxlen = type_maximum_size(att->atttypid,
2066 : att->atttypmod);
1419 rhaas 2067 ECB :
1419 rhaas 2068 CBC 45598 : if (maxlen < 0)
2069 44155 : maxlength_unknown = true;
1419 rhaas 2070 ECB : else
1419 rhaas 2071 GIC 1443 : data_length += maxlen;
1131 tgl 2072 45598 : if (att->attstorage != TYPSTORAGE_PLAIN)
1419 rhaas 2073 CBC 43419 : has_toastable_attrs = true;
2074 : }
2075 : }
1419 rhaas 2076 GIC 30583 : if (!has_toastable_attrs)
1419 rhaas 2077 CBC 11659 : return false; /* nothing to toast? */
1419 rhaas 2078 GIC 18924 : if (maxlength_unknown)
2079 17777 : return true; /* any unlimited-length attrs? */
1419 rhaas 2080 CBC 1147 : tuple_length = MAXALIGN(SizeofHeapTupleHeader +
2081 1147 : BITMAPLEN(tupdesc->natts)) +
1419 rhaas 2082 GIC 1147 : MAXALIGN(data_length);
1419 rhaas 2083 CBC 1147 : return (tuple_length > TOAST_TUPLE_THRESHOLD);
1419 rhaas 2084 ECB : }
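 /*
 * Editor's worked example for the comment above, assuming 8 kB pages and
 * 8-byte MAXALIGN: for a single-column table "f1 varchar(20)",
 * type_maximum_size() reports 24 bytes (20 + VARHDRSZ), so
 * maxlength_unknown stays false and
 *     tuple_length = MAXALIGN(SizeofHeapTupleHeader + BITMAPLEN(1)) +
 *                    MAXALIGN(24)
 *                  = 24 + 24 = 48 bytes,
 * far below TOAST_TUPLE_THRESHOLD (roughly 2 kB), hence no TOAST table.
 * An unconstrained "f1 text" column instead makes maxlength_unknown true,
 * so the function returns true once a toastable attribute exists.
 */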
2085 :
2086 : /*
2087 : * TOAST tables for heap relations are just heap relations.
1188 2088 : */
2089 : static Oid
1188 rhaas 2090 CBC 18026 : heapam_relation_toast_am(Relation rel)
1188 rhaas 2091 ECB : {
1188 rhaas 2092 CBC 18026 : return rel->rd_rel->relam;
1188 rhaas 2093 ECB : }
2094 :
1474 andres 2095 :
2096 : /* ------------------------------------------------------------------------
2097 : * Planner related callbacks for the heap AM
2098 : * ------------------------------------------------------------------------
2099 : */
2100 :
2101 : #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
1371 rhaas 2102 : (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
2103 : #define HEAP_USABLE_BYTES_PER_PAGE \
2104 : (BLCKSZ - SizeOfPageHeaderData)
2105 :
2106 : static void
1471 andres 2107 GIC 174504 : heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
2108 : BlockNumber *pages, double *tuples,
2109 : double *allvisfrac)
2110 : {
1371 rhaas 2111 174504 : table_block_relation_estimate_size(rel, attr_widths, pages,
2112 : tuples, allvisfrac,
2113 : HEAP_OVERHEAD_BYTES_PER_TUPLE,
2114 : HEAP_USABLE_BYTES_PER_PAGE);
1471 andres 2115 174504 : }
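 /*
 * Editor's worked numbers for the two constants above, with 8 kB pages and
 * 8-byte MAXALIGN: HEAP_OVERHEAD_BYTES_PER_TUPLE = MAXALIGN(23) + 4 = 28
 * and HEAP_USABLE_BYTES_PER_PAGE = 8192 - 24 = 8168.  For rows whose data
 * averages 32 bytes, table_block_relation_estimate_size() therefore
 * assumes a density of roughly 8168 / (32 + 28) = 136 tuples per block.
 */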
2116 :
2117 :
2118 : /* ------------------------------------------------------------------------
1471 andres 2119 ECB : * Executor related callbacks for the heap AM
2120 : * ------------------------------------------------------------------------
2121 : */
2122 :
1470 2123 : static bool
1470 andres 2124 GIC 254417 : heapam_scan_bitmap_next_block(TableScanDesc scan,
2125 : TBMIterateResult *tbmres)
2126 : {
1470 andres 2127 CBC 254417 : HeapScanDesc hscan = (HeapScanDesc) scan;
144 peter 2128 GNC 254417 : BlockNumber block = tbmres->blockno;
2129 : Buffer buffer;
2130 : Snapshot snapshot;
2131 : int ntup;
2132 :
1470 andres 2133 GIC 254417 : hscan->rs_cindex = 0;
2134 254417 : hscan->rs_ntuples = 0;
2135 :
1470 andres 2136 ECB : /*
2137 : * Ignore any claimed entries past what we think is the end of the
2138 : * relation. It may have been extended after the start of our scan (we
 2139 : * only hold an AccessShareLock, and the new blocks could well contain
 2140 : * inserts from our own backend).
2141 : */
144 peter 2142 GNC 254417 : if (block >= hscan->rs_nblocks)
1470 andres 2143 GIC 6 : return false;
2144 :
1470 andres 2145 ECB : /*
2146 : * Acquire pin on the target heap page, trading in any pin we held before.
2147 : */
1470 andres 2148 GIC 254411 : hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
2149 : scan->rs_rd,
2150 : block);
144 peter 2151 GNC 254411 : hscan->rs_cblock = block;
1470 andres 2152 GIC 254411 : buffer = hscan->rs_cbuf;
2153 254411 : snapshot = scan->rs_snapshot;
1470 andres 2154 ECB :
1470 andres 2155 CBC 254411 : ntup = 0;
2156 :
2157 : /*
2158 : * Prune and repair fragmentation for the whole page, if possible.
2159 : */
2160 254411 : heap_page_prune_opt(scan->rs_rd, buffer);
2161 :
2162 : /*
1470 andres 2163 ECB : * We must hold share lock on the buffer content while examining tuple
2164 : * visibility. Afterwards, however, the tuples we have found to be
2165 : * visible are guaranteed good as long as we hold the buffer pin.
2166 : */
1470 andres 2167 CBC 254411 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
2168 :
2169 : /*
2170 : * We need two separate strategies for lossy and non-lossy cases.
2171 : */
2172 254411 : if (tbmres->ntuples >= 0)
2173 : {
2174 : /*
2175 : * Bitmap is non-lossy, so we just look through the offsets listed in
2176 : * tbmres; but we have to follow any HOT chain starting at each such
2177 : * offset.
2178 : */
1470 andres 2179 ECB : int curslot;
2180 :
1470 andres 2181 GIC 3683137 : for (curslot = 0; curslot < tbmres->ntuples; curslot++)
2182 : {
2183 3501059 : OffsetNumber offnum = tbmres->offsets[curslot];
1470 andres 2184 ECB : ItemPointerData tid;
2185 : HeapTupleData heapTuple;
2186 :
144 peter 2187 GNC 3501059 : ItemPointerSet(&tid, block, offnum);
1470 andres 2188 GIC 3501059 : if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
2189 : &heapTuple, NULL, true))
2190 3351716 : hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
2191 : }
2192 : }
1470 andres 2193 ECB : else
2194 : {
2195 : /*
2196 : * Bitmap is lossy, so we must examine each line pointer on the page.
2197 : * But we can ignore HOT chains, since we'll check each tuple anyway.
2198 : */
144 peter 2199 GNC 72330 : Page page = BufferGetPage(buffer);
2200 72330 : OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
2201 : OffsetNumber offnum;
1470 andres 2202 ECB :
1470 andres 2203 GIC 486285 : for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
2204 : {
2205 : ItemId lp;
2206 : HeapTupleData loctup;
2207 : bool valid;
2208 :
144 peter 2209 GNC 413955 : lp = PageGetItemId(page, offnum);
1470 andres 2210 GIC 413955 : if (!ItemIdIsNormal(lp))
1470 andres 2211 LBC 0 : continue;
144 peter 2212 GNC 413955 : loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1470 andres 2213 GIC 413955 : loctup.t_len = ItemIdGetLength(lp);
2214 413955 : loctup.t_tableOid = scan->rs_rd->rd_id;
144 peter 2215 GNC 413955 : ItemPointerSet(&loctup.t_self, block, offnum);
1470 andres 2216 GIC 413955 : valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
2217 413955 : if (valid)
2218 : {
2219 413892 : hscan->rs_vistuples[ntup++] = offnum;
1167 tmunro 2220 413892 : PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
1167 tmunro 2221 CBC 413892 : HeapTupleHeaderGetXmin(loctup.t_data));
1470 andres 2222 ECB : }
1167 tmunro 2223 GBC 413955 : HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
1167 tmunro 2224 ECB : buffer, snapshot);
1470 andres 2225 : }
2226 : }
2227 :
1470 andres 2228 CBC 254408 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1470 andres 2229 ECB :
1470 andres 2230 GIC 254408 : Assert(ntup <= MaxHeapTuplesPerPage);
1470 andres 2231 CBC 254408 : hscan->rs_ntuples = ntup;
1470 andres 2232 ECB :
1470 andres 2233 CBC 254408 : return ntup > 0;
2234 : }
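 /*
 * Editor's aside on the TBMIterateResult contract consumed above (see
 * tidbitmap.h): ntuples >= 0 marks an exact page, where
 * offsets[0 .. ntuples - 1] name the candidate line pointers and HOT
 * chains must still be followed; ntuples == -1 marks a lossy page, where
 * every normal line pointer has to be visibility-checked individually.
 */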
1470 andres 2235 ECB :
2236 : static bool
1470 andres 2237 GIC 4018247 : heapam_scan_bitmap_next_tuple(TableScanDesc scan,
2238 : TBMIterateResult *tbmres,
2239 : TupleTableSlot *slot)
1470 andres 2240 ECB : {
1470 andres 2241 GIC 4018247 : HeapScanDesc hscan = (HeapScanDesc) scan;
1470 andres 2242 ECB : OffsetNumber targoffset;
2243 : Page page;
2244 : ItemId lp;
2245 :
2246 : /*
2247 : * Out of range? If so, nothing more to look at on this page
2248 : */
1470 andres 2249 CBC 4018247 : if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
1470 andres 2250 GIC 254228 : return false;
2251 :
2252 3764019 : targoffset = hscan->rs_vistuples[hscan->rs_cindex];
144 peter 2253 GNC 3764019 : page = BufferGetPage(hscan->rs_cbuf);
2254 3764019 : lp = PageGetItemId(page, targoffset);
1470 andres 2255 GIC 3764019 : Assert(ItemIdIsNormal(lp));
2256 :
144 peter 2257 GNC 3764019 : hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1470 andres 2258 GIC 3764019 : hscan->rs_ctup.t_len = ItemIdGetLength(lp);
2259 3764019 : hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
2260 3764019 : ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
1470 andres 2261 ECB :
1470 andres 2262 CBC 3764019 : pgstat_count_heap_fetch(scan->rs_rd);
2263 :
1470 andres 2264 ECB : /*
2265 : * Set up the result slot to point to this tuple. Note that the slot
2266 : * acquires a pin on the buffer.
2267 : */
1470 andres 2268 GIC 3764019 : ExecStoreBufferHeapTuple(&hscan->rs_ctup,
1470 andres 2269 ECB : slot,
2270 : hscan->rs_cbuf);
2271 :
1470 andres 2272 CBC 3764019 : hscan->rs_cindex++;
2273 :
2274 3764019 : return true;
2275 : }
2276 :
2277 : static bool
1471 andres 2278 GIC 6455 : heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
2279 : {
1471 andres 2280 CBC 6455 : HeapScanDesc hscan = (HeapScanDesc) scan;
1471 andres 2281 GIC 6455 : TsmRoutine *tsm = scanstate->tsmroutine;
2282 : BlockNumber blockno;
2283 :
1471 andres 2284 ECB : /* return false immediately if relation is empty */
1471 andres 2285 GIC 6455 : if (hscan->rs_nblocks == 0)
1471 andres 2286 LBC 0 : return false;
2287 :
1471 andres 2288 GIC 6455 : if (tsm->NextSampleBlock)
2289 : {
1471 andres 2290 CBC 2222 : blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
1471 andres 2291 GIC 2222 : hscan->rs_cblock = blockno;
1471 andres 2292 ECB : }
2293 : else
2294 : {
2295 : /* scanning table sequentially */
2296 :
1471 andres 2297 CBC 4233 : if (hscan->rs_cblock == InvalidBlockNumber)
1471 andres 2298 EUB : {
1471 andres 2299 GIC 39 : Assert(!hscan->rs_inited);
1471 andres 2300 CBC 39 : blockno = hscan->rs_startblock;
2301 : }
1471 andres 2302 ECB : else
2303 : {
1471 andres 2304 GIC 4194 : Assert(hscan->rs_inited);
2305 :
2306 4194 : blockno = hscan->rs_cblock + 1;
2307 :
2308 4194 : if (blockno >= hscan->rs_nblocks)
1471 andres 2309 ECB : {
2310 : /* wrap to beginning of rel, might not have started at 0 */
1471 andres 2311 CBC 39 : blockno = 0;
1471 andres 2312 ECB : }
2313 :
2314 : /*
2315 : * Report our new scan position for synchronization purposes.
2316 : *
2317 : * Note: we do this before checking for end of scan so that the
2318 : * final state of the position hint is back at the start of the
2319 : * rel. That's not strictly necessary, but otherwise when you run
2320 : * the same query multiple times the starting position would shift
2321 : * a little bit backwards on every invocation, which is confusing.
2322 : * We don't guarantee any specific ordering in general, though.
2323 : */
1421 andres 2324 GIC 4194 : if (scan->rs_flags & SO_ALLOW_SYNC)
1471 andres 2325 UIC 0 : ss_report_location(scan->rs_rd, blockno);
2326 :
1471 andres 2327 GIC 4194 : if (blockno == hscan->rs_startblock)
2328 : {
2329 39 : blockno = InvalidBlockNumber;
2330 : }
2331 : }
2332 : }
2333 :
2334 6455 : if (!BlockNumberIsValid(blockno))
2335 : {
1471 andres 2336 CBC 85 : if (BufferIsValid(hscan->rs_cbuf))
1471 andres 2337 GBC 72 : ReleaseBuffer(hscan->rs_cbuf);
1471 andres 2338 GIC 85 : hscan->rs_cbuf = InvalidBuffer;
1471 andres 2339 CBC 85 : hscan->rs_cblock = InvalidBlockNumber;
1471 andres 2340 GIC 85 : hscan->rs_inited = false;
1471 andres 2341 ECB :
1471 andres 2342 GIC 85 : return false;
2343 : }
2344 :
2345 6370 : heapgetpage(scan, blockno);
1471 andres 2346 CBC 6370 : hscan->rs_inited = true;
2347 :
2348 6370 : return true;
1471 andres 2349 ECB : }
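 /*
 * Editor's aside on the two paths above: samplers such as TABLESAMPLE
 * SYSTEM provide NextSampleBlock and pick block numbers themselves
 * (returning InvalidBlockNumber when done), while BERNOULLI leaves it NULL
 * and relies on the sequential walk here, which starts at rs_startblock
 * and wraps once around the relation.
 */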
2350 :
2351 : static bool
1471 andres 2352 CBC 126947 : heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
2353 : TupleTableSlot *slot)
1471 andres 2354 ECB : {
1471 andres 2355 GIC 126947 : HeapScanDesc hscan = (HeapScanDesc) scan;
2356 126947 : TsmRoutine *tsm = scanstate->tsmroutine;
1471 andres 2357 CBC 126947 : BlockNumber blockno = hscan->rs_cblock;
1421 2358 126947 : bool pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
2359 :
1471 andres 2360 ECB : Page page;
2361 : bool all_visible;
2362 : OffsetNumber maxoffset;
2363 :
2364 : /*
2365 : * When not using pagemode, we must lock the buffer during tuple
2366 : * visibility checks.
2367 : */
1471 andres 2368 CBC 126947 : if (!pagemode)
2369 2097 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
1471 andres 2370 ECB :
1471 andres 2371 GIC 126947 : page = (Page) BufferGetPage(hscan->rs_cbuf);
2372 253343 : all_visible = PageIsAllVisible(page) &&
2373 126396 : !scan->rs_snapshot->takenDuringRecovery;
2374 126947 : maxoffset = PageGetMaxOffsetNumber(page);
2375 :
2376 : for (;;)
1471 andres 2377 UIC 0 : {
2378 : OffsetNumber tupoffset;
2379 :
1471 andres 2380 CBC 126947 : CHECK_FOR_INTERRUPTS();
1471 andres 2381 ECB :
2382 : /* Ask the tablesample method which tuples to check on this page. */
1471 andres 2383 CBC 126947 : tupoffset = tsm->NextSampleTuple(scanstate,
1471 andres 2384 ECB : blockno,
2385 : maxoffset);
2386 :
1471 andres 2387 GIC 126947 : if (OffsetNumberIsValid(tupoffset))
2388 : {
1471 andres 2389 EUB : ItemId itemid;
2390 : bool visible;
1471 andres 2391 GIC 120580 : HeapTuple tuple = &(hscan->rs_ctup);
1471 andres 2392 ECB :
2393 : /* Skip invalid tuple pointers. */
1471 andres 2394 GIC 120580 : itemid = PageGetItemId(page, tupoffset);
1471 andres 2395 CBC 120580 : if (!ItemIdIsNormal(itemid))
1471 andres 2396 UIC 0 : continue;
2397 :
1471 andres 2398 GIC 120580 : tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1471 andres 2399 CBC 120580 : tuple->t_len = ItemIdGetLength(itemid);
1471 andres 2400 GIC 120580 : ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
2401 :
2402 :
1471 andres 2403 CBC 120580 : if (all_visible)
1471 andres 2404 GIC 120174 : visible = true;
2405 : else
1471 andres 2406 CBC 406 : visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
1471 andres 2407 ECB : tuple, tupoffset);
1471 andres 2408 EUB :
2409 : /* in pagemode, heapgetpage did this for us */
1471 andres 2410 CBC 120580 : if (!pagemode)
1167 tmunro 2411 3 : HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
1167 tmunro 2412 ECB : hscan->rs_cbuf, scan->rs_snapshot);
2413 :
2414 : /* Try next tuple from same page. */
1471 andres 2415 CBC 120580 : if (!visible)
1471 andres 2416 LBC 0 : continue;
2417 :
1471 andres 2418 ECB : /* Found visible tuple, return it. */
1471 andres 2419 GIC 120580 : if (!pagemode)
2420 3 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2421 :
1471 andres 2422 CBC 120580 : ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
1471 andres 2423 ECB :
2424 : /* Count successfully-fetched tuples as heap fetches */
1471 andres 2425 GIC 120580 : pgstat_count_heap_getnext(scan->rs_rd);
2426 :
1471 andres 2427 CBC 120580 : return true;
1471 andres 2428 EUB : }
2429 : else
2430 : {
1471 andres 2431 ECB : /*
2432 : * If we get here, it means we've exhausted the items on this page
2433 : * and it's time to move to the next.
2434 : */
1471 andres 2435 GIC 6367 : if (!pagemode)
2436 2094 : LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
1471 andres 2437 ECB :
1471 andres 2438 GIC 6367 : ExecClearTuple(slot);
1471 andres 2439 CBC 6367 : return false;
2440 : }
2441 : }
2442 :
2443 : Assert(0);
2444 : }
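 /*
 * Editor's sketch of the NextSampleTuple contract consumed above: it is
 * called repeatedly for the current page and may return candidate offsets
 * in any order; InvalidOffsetNumber means "page exhausted".  A
 * hypothetical toy sampler that accepts only the first line pointer of
 * each page -- real samplers keep and reset their per-page state in
 * node->tsm_state:
 */
static OffsetNumber
sketch_next_sample_tuple(SampleScanState *node,
						 BlockNumber blockno, OffsetNumber maxoffset)
{
	bool	   *done_this_page = (bool *) node->tsm_state;	/* hypothetical */

	if (*done_this_page || maxoffset < FirstOffsetNumber)
		return InvalidOffsetNumber;
	*done_this_page = true;
	return FirstOffsetNumber;
}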
2445 :
2446 :
1471 andres 2447 ECB : /* ----------------------------------------------------------------------------
2448 : * Helper functions for the above.
2449 : * ----------------------------------------------------------------------------
2450 : */
2451 :
2452 : /*
2453 : * Reconstruct and rewrite the given tuple
2454 : *
2455 : * We cannot simply copy the tuple as-is, for several reasons:
2456 : *
2457 : * 1. We'd like to squeeze out the values of any dropped columns, both
2458 : * to save space and to ensure we have no corner-case failures. (It's
2459 : * possible for example that the new table hasn't got a TOAST table
2460 : * and so is unable to store any large values of dropped cols.)
2461 : *
2462 : * 2. The tuple might not even be legal for the new table; this is
2463 : * currently only known to happen as an after-effect of ALTER TABLE
2464 : * SET WITHOUT OIDS.
2465 : *
2466 : * So, we must reconstruct the tuple from component Datums.
2467 : */
2468 : static void
1471 andres 2469 GIC 384627 : reform_and_rewrite_tuple(HeapTuple tuple,
2470 : Relation OldHeap, Relation NewHeap,
2471 : Datum *values, bool *isnull, RewriteState rwstate)
2472 : {
2473 384627 : TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
2474 384627 : TupleDesc newTupDesc = RelationGetDescr(NewHeap);
2475 : HeapTuple copiedTuple;
2476 : int i;
2477 :
2478 384627 : heap_deform_tuple(tuple, oldTupDesc, values, isnull);
2479 :
2480 : /* Be sure to null out any dropped columns */
1471 andres 2481 CBC 3083520 : for (i = 0; i < newTupDesc->natts; i++)
2482 : {
1471 andres 2483 GIC 2698893 : if (TupleDescAttr(newTupDesc, i)->attisdropped)
1471 andres 2484 UIC 0 : isnull[i] = true;
1471 andres 2485 ECB : }
2486 :
1471 andres 2487 GIC 384627 : copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
2488 :
2489 : /* The heap rewrite module does the rest */
1471 andres 2490 CBC 384627 : rewrite_heap_tuple(rwstate, tuple, copiedTuple);
2491 :
1471 andres 2492 GIC 384627 : heap_freetuple(copiedTuple);
1471 andres 2493 CBC 384627 : }
2494 :
1471 andres 2495 ECB : /*
1471 andres 2496 EUB : * Check visibility of the tuple.
2497 : */
2498 : static bool
1471 andres 2499 CBC 406 : SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
2500 : HeapTuple tuple,
2501 : OffsetNumber tupoffset)
1471 andres 2502 ECB : {
1471 andres 2503 GIC 406 : HeapScanDesc hscan = (HeapScanDesc) scan;
1471 andres 2504 ECB :
1421 andres 2505 CBC 406 : if (scan->rs_flags & SO_ALLOW_PAGEMODE)
2506 : {
2507 : /*
 2508 : * In page-at-a-time mode, heapgetpage() already did visibility checks,
2509 : * so just look at the info it left in rs_vistuples[].
2510 : *
1471 andres 2511 ECB : * We use a binary search over the known-sorted array. Note: we could
2512 : * save some effort if we insisted that NextSampleTuple select tuples
2513 : * in increasing order, but it's not clear that there would be enough
2514 : * gain to justify the restriction.
2515 : */
1471 andres 2516 GIC 403 : int start = 0,
1471 andres 2517 CBC 403 : end = hscan->rs_ntuples - 1;
2518 :
1471 andres 2519 GIC 776 : while (start <= end)
2520 : {
2521 776 : int mid = (start + end) / 2;
2522 776 : OffsetNumber curoffset = hscan->rs_vistuples[mid];
2523 :
2524 776 : if (tupoffset == curoffset)
2525 403 : return true;
2526 373 : else if (tupoffset < curoffset)
2527 145 : end = mid - 1;
1471 andres 2528 ECB : else
1471 andres 2529 CBC 228 : start = mid + 1;
2530 : }
1471 andres 2531 ECB :
1471 andres 2532 UIC 0 : return false;
1471 andres 2533 ECB : }
2534 : else
2535 : {
2536 : /* Otherwise, we have to check the tuple individually. */
1471 andres 2537 CBC 3 : return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
1471 andres 2538 ECB : buffer);
2539 : }
2540 : }
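 /*
 * Editor's worked example for the binary search above: with
 * rs_vistuples[] = {2, 5, 9} and rs_ntuples = 3, probing tupoffset = 5
 * hits at mid = 1 immediately; probing tupoffset = 7 goes mid = 1
 * (5 < 7, so start = 2), then mid = 2 (9 > 7, so end = 1), the loop ends,
 * and the tuple is reported not visible.
 */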
2541 :
2542 :
2543 : /* ------------------------------------------------------------------------
1490 andres 2544 EUB : * Definition of the heap table access method.
2545 : * ------------------------------------------------------------------------
2546 : */
2547 :
2548 : static const TableAmRoutine heapam_methods = {
1495 andres 2549 ECB : .type = T_TableAmRoutine,
2550 :
2551 : .slot_callbacks = heapam_slot_callbacks,
2552 :
2553 : .scan_begin = heap_beginscan,
2554 : .scan_end = heap_endscan,
2555 : .scan_rescan = heap_rescan,
2556 : .scan_getnextslot = heap_getnextslot,
2557 :
2558 : .scan_set_tidrange = heap_set_tidrange,
2559 : .scan_getnextslot_tidrange = heap_getnextslot_tidrange,
2560 :
2561 : .parallelscan_estimate = table_block_parallelscan_estimate,
2562 : .parallelscan_initialize = table_block_parallelscan_initialize,
2563 : .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
2564 :
2565 : .index_fetch_begin = heapam_index_fetch_begin,
2566 : .index_fetch_reset = heapam_index_fetch_reset,
2567 : .index_fetch_end = heapam_index_fetch_end,
2568 : .index_fetch_tuple = heapam_index_fetch_tuple,
2569 :
2570 : .tuple_insert = heapam_tuple_insert,
2571 : .tuple_insert_speculative = heapam_tuple_insert_speculative,
2572 : .tuple_complete_speculative = heapam_tuple_complete_speculative,
2573 : .multi_insert = heap_multi_insert,
2574 : .tuple_delete = heapam_tuple_delete,
2575 : .tuple_update = heapam_tuple_update,
2576 : .tuple_lock = heapam_tuple_lock,
2577 :
2578 : .tuple_fetch_row_version = heapam_fetch_row_version,
2579 : .tuple_get_latest_tid = heap_get_latest_tid,
2580 : .tuple_tid_valid = heapam_tuple_tid_valid,
2581 : .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
2582 : .index_delete_tuples = heap_index_delete_tuples,
2583 :
2584 : .relation_set_new_filelocator = heapam_relation_set_new_filelocator,
2585 : .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
2586 : .relation_copy_data = heapam_relation_copy_data,
2587 : .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
2588 : .relation_vacuum = heap_vacuum_rel,
2589 : .scan_analyze_next_block = heapam_scan_analyze_next_block,
2590 : .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
2591 : .index_build_range_scan = heapam_index_build_range_scan,
2592 : .index_validate_scan = heapam_index_validate_scan,
2593 :
2594 : .relation_size = table_block_relation_size,
2595 : .relation_needs_toast_table = heapam_relation_needs_toast_table,
2596 : .relation_toast_am = heapam_relation_toast_am,
2597 : .relation_fetch_toast_slice = heap_fetch_toast_slice,
2598 :
2599 : .relation_estimate_size = heapam_estimate_rel_size,
2600 :
2601 : .scan_bitmap_next_block = heapam_scan_bitmap_next_block,
2602 : .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
2603 : .scan_sample_next_block = heapam_scan_sample_next_block,
2604 : .scan_sample_next_tuple = heapam_scan_sample_next_tuple
2605 : };
2606 :
2607 :
2608 : const TableAmRoutine *
1495 andres 2609 GIC 14674113 : GetHeapamTableAmRoutine(void)
2610 : {
2611 14674113 : return &heapam_methods;
2612 : }
2613 :
2614 : Datum
2615 881174 : heap_tableam_handler(PG_FUNCTION_ARGS)
2616 : {
2617 881174 : PG_RETURN_POINTER(&heapam_methods);
2618 : }
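 /*
 * Editor's sketch of how a handler like heap_tableam_handler() above is
 * wired up at the SQL level (object names here are hypothetical; the heap
 * AM itself is installed by the initdb-time catalogs):
 *
 *   CREATE FUNCTION myam_handler(internal) RETURNS table_am_handler
 *       AS 'MODULE_PATHNAME', 'myam_handler' LANGUAGE C STRICT;
 *   CREATE ACCESS METHOD myam TYPE TABLE HANDLER myam_handler;
 *   CREATE TABLE t (a int) USING myam;
 */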