Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * storage.c
4 : * code to create and destroy physical storage for relations
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/catalog/storage.c
12 : *
13 : * NOTES
14 : * Some of this code used to be in storage/smgr/smgr.c, and the
15 : * function names still reflect that.
16 : *
17 : *-------------------------------------------------------------------------
18 : */
19 :
20 : #include "postgres.h"
21 :
22 : #include "access/parallel.h"
23 : #include "access/visibilitymap.h"
24 : #include "access/xact.h"
25 : #include "access/xlog.h"
26 : #include "access/xloginsert.h"
27 : #include "access/xlogutils.h"
28 : #include "catalog/storage.h"
29 : #include "catalog/storage_xlog.h"
30 : #include "miscadmin.h"
31 : #include "storage/freespace.h"
32 : #include "storage/smgr.h"
33 : #include "utils/hsearch.h"
34 : #include "utils/memutils.h"
35 : #include "utils/rel.h"
36 :
/*
 * GUC variable: size limit, in kilobytes, used by smgrDoPendingSyncs() to
 * choose between WAL-logging a relation's contents and syncing its files.
 */
int			wal_skip_threshold = 2048;
39 :
40 : /*
41 : * We keep a list of all relations (represented as RelFileLocator values)
42 : * that have been created or deleted in the current transaction. When
43 : * a relation is created, we create the physical file immediately, but
44 : * remember it so that we can delete the file again if the current
45 : * transaction is aborted. Conversely, a deletion request is NOT
46 : * executed immediately, but is just entered in the list. When and if
47 : * the transaction commits, we can delete the physical file.
48 : *
49 : * To handle subtransactions, every entry is marked with its transaction
50 : * nesting level. At subtransaction commit, we reassign the subtransaction's
51 : * entries to the parent nesting level. At subtransaction abort, we can
52 : * immediately execute the abort-time actions for all entries of the current
53 : * nesting level.
54 : *
55 : * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
56 : * unbetimes. It'd probably be OK to keep it in TopTransactionContext,
57 : * but I'm being paranoid.
58 : */
59 :
60 : typedef struct PendingRelDelete
61 : {
62 : RelFileLocator rlocator; /* relation that may need to be deleted */
63 : BackendId backend; /* InvalidBackendId if not a temp rel */
64 : bool atCommit; /* T=delete at commit; F=delete at abort */
65 : int nestLevel; /* xact nesting level of request */
66 : struct PendingRelDelete *next; /* linked-list link */
67 : } PendingRelDelete;
68 :
69 : typedef struct PendingRelSync
70 : {
71 : RelFileLocator rlocator;
72 : bool is_truncated; /* Has the file experienced truncation? */
73 : } PendingRelSync;
74 :
75 : static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
76 : static HTAB *pendingSyncHash = NULL;
77 :
78 :
79 : /*
80 : * AddPendingSync
81 : * Queue an at-commit fsync.
82 : */
83 : static void
277 rhaas 84 GNC 28159 : AddPendingSync(const RelFileLocator *rlocator)
85 : {
86 : PendingRelSync *pending;
87 : bool found;
88 :
89 : /* create the hash if not yet */
1100 noah 90 CBC 28159 : if (!pendingSyncHash)
91 : {
92 : HASHCTL ctl;
93 :
277 rhaas 94 GNC 5535 : ctl.keysize = sizeof(RelFileLocator);
1100 noah 95 CBC 5535 : ctl.entrysize = sizeof(PendingRelSync);
96 5535 : ctl.hcxt = TopTransactionContext;
97 5535 : pendingSyncHash = hash_create("pending sync hash", 16, &ctl,
98 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
99 : }
100 :
277 rhaas 101 GNC 28159 : pending = hash_search(pendingSyncHash, rlocator, HASH_ENTER, &found);
1100 noah 102 CBC 28159 : Assert(!found);
103 28159 : pending->is_truncated = false;
104 28159 : }
105 :
106 : /*
107 : * RelationCreateStorage
108 : * Create physical storage for a relation.
109 : *
110 : * Create the underlying disk file storage for the relation. This only
111 : * creates the main fork; additional forks are created lazily by the
112 : * modules that need them.
113 : *
114 : * This function is transactional. The creation is WAL-logged, and if the
115 : * transaction aborts later on, the storage will be destroyed. A caller
116 : * that does not want the storage to be destroyed in case of an abort may
117 : * pass register_delete = false.
118 : */
119 : SMgrRelation
277 rhaas 120 GNC 162426 : RelationCreateStorage(RelFileLocator rlocator, char relpersistence,
121 : bool register_delete)
122 : {
123 : SMgrRelation srel;
124 : BackendId backend;
125 : bool needs_wal;
126 :
1100 noah 127 CBC 162426 : Assert(!IsInParallelMode()); /* couldn't update pendingSyncHash */
128 :
4500 rhaas 129 162426 : switch (relpersistence)
130 : {
131 2808 : case RELPERSISTENCE_TEMP:
2495 tgl 132 2808 : backend = BackendIdForTempRelations();
4500 rhaas 133 2808 : needs_wal = false;
134 2808 : break;
4484 135 217 : case RELPERSISTENCE_UNLOGGED:
136 217 : backend = InvalidBackendId;
137 217 : needs_wal = false;
138 217 : break;
4500 139 159401 : case RELPERSISTENCE_PERMANENT:
140 159401 : backend = InvalidBackendId;
141 159401 : needs_wal = true;
142 159401 : break;
4500 rhaas 143 UBC 0 : default:
144 0 : elog(ERROR, "invalid relpersistence: %c", relpersistence);
145 : return NULL; /* placate compiler */
146 : }
147 :
277 rhaas 148 GNC 162426 : srel = smgropen(rlocator, backend);
5254 heikki.linnakangas 149 CBC 162426 : smgrcreate(srel, MAIN_FORKNUM, false);
150 :
4500 rhaas 151 162426 : if (needs_wal)
277 rhaas 152 GNC 159401 : log_smgrcreate(&srel->smgr_rlocator.locator, MAIN_FORKNUM);
153 :
154 : /*
155 : * Add the relation to the list of stuff to delete at abort, if we are
156 : * asked to do so.
157 : */
376 rhaas 158 CBC 162426 : if (register_delete)
159 : {
160 : PendingRelDelete *pending;
161 :
162 : pending = (PendingRelDelete *)
163 121565 : MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
277 rhaas 164 GNC 121565 : pending->rlocator = rlocator;
376 rhaas 165 CBC 121565 : pending->backend = backend;
166 121565 : pending->atCommit = false; /* delete if abort */
167 121565 : pending->nestLevel = GetCurrentTransactionNestLevel();
168 121565 : pending->next = pendingDeletes;
169 121565 : pendingDeletes = pending;
170 : }
171 :
1100 noah 172 162426 : if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded())
173 : {
174 26407 : Assert(backend == InvalidBackendId);
277 rhaas 175 GNC 26407 : AddPendingSync(&rlocator);
176 : }
177 :
1441 andres 178 CBC 162426 : return srel;
179 : }
180 :
181 : /*
182 : * Perform XLogInsert of an XLOG_SMGR_CREATE record to WAL.
183 : */
184 : void
277 rhaas 185 GNC 173160 : log_smgrcreate(const RelFileLocator *rlocator, ForkNumber forkNum)
186 : {
187 : xl_smgr_create xlrec;
188 :
189 : /*
190 : * Make an XLOG entry reporting the file creation.
191 : */
192 173160 : xlrec.rlocator = *rlocator;
4484 rhaas 193 CBC 173160 : xlrec.forkNum = forkNum;
194 :
3062 heikki.linnakangas 195 173160 : XLogBeginInsert();
196 173160 : XLogRegisterData((char *) &xlrec, sizeof(xlrec));
197 173160 : XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE);
4484 rhaas 198 173160 : }
199 :
200 : /*
201 : * RelationDropStorage
202 : * Schedule unlinking of physical storage at transaction commit.
203 : */
204 : void
5254 heikki.linnakangas 205 30351 : RelationDropStorage(Relation rel)
206 : {
207 : PendingRelDelete *pending;
208 :
209 : /* Add the relation to the list of stuff to delete at commit */
210 : pending = (PendingRelDelete *)
211 30351 : MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
277 rhaas 212 GNC 30351 : pending->rlocator = rel->rd_locator;
4622 rhaas 213 CBC 30351 : pending->backend = rel->rd_backend;
5254 heikki.linnakangas 214 30351 : pending->atCommit = true; /* delete if commit */
215 30351 : pending->nestLevel = GetCurrentTransactionNestLevel();
216 30351 : pending->next = pendingDeletes;
217 30351 : pendingDeletes = pending;
218 :
219 : /*
220 : * NOTE: if the relation was created in this transaction, it will now be
221 : * present in the pending-delete list twice, once with atCommit true and
222 : * once with atCommit false. Hence, it will be physically deleted at end
223 : * of xact in either case (and the other entry will be ignored by
224 : * smgrDoPendingDeletes, so no error will occur). We could instead remove
225 : * the existing list entry and delete the physical file immediately, but
226 : * for now I'll keep the logic simple.
227 : */
228 :
229 30351 : RelationCloseSmgr(rel);
230 30351 : }
231 :
232 : /*
233 : * RelationPreserveStorage
234 : * Mark a relation as not to be deleted after all.
235 : *
236 : * We need this function because relation mapping changes are committed
237 : * separately from commit of the whole transaction, so it's still possible
238 : * for the transaction to abort after the mapping update is done.
239 : * When a new physical relation is installed in the map, it would be
240 : * scheduled for delete-on-abort, so we'd delete it, and be in trouble.
241 : * The relation mapper fixes this by telling us to not delete such relations
242 : * after all as part of its commit.
243 : *
244 : * We also use this to reuse an old build of an index during ALTER TABLE, this
245 : * time removing the delete-at-commit entry.
246 : *
247 : * No-op if the relation is not among those scheduled for deletion.
248 : */
249 : void
277 rhaas 250 GNC 3096 : RelationPreserveStorage(RelFileLocator rlocator, bool atCommit)
251 : {
252 : PendingRelDelete *pending;
253 : PendingRelDelete *prev;
254 : PendingRelDelete *next;
255 :
4809 tgl 256 CBC 3096 : prev = NULL;
257 19823 : for (pending = pendingDeletes; pending != NULL; pending = next)
258 : {
259 16727 : next = pending->next;
277 rhaas 260 GNC 16727 : if (RelFileLocatorEquals(rlocator, pending->rlocator)
4283 rhaas 261 CBC 313 : && pending->atCommit == atCommit)
262 : {
263 : /* unlink and delete list entry */
4809 tgl 264 310 : if (prev)
265 243 : prev->next = next;
266 : else
267 67 : pendingDeletes = next;
268 310 : pfree(pending);
269 : /* prev does not change */
270 : }
271 : else
272 : {
273 : /* unrelated entry, don't touch it */
274 16417 : prev = pending;
275 : }
276 : }
277 3096 : }
278 :
279 : /*
280 : * RelationTruncate
281 : * Physically truncate a relation to the specified number of blocks.
282 : *
283 : * This includes getting rid of any buffers for the blocks that are to be
284 : * dropped.
285 : */
286 : void
5254 heikki.linnakangas 287 451 : RelationTruncate(Relation rel, BlockNumber nblocks)
288 : {
289 : bool fsm;
290 : bool vm;
1293 fujii 291 451 : bool need_fsm_vacuum = false;
292 : ForkNumber forks[MAX_FORKNUM];
293 : BlockNumber blocks[MAX_FORKNUM];
1060 tgl 294 451 : int nforks = 0;
295 : SMgrRelation reln;
296 :
297 : /*
298 : * Make sure smgr_targblock etc aren't pointing somewhere past new end.
299 : * (Note: don't rely on this reln pointer below this loop.)
300 : */
636 301 451 : reln = RelationGetSmgr(rel);
302 451 : reln->smgr_targblock = InvalidBlockNumber;
982 tmunro 303 2255 : for (int i = 0; i <= MAX_FORKNUM; ++i)
636 tgl 304 1804 : reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
305 :
306 : /* Prepare for truncation of MAIN fork of the relation */
1293 fujii 307 451 : forks[nforks] = MAIN_FORKNUM;
308 451 : blocks[nforks] = nblocks;
309 451 : nforks++;
310 :
311 : /* Prepare for truncation of the FSM if it exists */
636 tgl 312 451 : fsm = smgrexists(RelationGetSmgr(rel), FSM_FORKNUM);
5254 heikki.linnakangas 313 451 : if (fsm)
314 : {
1293 fujii 315 92 : blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, nblocks);
316 92 : if (BlockNumberIsValid(blocks[nforks]))
317 : {
318 92 : forks[nforks] = FSM_FORKNUM;
319 92 : nforks++;
320 92 : need_fsm_vacuum = true;
321 : }
322 : }
323 :
324 : /* Prepare for truncation of the visibility map too if it exists */
636 tgl 325 451 : vm = smgrexists(RelationGetSmgr(rel), VISIBILITYMAP_FORKNUM);
5240 heikki.linnakangas 326 451 : if (vm)
327 : {
1293 fujii 328 92 : blocks[nforks] = visibilitymap_prepare_truncate(rel, nblocks);
329 92 : if (BlockNumberIsValid(blocks[nforks]))
330 : {
331 47 : forks[nforks] = VISIBILITYMAP_FORKNUM;
332 47 : nforks++;
333 : }
334 : }
335 :
1100 noah 336 451 : RelationPreTruncate(rel);
337 :
338 : /*
339 : * Make sure that a concurrent checkpoint can't complete while truncation
340 : * is in progress.
341 : *
342 : * The truncation operation might drop buffers that the checkpoint
343 : * otherwise would have flushed. If it does, then it's essential that the
344 : * files actually get truncated on disk before the checkpoint record is
345 : * written. Otherwise, if reply begins from that checkpoint, the
346 : * to-be-truncated blocks might still exist on disk but have older
347 : * contents than expected, which can cause replay to fail. It's OK for the
348 : * blocks to not exist on disk at all, but not for them to have the wrong
349 : * contents.
350 : */
366 rhaas 351 451 : Assert((MyProc->delayChkptFlags & DELAY_CHKPT_COMPLETE) == 0);
352 451 : MyProc->delayChkptFlags |= DELAY_CHKPT_COMPLETE;
353 :
354 : /*
355 : * We WAL-log the truncation before actually truncating, which means
356 : * trouble if the truncation fails. If we then crash, the WAL replay
357 : * likely isn't going to succeed in the truncation either, and cause a
358 : * PANIC. It's tempting to put a critical section here, but that cure
359 : * would be worse than the disease. It would turn a usually harmless
360 : * failure to truncate, that might spell trouble at WAL replay, into a
361 : * certain PANIC.
362 : */
4500 363 451 : if (RelationNeedsWAL(rel))
364 : {
365 : /*
366 : * Make an XLOG entry reporting the file truncation.
367 : */
368 : XLogRecPtr lsn;
369 : xl_smgr_truncate xlrec;
370 :
5254 heikki.linnakangas 371 119 : xlrec.blkno = nblocks;
277 rhaas 372 GNC 119 : xlrec.rlocator = rel->rd_locator;
2487 rhaas 373 CBC 119 : xlrec.flags = SMGR_TRUNCATE_ALL;
374 :
3062 heikki.linnakangas 375 119 : XLogBeginInsert();
376 119 : XLogRegisterData((char *) &xlrec, sizeof(xlrec));
377 :
378 119 : lsn = XLogInsert(RM_SMGR_ID,
379 : XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
380 :
381 : /*
382 : * Flush, because otherwise the truncation of the main relation might
383 : * hit the disk before the WAL record, and the truncation of the FSM
384 : * or visibility map. If we crashed during that window, we'd be left
385 : * with a truncated heap, but the FSM or visibility map would still
386 : * contain entries for the non-existent heap pages.
387 : */
5240 388 119 : if (fsm || vm)
5254 389 82 : XLogFlush(lsn);
390 : }
391 :
392 : /*
393 : * This will first remove any buffers from the buffer pool that should no
394 : * longer exist after truncation is complete, and then truncate the
395 : * corresponding files on disk.
396 : */
636 tgl 397 451 : smgrtruncate(RelationGetSmgr(rel), forks, nforks, blocks);
398 :
399 : /* We've done all the critical work, so checkpoints are OK now. */
366 rhaas 400 451 : MyProc->delayChkptFlags &= ~DELAY_CHKPT_COMPLETE;
401 :
402 : /*
403 : * Update upper-level FSM pages to account for the truncation. This is
404 : * important because the just-truncated pages were likely marked as
405 : * all-free, and would be preferentially selected.
406 : *
407 : * NB: There's no point in delaying checkpoints until this is done.
408 : * Because the FSM is not WAL-logged, we have to be prepared for the
409 : * possibility of corruption after a crash anyway.
410 : */
1293 fujii 411 451 : if (need_fsm_vacuum)
412 92 : FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
5254 heikki.linnakangas 413 451 : }
414 :
415 : /*
416 : * RelationPreTruncate
417 : * Perform AM-independent work before a physical truncation.
418 : *
419 : * If an access method's relation_nontransactional_truncate does not call
420 : * RelationTruncate(), it must call this before decreasing the table size.
421 : */
422 : void
1100 noah 423 451 : RelationPreTruncate(Relation rel)
424 : {
425 : PendingRelSync *pending;
426 :
427 451 : if (!pendingSyncHash)
428 448 : return;
429 :
636 tgl 430 3 : pending = hash_search(pendingSyncHash,
277 rhaas 431 GNC 3 : &(RelationGetSmgr(rel)->smgr_rlocator.locator),
432 : HASH_FIND, NULL);
1100 noah 433 CBC 3 : if (pending)
434 3 : pending->is_truncated = true;
435 : }
436 :
437 : /*
438 : * Copy a fork's data, block by block.
439 : *
440 : * Note that this requires that there is no dirty data in shared buffers. If
441 : * it's possible that there are, callers need to flush those using
442 : * e.g. FlushRelationBuffers(rel).
443 : *
444 : * Also note that this is frequently called via locutions such as
445 : * RelationCopyStorage(RelationGetSmgr(rel), ...);
446 : * That's safe only because we perform only smgr and WAL operations here.
447 : * If we invoked anything else, a relcache flush could cause our SMgrRelation
448 : * argument to become a dangling pointer.
449 : */
450 : void
1473 andres 451 86 : RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
452 : ForkNumber forkNum, char relpersistence)
453 : {
454 : PGIOAlignedBlock buf;
455 : Page page;
456 : bool use_wal;
457 : bool copying_initfork;
458 : BlockNumber nblocks;
459 : BlockNumber blkno;
460 :
461 86 : page = (Page) buf.data;
462 :
463 : /*
464 : * The init fork for an unlogged relation in many respects has to be
465 : * treated the same as normal relation, changes need to be WAL logged and
466 : * it needs to be synced to disk.
467 : */
468 86 : copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
469 : forkNum == INIT_FORKNUM;
470 :
471 : /*
472 : * We need to log the copied data in WAL iff WAL archiving/streaming is
473 : * enabled AND it's a permanent relation. This gives the same answer as
474 : * "RelationNeedsWAL(rel) || copying_initfork", because we know the
475 : * current operation created new relation storage.
476 : */
477 94 : use_wal = XLogIsNeeded() &&
478 8 : (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
479 :
480 86 : nblocks = smgrnblocks(src, forkNum);
481 :
482 661 : for (blkno = 0; blkno < nblocks; blkno++)
483 : {
484 : /* If we got a cancel signal during the copy of the data, quit */
485 575 : CHECK_FOR_INTERRUPTS();
486 :
487 575 : smgrread(src, forkNum, blkno, buf.data);
488 :
895 michael 489 575 : if (!PageIsVerifiedExtended(page, blkno,
490 : PIV_LOG_WARNING | PIV_REPORT_STAT))
491 : {
492 : /*
493 : * For paranoia's sake, capture the file path before invoking the
494 : * ereport machinery. This guards against the possibility of a
495 : * relcache flush caused by, e.g., an errcontext callback.
496 : * (errcontext callbacks shouldn't be risking any such thing, but
497 : * people have been known to forget that rule.)
498 : */
277 rhaas 499 UNC 0 : char *relpath = relpathbackend(src->smgr_rlocator.locator,
500 : src->smgr_rlocator.backend,
501 : forkNum);
502 :
1473 andres 503 UBC 0 : ereport(ERROR,
504 : (errcode(ERRCODE_DATA_CORRUPTED),
505 : errmsg("invalid page in block %u of relation %s",
506 : blkno, relpath)));
507 : }
508 :
509 : /*
510 : * WAL-log the copied page. Unfortunately we don't know what kind of a
511 : * page this is, so we have to log the full page including any unused
512 : * space.
513 : */
1473 andres 514 CBC 575 : if (use_wal)
277 rhaas 515 GNC 50 : log_newpage(&dst->smgr_rlocator.locator, forkNum, blkno, page, false);
516 :
1473 andres 517 CBC 575 : PageSetChecksumInplace(page, blkno);
518 :
519 : /*
520 : * Now write the page. We say skipFsync = true because there's no
521 : * need for smgr to schedule an fsync for this write; we'll do it
522 : * ourselves below.
523 : */
524 575 : smgrextend(dst, forkNum, blkno, buf.data, true);
525 : }
526 :
527 : /*
528 : * When we WAL-logged rel pages, we must nonetheless fsync them. The
529 : * reason is that since we're copying outside shared buffers, a CHECKPOINT
530 : * occurring during the copy has no way to flush the previously written
531 : * data to disk (indeed it won't know the new rel even exists). A crash
532 : * later on would replay WAL from the checkpoint, therefore it wouldn't
533 : * replay our earlier WAL entries. If we do not fsync those pages here,
534 : * they might still not be on disk when the crash occurs.
535 : */
1100 noah 536 86 : if (use_wal || copying_initfork)
1473 andres 537 47 : smgrimmedsync(dst, forkNum);
538 86 : }
539 :
540 : /*
541 : * RelFileLocatorSkippingWAL
542 : * Check if a BM_PERMANENT relfilelocator is using WAL.
543 : *
544 : * Changes to certain relations must not write WAL; see "Skipping WAL for
545 : * New RelFileLocator" in src/backend/access/transam/README. Though it is
546 : * known from Relation efficiently, this function is intended for the code
547 : * paths not having access to Relation.
548 : */
549 : bool
277 rhaas 550 GNC 1204250 : RelFileLocatorSkippingWAL(RelFileLocator rlocator)
551 : {
1100 noah 552 CBC 2341193 : if (!pendingSyncHash ||
277 rhaas 553 GNC 1136943 : hash_search(pendingSyncHash, &rlocator, HASH_FIND, NULL) == NULL)
1100 noah 554 CBC 1194223 : return false;
555 :
556 10027 : return true;
557 : }
558 :
559 : /*
560 : * EstimatePendingSyncsSpace
561 : * Estimate space needed to pass syncs to parallel workers.
562 : */
563 : Size
564 403 : EstimatePendingSyncsSpace(void)
565 : {
566 : long entries;
567 :
568 403 : entries = pendingSyncHash ? hash_get_num_entries(pendingSyncHash) : 0;
277 rhaas 569 GNC 403 : return mul_size(1 + entries, sizeof(RelFileLocator));
570 : }
571 :
572 : /*
573 : * SerializePendingSyncs
574 : * Serialize syncs for parallel workers.
575 : */
576 : void
1100 noah 577 CBC 403 : SerializePendingSyncs(Size maxSize, char *startAddress)
578 : {
579 : HTAB *tmphash;
580 : HASHCTL ctl;
581 : HASH_SEQ_STATUS scan;
582 : PendingRelSync *sync;
583 : PendingRelDelete *delete;
584 : RelFileLocator *src;
277 rhaas 585 GNC 403 : RelFileLocator *dest = (RelFileLocator *) startAddress;
586 :
1100 noah 587 CBC 403 : if (!pendingSyncHash)
588 312 : goto terminate;
589 :
590 : /* Create temporary hash to collect active relfilelocators */
277 rhaas 591 GNC 91 : ctl.keysize = sizeof(RelFileLocator);
592 91 : ctl.entrysize = sizeof(RelFileLocator);
1100 noah 593 CBC 91 : ctl.hcxt = CurrentMemoryContext;
277 rhaas 594 GNC 91 : tmphash = hash_create("tmp relfilelocators",
595 : hash_get_num_entries(pendingSyncHash), &ctl,
596 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
597 :
598 : /* collect all rlocator from pending syncs */
1100 noah 599 CBC 91 : hash_seq_init(&scan, pendingSyncHash);
600 790 : while ((sync = (PendingRelSync *) hash_seq_search(&scan)))
277 rhaas 601 GNC 699 : (void) hash_search(tmphash, &sync->rlocator, HASH_ENTER, NULL);
602 :
603 : /* remove deleted rnodes */
1100 noah 604 CBC 951 : for (delete = pendingDeletes; delete != NULL; delete = delete->next)
605 860 : if (delete->atCommit)
62 peter 606 GNC 156 : (void) hash_search(tmphash, &delete->rlocator,
607 : HASH_REMOVE, NULL);
608 :
1100 noah 609 CBC 91 : hash_seq_init(&scan, tmphash);
277 rhaas 610 GNC 638 : while ((src = (RelFileLocator *) hash_seq_search(&scan)))
1100 noah 611 CBC 547 : *dest++ = *src;
612 :
613 91 : hash_destroy(tmphash);
614 :
615 403 : terminate:
277 rhaas 616 GNC 403 : MemSet(dest, 0, sizeof(RelFileLocator));
1100 noah 617 CBC 403 : }
618 :
619 : /*
620 : * RestorePendingSyncs
621 : * Restore syncs within a parallel worker.
622 : *
623 : * RelationNeedsWAL() and RelFileLocatorSkippingWAL() must offer the correct
624 : * answer to parallel workers. Only smgrDoPendingSyncs() reads the
625 : * is_truncated field, at end of transaction. Hence, don't restore it.
626 : */
627 : void
628 1298 : RestorePendingSyncs(char *startAddress)
629 : {
630 : RelFileLocator *rlocator;
631 :
632 1298 : Assert(pendingSyncHash == NULL);
277 rhaas 633 GNC 3050 : for (rlocator = (RelFileLocator *) startAddress; rlocator->relNumber != 0;
634 1752 : rlocator++)
635 1752 : AddPendingSync(rlocator);
1100 noah 636 CBC 1298 : }
1100 noah 637 ECB :
638 : /*
639 : * smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
640 : *
641 : * This also runs when aborting a subxact; we want to clean up a failed
642 : * subxact immediately.
643 : *
644 : * Note: It's possible that we're being asked to remove a relation that has
645 : * no physical storage in any fork. In particular, it's possible that we're
646 : * cleaning up an old temporary relation for which RemovePgTempFiles has
647 : * already recovered the physical storage.
648 : */
649 : void
5254 heikki.linnakangas 650 GIC 489882 : smgrDoPendingDeletes(bool isCommit)
5254 heikki.linnakangas 651 ECB : {
5254 heikki.linnakangas 652 GIC 489882 : int nestLevel = GetCurrentTransactionNestLevel();
5254 heikki.linnakangas 653 ECB : PendingRelDelete *pending;
654 : PendingRelDelete *prev;
655 : PendingRelDelete *next;
3734 alvherre 656 GIC 489882 : int nrels = 0,
3397 alvherre 657 CBC 489882 : maxrels = 0;
658 489882 : SMgrRelation *srels = NULL;
5254 heikki.linnakangas 659 ECB :
5254 heikki.linnakangas 660 GIC 489882 : prev = NULL;
5254 heikki.linnakangas 661 CBC 645337 : for (pending = pendingDeletes; pending != NULL; pending = next)
5254 heikki.linnakangas 662 ECB : {
5254 heikki.linnakangas 663 GIC 155455 : next = pending->next;
5254 heikki.linnakangas 664 CBC 155455 : if (pending->nestLevel < nestLevel)
5254 heikki.linnakangas 665 ECB : {
666 : /* outer-level entries should not be processed yet */
5254 heikki.linnakangas 667 GIC 3908 : prev = pending;
5254 heikki.linnakangas 668 ECB : }
669 : else
670 : {
671 : /* unlink list entry first, so we don't retry on failure */
5254 heikki.linnakangas 672 GIC 151547 : if (prev)
5254 heikki.linnakangas 673 LBC 0 : prev->next = next;
5254 heikki.linnakangas 674 EUB : else
5254 heikki.linnakangas 675 GIC 151547 : pendingDeletes = next;
5254 heikki.linnakangas 676 ECB : /* do deletion if called for */
5254 heikki.linnakangas 677 GIC 151547 : if (pending->atCommit == isCommit)
5254 heikki.linnakangas 678 ECB : {
679 : SMgrRelation srel;
680 :
277 rhaas 681 GNC 31252 : srel = smgropen(pending->rlocator, pending->backend);
3734 alvherre 682 ECB :
683 : /* allocate the initial array, or extend it, if needed */
3397 alvherre 684 GIC 31252 : if (maxrels == 0)
3397 alvherre 685 ECB : {
3397 alvherre 686 GIC 8790 : maxrels = 8;
3260 bruce 687 CBC 8790 : srels = palloc(sizeof(SMgrRelation) * maxrels);
3397 alvherre 688 ECB : }
3397 alvherre 689 GIC 22462 : else if (maxrels <= nrels)
3734 alvherre 690 ECB : {
3734 alvherre 691 GIC 807 : maxrels *= 2;
3734 alvherre 692 CBC 807 : srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
3734 alvherre 693 ECB : }
694 :
3734 alvherre 695 GIC 31252 : srels[nrels++] = srel;
5254 heikki.linnakangas 696 ECB : }
697 : /* must explicitly free the list entry */
5254 heikki.linnakangas 698 GIC 151547 : pfree(pending);
5254 heikki.linnakangas 699 ECB : /* prev does not change */
700 : }
701 : }
702 :
3734 alvherre 703 GIC 489882 : if (nrels > 0)
3734 alvherre 704 ECB : {
3734 alvherre 705 GIC 8790 : smgrdounlinkall(srels, nrels, false);
3734 alvherre 706 ECB :
946 tgl 707 GIC 40042 : for (int i = 0; i < nrels; i++)
3734 alvherre 708 CBC 31252 : smgrclose(srels[i]);
3734 alvherre 709 ECB :
3397 alvherre 710 GIC 8790 : pfree(srels);
3397 alvherre 711 ECB : }
5254 heikki.linnakangas 712 GIC 489882 : }
5254 heikki.linnakangas 713 ECB :
714 : /*
715 : * smgrDoPendingSyncs() -- Take care of relation syncs at end of xact.
716 : */
717 : void
1100 noah 718 GIC 485972 : smgrDoPendingSyncs(bool isCommit, bool isParallelWorker)
1100 noah 719 ECB : {
720 : PendingRelDelete *pending;
1100 noah 721 GIC 485972 : int nrels = 0,
1100 noah 722 CBC 485972 : maxrels = 0;
723 485972 : SMgrRelation *srels = NULL;
1100 noah 724 ECB : HASH_SEQ_STATUS scan;
725 : PendingRelSync *pendingsync;
726 :
1100 noah 727 GIC 485972 : Assert(GetCurrentTransactionNestLevel() == 1);
1100 noah 728 ECB :
1100 noah 729 GIC 485972 : if (!pendingSyncHash)
1100 noah 730 CBC 480840 : return; /* no relation needs sync */
1100 noah 731 ECB :
732 : /* Abort -- just throw away all pending syncs */
1100 noah 733 GIC 5535 : if (!isCommit)
1100 noah 734 ECB : {
1100 noah 735 GIC 208 : pendingSyncHash = NULL;
1100 noah 736 CBC 208 : return;
1100 noah 737 ECB : }
738 :
1100 noah 739 GIC 5327 : AssertPendingSyncs_RelationCache();
1100 noah 740 ECB :
741 : /* Parallel worker -- just throw away all pending syncs */
1100 noah 742 GIC 5327 : if (isParallelWorker)
1100 noah 743 ECB : {
1100 noah 744 GIC 195 : pendingSyncHash = NULL;
1100 noah 745 CBC 195 : return;
1100 noah 746 ECB : }
747 :
748 : /* Skip syncing nodes that smgrDoPendingDeletes() will delete. */
1100 noah 749 GIC 17075 : for (pending = pendingDeletes; pending != NULL; pending = pending->next)
1100 noah 750 CBC 11943 : if (pending->atCommit)
62 peter 751 GNC 2148 : (void) hash_search(pendingSyncHash, &pending->rlocator,
1100 noah 752 ECB : HASH_REMOVE, NULL);
753 :
1100 noah 754 GIC 5132 : hash_seq_init(&scan, pendingSyncHash);
1100 noah 755 CBC 31052 : while ((pendingsync = (PendingRelSync *) hash_seq_search(&scan)))
1100 noah 756 ECB : {
757 : ForkNumber fork;
758 : BlockNumber nblocks[MAX_FORKNUM + 1];
1100 noah 759 GIC 25920 : BlockNumber total_blocks = 0;
1100 noah 760 ECB : SMgrRelation srel;
761 :
277 rhaas 762 GNC 25920 : srel = smgropen(pendingsync->rlocator, InvalidBackendId);
1100 noah 763 ECB :
764 : /*
765 : * We emit newpage WAL records for smaller relations.
766 : *
767 : * Small WAL records have a chance to be emitted along with other
768 : * backends' WAL records. We emit WAL records instead of syncing for
769 : * files that are smaller than a certain threshold, expecting faster
770 : * commit. The threshold is defined by the GUC wal_skip_threshold.
771 : */
1100 noah 772 GIC 25920 : if (!pendingsync->is_truncated)
1100 noah 773 ECB : {
1100 noah 774 GIC 129600 : for (fork = 0; fork <= MAX_FORKNUM; fork++)
1100 noah 775 ECB : {
1100 noah 776 GIC 103680 : if (smgrexists(srel, fork))
1100 noah 777 ECB : {
1100 noah 778 GIC 31413 : BlockNumber n = smgrnblocks(srel, fork);
1100 noah 779 ECB :
780 : /* we shouldn't come here for unlogged relations */
1100 noah 781 GIC 31413 : Assert(fork != INIT_FORKNUM);
1100 noah 782 CBC 31413 : nblocks[fork] = n;
783 31413 : total_blocks += n;
1100 noah 784 ECB : }
785 : else
1100 noah 786 GIC 72267 : nblocks[fork] = InvalidBlockNumber;
1100 noah 787 ECB : }
788 : }
789 :
790 : /*
791 : * Sync file or emit WAL records for its contents.
792 : *
793 : * Although we emit WAL record if the file is small enough, do file
794 : * sync regardless of the size if the file has experienced a
795 : * truncation. It is because the file would be followed by trailing
796 : * garbage blocks after a crash recovery if, while a past longer file
797 : * had been flushed out, we omitted syncing-out of the file and
798 : * emitted WAL instead. You might think that we could choose WAL if
799 : * the current main fork is longer than ever, but there's a case where
800 : * main fork is longer than ever but FSM fork gets shorter.
801 : */
1100 noah 802 GIC 25920 : if (pendingsync->is_truncated ||
1100 noah 803 CBC 25920 : total_blocks * BLCKSZ / 1024 >= wal_skip_threshold)
1100 noah 804 ECB : {
805 : /* allocate the initial array, or extend it, if needed */
1100 noah 806 GIC 9 : if (maxrels == 0)
1100 noah 807 ECB : {
1100 noah 808 GIC 9 : maxrels = 8;
1100 noah 809 CBC 9 : srels = palloc(sizeof(SMgrRelation) * maxrels);
1100 noah 810 ECB : }
1100 noah 811 UIC 0 : else if (maxrels <= nrels)
1100 noah 812 EUB : {
1100 noah 813 UIC 0 : maxrels *= 2;
1100 noah 814 UBC 0 : srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
1100 noah 815 EUB : }
816 :
1100 noah 817 GIC 9 : srels[nrels++] = srel;
1100 noah 818 ECB : }
819 : else
820 : {
821 : /* Emit WAL records for all blocks. The file is small enough. */
1100 noah 822 GIC 129555 : for (fork = 0; fork <= MAX_FORKNUM; fork++)
1100 noah 823 ECB : {
1100 noah 824 GIC 103644 : int n = nblocks[fork];
1100 noah 825 ECB : Relation rel;
826 :
1100 noah 827 GIC 103644 : if (!BlockNumberIsValid(n))
1100 noah 828 CBC 72241 : continue;
1100 noah 829 ECB :
830 : /*
831 : * Emit WAL for the whole file. Unfortunately we don't know
832 : * what kind of a page this is, so we have to log the full
833 : * page including any unused space. ReadBufferExtended()
834 : * counts some pgstat events; unfortunately, we discard them.
835 : */
277 rhaas 836 GNC 31403 : rel = CreateFakeRelcacheEntry(srel->smgr_rlocator.locator);
1100 noah 837 CBC 31403 : log_newpage_range(rel, fork, 0, n, false);
838 31403 : FreeFakeRelcacheEntry(rel);
1100 noah 839 ECB : }
840 : }
841 : }
842 :
1100 noah 843 GIC 5132 : pendingSyncHash = NULL;
1100 noah 844 ECB :
1100 noah 845 GIC 5132 : if (nrels > 0)
1100 noah 846 ECB : {
1100 noah 847 GIC 9 : smgrdosyncall(srels, nrels);
1100 noah 848 CBC 9 : pfree(srels);
1100 noah 849 ECB : }
850 : }
851 :
852 : /*
853 : * smgrGetPendingDeletes() -- Get a list of non-temp relations to be deleted.
854 : *
855 : * The return value is the number of relations scheduled for termination.
856 : * *ptr is set to point to a freshly-palloc'd array of RelFileLocators.
857 : * If there are no relations to be deleted, *ptr is set to NULL.
858 : *
859 : * Only non-temporary relations are included in the returned list. This is OK
860 : * because the list is used only in contexts where temporary relations don't
861 : * matter: we're either writing to the two-phase state file (and transactions
862 : * that have touched temp tables can't be prepared) or we're writing to xlog
863 : * (and all temporary files will be zapped if we restart anyway, so no need
864 : * for redo to do it also).
865 : *
866 : * Note that the list does not include anything scheduled for termination
867 : * by upper-level transactions.
868 : */
869 : int
277 rhaas 870 GNC 469517 : smgrGetPendingDeletes(bool forCommit, RelFileLocator **ptr)
5254 heikki.linnakangas 871 ECB : {
5254 heikki.linnakangas 872 GIC 469517 : int nestLevel = GetCurrentTransactionNestLevel();
5254 heikki.linnakangas 873 ECB : int nrels;
874 : RelFileLocator *rptr;
875 : PendingRelDelete *pending;
876 :
5254 heikki.linnakangas 877 GIC 469517 : nrels = 0;
5254 heikki.linnakangas 878 CBC 623106 : for (pending = pendingDeletes; pending != NULL; pending = pending->next)
5254 heikki.linnakangas 879 ECB : {
4622 rhaas 880 GIC 153589 : if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
4622 rhaas 881 CBC 31311 : && pending->backend == InvalidBackendId)
5254 heikki.linnakangas 882 28503 : nrels++;
5254 heikki.linnakangas 883 ECB : }
5254 heikki.linnakangas 884 GIC 469517 : if (nrels == 0)
5254 heikki.linnakangas 885 ECB : {
5254 heikki.linnakangas 886 GIC 461402 : *ptr = NULL;
5254 heikki.linnakangas 887 CBC 461402 : return 0;
5254 heikki.linnakangas 888 ECB : }
277 rhaas 889 GNC 8115 : rptr = (RelFileLocator *) palloc(nrels * sizeof(RelFileLocator));
5254 heikki.linnakangas 890 CBC 8115 : *ptr = rptr;
891 42420 : for (pending = pendingDeletes; pending != NULL; pending = pending->next)
5254 heikki.linnakangas 892 ECB : {
4622 rhaas 893 GIC 34305 : if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
4622 rhaas 894 CBC 28562 : && pending->backend == InvalidBackendId)
5254 heikki.linnakangas 895 ECB : {
277 rhaas 896 GNC 28503 : *rptr = pending->rlocator;
5254 heikki.linnakangas 897 CBC 28503 : rptr++;
5254 heikki.linnakangas 898 ECB : }
899 : }
5254 heikki.linnakangas 900 GIC 8115 : return nrels;
5254 heikki.linnakangas 901 ECB : }
902 :
903 : /*
904 : * PostPrepare_smgr -- Clean up after a successful PREPARE
905 : *
906 : * What we have to do here is throw away the in-memory state about pending
907 : * relation deletes. It's all been recorded in the 2PC state file and
908 : * it's no longer smgr's job to worry about it.
909 : */
910 : void
5254 heikki.linnakangas 911 GIC 356 : PostPrepare_smgr(void)
5254 heikki.linnakangas 912 ECB : {
913 : PendingRelDelete *pending;
914 : PendingRelDelete *next;
915 :
5254 heikki.linnakangas 916 GIC 415 : for (pending = pendingDeletes; pending != NULL; pending = next)
5254 heikki.linnakangas 917 ECB : {
5254 heikki.linnakangas 918 GIC 59 : next = pending->next;
5254 heikki.linnakangas 919 CBC 59 : pendingDeletes = next;
5254 heikki.linnakangas 920 ECB : /* must explicitly free the list entry */
5254 heikki.linnakangas 921 GIC 59 : pfree(pending);
5254 heikki.linnakangas 922 ECB : }
5254 heikki.linnakangas 923 GIC 356 : }
5254 heikki.linnakangas 924 ECB :
925 :
926 : /*
927 : * AtSubCommit_smgr() --- Take care of subtransaction commit.
928 : *
929 : * Reassign all items in the pending-deletes list to the parent transaction.
930 : */
931 : void
5254 heikki.linnakangas 932 GIC 4317 : AtSubCommit_smgr(void)
5254 heikki.linnakangas 933 ECB : {
5254 heikki.linnakangas 934 GIC 4317 : int nestLevel = GetCurrentTransactionNestLevel();
5254 heikki.linnakangas 935 ECB : PendingRelDelete *pending;
936 :
5254 heikki.linnakangas 937 GIC 4543 : for (pending = pendingDeletes; pending != NULL; pending = pending->next)
5254 heikki.linnakangas 938 ECB : {
5254 heikki.linnakangas 939 GIC 226 : if (pending->nestLevel >= nestLevel)
5254 heikki.linnakangas 940 CBC 105 : pending->nestLevel = nestLevel - 1;
5254 heikki.linnakangas 941 ECB : }
5254 heikki.linnakangas 942 GIC 4317 : }
5254 heikki.linnakangas 943 ECB :
944 : /*
945 : * AtSubAbort_smgr() --- Take care of subtransaction abort.
946 : *
947 : * Delete created relations and forget about deleted relations.
948 : * We can execute these operations immediately because we know this
949 : * subtransaction will not commit.
950 : */
951 : void
5254 heikki.linnakangas 952 GIC 4468 : AtSubAbort_smgr(void)
5254 heikki.linnakangas 953 ECB : {
5254 heikki.linnakangas 954 GIC 4468 : smgrDoPendingDeletes(false);
5254 heikki.linnakangas 955 CBC 4468 : }
5254 heikki.linnakangas 956 ECB :
957 : void
3062 heikki.linnakangas 958 GIC 12961 : smgr_redo(XLogReaderState *record)
5254 heikki.linnakangas 959 ECB : {
3062 heikki.linnakangas 960 GIC 12961 : XLogRecPtr lsn = record->EndRecPtr;
3062 heikki.linnakangas 961 CBC 12961 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
5254 heikki.linnakangas 962 ECB :
963 : /* Backup blocks are not used in smgr records */
3062 heikki.linnakangas 964 GIC 12961 : Assert(!XLogRecHasAnyBlockRefs(record));
5192 heikki.linnakangas 965 ECB :
5254 heikki.linnakangas 966 GIC 12961 : if (info == XLOG_SMGR_CREATE)
5254 heikki.linnakangas 967 ECB : {
5254 heikki.linnakangas 968 GIC 12919 : xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
5254 heikki.linnakangas 969 ECB : SMgrRelation reln;
970 :
277 rhaas 971 GNC 12919 : reln = smgropen(xlrec->rlocator, InvalidBackendId);
4484 rhaas 972 CBC 12919 : smgrcreate(reln, xlrec->forkNum, true);
5254 heikki.linnakangas 973 ECB : }
5254 heikki.linnakangas 974 GIC 42 : else if (info == XLOG_SMGR_TRUNCATE)
5254 heikki.linnakangas 975 ECB : {
5254 heikki.linnakangas 976 GIC 42 : xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
5254 heikki.linnakangas 977 ECB : SMgrRelation reln;
978 : Relation rel;
979 : ForkNumber forks[MAX_FORKNUM];
980 : BlockNumber blocks[MAX_FORKNUM];
1060 tgl 981 GIC 42 : int nforks = 0;
1293 fujii 982 CBC 42 : bool need_fsm_vacuum = false;
5254 heikki.linnakangas 983 ECB :
277 rhaas 984 GNC 42 : reln = smgropen(xlrec->rlocator, InvalidBackendId);
5254 heikki.linnakangas 985 ECB :
986 : /*
987 : * Forcibly create relation if it doesn't exist (which suggests that
988 : * it was dropped somewhere later in the WAL sequence). As in
989 : * XLogReadBufferForRedo, we prefer to recreate the rel and replay the
990 : * log as best we can until the drop is seen.
991 : */
5254 heikki.linnakangas 992 GIC 42 : smgrcreate(reln, MAIN_FORKNUM, true);
5254 heikki.linnakangas 993 ECB :
994 : /*
995 : * Before we perform the truncation, update minimum recovery point to
996 : * cover this WAL record. Once the relation is truncated, there's no
997 : * going back. The buffer manager enforces the WAL-first rule for
998 : * normal updates to relation files, so that the minimum recovery
999 : * point is always updated before the corresponding change in the data
1000 : * file is flushed to disk. We have to do the same manually here.
1001 : *
1002 : * Doing this before the truncation means that if the truncation fails
1003 : * for some reason, you cannot start up the system even after restart,
1004 : * until you fix the underlying situation so that the truncation will
1005 : * succeed. Alternatively, we could update the minimum recovery point
1006 : * after truncation, but that would leave a small window where the
1007 : * WAL-first rule could be violated.
1008 : */
3772 heikki.linnakangas 1009 GIC 42 : XLogFlush(lsn);
3772 heikki.linnakangas 1010 ECB :
1011 : /* Prepare for truncation of MAIN fork */
2487 rhaas 1012 GIC 42 : if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
2487 rhaas 1013 ECB : {
1293 fujii 1014 GIC 42 : forks[nforks] = MAIN_FORKNUM;
1293 fujii 1015 CBC 42 : blocks[nforks] = xlrec->blkno;
1016 42 : nforks++;
5254 heikki.linnakangas 1017 ECB :
1018 : /* Also tell xlogutils.c about it */
277 rhaas 1019 GNC 42 : XLogTruncateRelation(xlrec->rlocator, MAIN_FORKNUM, xlrec->blkno);
2487 rhaas 1020 ECB : }
1021 :
1022 : /* Prepare for truncation of FSM and VM too */
277 rhaas 1023 GNC 42 : rel = CreateFakeRelcacheEntry(xlrec->rlocator);
5050 bruce 1024 ECB :
2487 rhaas 1025 GIC 84 : if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
2487 rhaas 1026 CBC 42 : smgrexists(reln, FSM_FORKNUM))
1293 fujii 1027 ECB : {
1293 fujii 1028 GIC 24 : blocks[nforks] = FreeSpaceMapPrepareTruncateRel(rel, xlrec->blkno);
1293 fujii 1029 CBC 24 : if (BlockNumberIsValid(blocks[nforks]))
1293 fujii 1030 ECB : {
1293 fujii 1031 GIC 24 : forks[nforks] = FSM_FORKNUM;
1293 fujii 1032 CBC 24 : nforks++;
1033 24 : need_fsm_vacuum = true;
1293 fujii 1034 ECB : }
1035 : }
2487 rhaas 1036 GIC 84 : if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
2487 rhaas 1037 CBC 42 : smgrexists(reln, VISIBILITYMAP_FORKNUM))
1293 fujii 1038 ECB : {
1293 fujii 1039 GIC 19 : blocks[nforks] = visibilitymap_prepare_truncate(rel, xlrec->blkno);
1293 fujii 1040 CBC 19 : if (BlockNumberIsValid(blocks[nforks]))
1293 fujii 1041 ECB : {
1293 fujii 1042 GIC 6 : forks[nforks] = VISIBILITYMAP_FORKNUM;
1293 fujii 1043 CBC 6 : nforks++;
1293 fujii 1044 ECB : }
1045 : }
1046 :
1047 : /* Do the real work to truncate relation forks */
1293 fujii 1048 GIC 42 : if (nforks > 0)
1293 fujii 1049 CBC 42 : smgrtruncate(reln, forks, nforks, blocks);
1293 fujii 1050 ECB :
1051 : /*
1052 : * Update upper-level FSM pages to account for the truncation. This is
1053 : * important because the just-truncated pages were likely marked as
1054 : * all-free, and would be preferentially selected.
1055 : */
1293 fujii 1056 GIC 42 : if (need_fsm_vacuum)
1293 fujii 1057 CBC 24 : FreeSpaceMapVacuumRange(rel, xlrec->blkno,
1293 fujii 1058 ECB : InvalidBlockNumber);
1059 :
4807 tgl 1060 GIC 42 : FreeFakeRelcacheEntry(rel);
5254 heikki.linnakangas 1061 ECB : }
1062 : else
5254 heikki.linnakangas 1063 UIC 0 : elog(PANIC, "smgr_redo: unknown op code %u", info);
5254 heikki.linnakangas 1064 GBC 12961 : }
|