Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * smgr.c
4 : : * public interface routines to storage manager switch.
5 : : *
6 : : * All file system operations on relations dispatch through these routines.
7 : : * An SMgrRelation represents physical on-disk relation files that are open
8 : : * for reading and writing.
9 : : *
10 : : * When a relation is first accessed through the relation cache, the
11 : : * corresponding SMgrRelation entry is opened by calling smgropen(), and the
12 : : * reference is stored in the relation cache entry.
13 : : *
14 : : * Accesses that don't go through the relation cache open the SMgrRelation
15 : : * directly. That includes flushing buffers from the buffer cache, as well as
16 : : * all accesses in auxiliary processes like the checkpointer or the WAL redo
17 : : * in the startup process.
18 : : *
19 : : * Operations like CREATE, DROP, ALTER TABLE also hold SMgrRelation references
20 : : * independent of the relation cache. They need to prepare the physical files
21 : : * before updating the relation cache.
22 : : *
23 : : * There is a hash table that holds all the SMgrRelation entries in the
24 : : * backend. If you call smgropen() twice for the same rel locator, you get a
25 : : * reference to the same SMgrRelation. The reference is valid until the end of
26 : : * transaction. This makes repeated access to the same relation efficient,
27 : : * and allows caching things like the relation size in the SMgrRelation entry.
28 : : *
29 : : * At end of transaction, all SMgrRelation entries that haven't been pinned
30 : : * are removed. An SMgrRelation can hold kernel file system descriptors for
31 : : * the underlying files, and we'd like to close those reasonably soon if the
32 : : * file gets deleted. The SMgrRelation references held by the relcache are
33 : : * pinned to prevent them from being closed.
34 : : *
35 : : * There is another mechanism to close file descriptors early:
36 : : * PROCSIGNAL_BARRIER_SMGRRELEASE. It is a request to immediately close all
37 : : * file descriptors. Upon receiving that signal, the backend closes all file
38 : : * descriptors held open by SMgrRelations, but because it can happen in the
39 : : * middle of a transaction, we cannot destroy the SMgrRelation objects
40 : : * themselves, as there could be pointers to them in active use. See
41 : : * smgrrelease() and smgrreleaseall().
42 : : *
43 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
44 : : * Portions Copyright (c) 1994, Regents of the University of California
45 : : *
46 : : *
47 : : * IDENTIFICATION
48 : : * src/backend/storage/smgr/smgr.c
49 : : *
50 : : *-------------------------------------------------------------------------
51 : : */
52 : : #include "postgres.h"
53 : :
54 : : #include "access/xlogutils.h"
55 : : #include "lib/ilist.h"
56 : : #include "storage/bufmgr.h"
57 : : #include "storage/ipc.h"
58 : : #include "storage/md.h"
59 : : #include "storage/smgr.h"
60 : : #include "utils/hsearch.h"
61 : : #include "utils/inval.h"
62 : :
63 : :
64 : : /*
65 : : * This struct of function pointers defines the API between smgr.c and
66 : : * any individual storage manager module. Note that smgr subfunctions are
67 : : * generally expected to report problems via elog(ERROR). An exception is
68 : : * that smgr_unlink should use elog(WARNING), rather than erroring out,
69 : : * because we normally unlink relations during post-commit/abort cleanup,
70 : : * and so it's too late to raise an error. Also, various conditions that
71 : : * would normally be errors should be allowed during bootstrap and/or WAL
72 : : * recovery --- see comments in md.c for details.
73 : : */
74 : : typedef struct f_smgr
75 : : {
76 : : void (*smgr_init) (void); /* may be NULL */
77 : : void (*smgr_shutdown) (void); /* may be NULL */
78 : : void (*smgr_open) (SMgrRelation reln);
79 : : void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
80 : : void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
81 : : bool isRedo);
82 : : bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
83 : : void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
84 : : bool isRedo);
85 : : void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
86 : : BlockNumber blocknum, const void *buffer, bool skipFsync);
87 : : void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
88 : : BlockNumber blocknum, int nblocks, bool skipFsync);
89 : : bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
90 : : BlockNumber blocknum, int nblocks);
91 : : void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum,
92 : : BlockNumber blocknum,
93 : : void **buffers, BlockNumber nblocks);
94 : : void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum,
95 : : BlockNumber blocknum,
96 : : const void **buffers, BlockNumber nblocks,
97 : : bool skipFsync);
98 : : void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
99 : : BlockNumber blocknum, BlockNumber nblocks);
100 : : BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
101 : : void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
102 : : BlockNumber nblocks);
103 : : void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
104 : : void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum);
105 : : } f_smgr;
106 : :
107 : : static const f_smgr smgrsw[] = {
108 : : /* magnetic disk */
109 : : {
110 : : .smgr_init = mdinit,
111 : : .smgr_shutdown = NULL,
112 : : .smgr_open = mdopen,
113 : : .smgr_close = mdclose,
114 : : .smgr_create = mdcreate,
115 : : .smgr_exists = mdexists,
116 : : .smgr_unlink = mdunlink,
117 : : .smgr_extend = mdextend,
118 : : .smgr_zeroextend = mdzeroextend,
119 : : .smgr_prefetch = mdprefetch,
120 : : .smgr_readv = mdreadv,
121 : : .smgr_writev = mdwritev,
122 : : .smgr_writeback = mdwriteback,
123 : : .smgr_nblocks = mdnblocks,
124 : : .smgr_truncate = mdtruncate,
125 : : .smgr_immedsync = mdimmedsync,
126 : : .smgr_registersync = mdregistersync,
127 : : }
128 : : };
129 : :
130 : : static const int NSmgr = lengthof(smgrsw);
131 : :
132 : : /*
133 : : * Each backend has a hashtable that stores all extant SMgrRelation objects.
134 : : * In addition, "unpinned" SMgrRelation objects are chained together in a list.
135 : : */
136 : : static HTAB *SMgrRelationHash = NULL;
137 : :
138 : : static dlist_head unpinned_relns;
139 : :
140 : : /* local function prototypes */
141 : : static void smgrshutdown(int code, Datum arg);
142 : : static void smgrdestroy(SMgrRelation reln);
143 : :
144 : :
145 : : /*
146 : : * smgrinit(), smgrshutdown() -- Initialize or shut down storage
147 : : * managers.
148 : : *
149 : : * Note: smgrinit is called during backend startup (normal or standalone
150 : : * case), *not* during postmaster start. Therefore, any resources created
151 : : * here or destroyed in smgrshutdown are backend-local.
152 : : */
153 : : void
7922 tgl@sss.pgh.pa.us 154 :CBC 19578 : smgrinit(void)
155 : : {
156 : : int i;
157 : :
9716 bruce@momjian.us 158 [ + + ]: 39156 : for (i = 0; i < NSmgr; i++)
159 : : {
160 [ + - ]: 19578 : if (smgrsw[i].smgr_init)
2411 peter_e@gmx.net 161 : 19578 : smgrsw[i].smgr_init();
162 : : }
163 : :
164 : : /* register the shutdown proc */
8595 165 : 19578 : on_proc_exit(smgrshutdown, 0);
10141 scrappy@hub.org 166 : 19578 : }
167 : :
168 : : /*
169 : : * on_proc_exit hook for smgr cleanup during backend shutdown
170 : : */
171 : : static void
7429 peter_e@gmx.net 172 : 18048 : smgrshutdown(int code, Datum arg)
173 : : {
174 : : int i;
175 : :
9716 bruce@momjian.us 176 [ + + ]: 36096 : for (i = 0; i < NSmgr; i++)
177 : : {
178 [ - + ]: 18048 : if (smgrsw[i].smgr_shutdown)
2411 peter_e@gmx.net 179 :UBC 0 : smgrsw[i].smgr_shutdown();
180 : : }
10141 scrappy@hub.org 181 :CBC 18048 : }
182 : :
183 : : /*
184 : : * smgropen() -- Return an SMgrRelation object, creating it if need be.
185 : : *
186 : : * In versions of PostgreSQL prior to 17, this function returned an object
187 : : * with no defined lifetime. Now, however, the object remains valid for the
188 : : * lifetime of the transaction, up to the point where AtEOXact_SMgr() is
189 : : * called, making it much easier for callers to know for how long they can
190 : : * hold on to a pointer to the returned object. If this function is called
191 : : * outside of a transaction, the object remains valid until smgrdestroy() or
192 : : * smgrdestroyall() is called. Background processes that use smgr but not
193 : : * transactions typically do this once per checkpoint cycle.
194 : : *
195 : : * This does not attempt to actually open the underlying files.
196 : : */
197 : : SMgrRelation
42 heikki.linnakangas@i 198 :GNC 8835259 : smgropen(RelFileLocator rlocator, ProcNumber backend)
199 : : {
200 : : RelFileLocatorBackend brlocator;
201 : : SMgrRelation reln;
202 : : bool found;
203 : :
128 204 [ - + ]: 8835259 : Assert(RelFileNumberIsValid(rlocator.relNumber));
205 : :
7369 tgl@sss.pgh.pa.us 206 [ + + ]:CBC 8835259 : if (SMgrRelationHash == NULL)
207 : : {
208 : : /* First time through: initialize the hash table */
209 : : HASHCTL ctl;
210 : :
564 rhaas@postgresql.org 211 : 16509 : ctl.keysize = sizeof(RelFileLocatorBackend);
7369 tgl@sss.pgh.pa.us 212 : 16509 : ctl.entrysize = sizeof(SMgrRelationData);
213 : 16509 : SMgrRelationHash = hash_create("smgr relation table", 400,
214 : : &ctl, HASH_ELEM | HASH_BLOBS);
74 heikki.linnakangas@i 215 :GNC 16509 : dlist_init(&unpinned_relns);
216 : : }
217 : :
218 : : /* Look up or create an entry */
648 rhaas@postgresql.org 219 :CBC 8835259 : brlocator.locator = rlocator;
220 : 8835259 : brlocator.backend = backend;
7369 tgl@sss.pgh.pa.us 221 : 8835259 : reln = (SMgrRelation) hash_search(SMgrRelationHash,
222 : : &brlocator,
223 : : HASH_ENTER, &found);
224 : :
225 : : /* Initialize it if not present before */
226 [ + + ]: 8835259 : if (!found)
227 : : {
228 : : /* hash_search already filled in the lookup key */
5178 229 : 976733 : reln->smgr_targblock = InvalidBlockNumber;
1353 tmunro@postgresql.or 230 [ + + ]: 4883665 : for (int i = 0; i <= MAX_FORKNUM; ++i)
231 : 3906932 : reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
7369 tgl@sss.pgh.pa.us 232 : 976733 : reln->smgr_which = 0; /* we only have md.c at present */
233 : :
234 : : /* implementation-specific initialization */
1733 tmunro@postgresql.or 235 : 976733 : smgrsw[reln->smgr_which].smgr_open(reln);
236 : :
237 : : /* it is not pinned yet */
74 heikki.linnakangas@i 238 :GNC 976733 : reln->pincount = 0;
239 : 976733 : dlist_push_tail(&unpinned_relns, &reln->node);
240 : : }
241 : :
7369 tgl@sss.pgh.pa.us 242 :CBC 8835259 : return reln;
243 : : }
244 : :
245 : : /*
246 : : * smgrpin() -- Prevent an SMgrRelation object from being destroyed at end of
247 : : * of transaction
248 : : */
249 : : void
74 heikki.linnakangas@i 250 :GNC 841516 : smgrpin(SMgrRelation reln)
251 : : {
252 [ + - ]: 841516 : if (reln->pincount == 0)
1845 tomas.vondra@postgre 253 :CBC 841516 : dlist_delete(&reln->node);
74 heikki.linnakangas@i 254 :GNC 841516 : reln->pincount++;
7034 tgl@sss.pgh.pa.us 255 :CBC 841516 : }
256 : :
257 : : /*
258 : : * smgrunpin() -- Allow an SMgrRelation object to be destroyed at end of
259 : : * transaction
260 : : *
261 : : * The object remains valid, but if there are no other pins on it, it is moved
262 : : * to the unpinned list where it will be destroyed by AtEOXact_SMgr().
263 : : */
264 : : void
74 heikki.linnakangas@i 265 :GNC 199689 : smgrunpin(SMgrRelation reln)
266 : : {
267 [ - + ]: 199689 : Assert(reln->pincount > 0);
268 : 199689 : reln->pincount--;
269 [ + - ]: 199689 : if (reln->pincount == 0)
270 : 199689 : dlist_push_tail(&unpinned_relns, &reln->node);
5725 heikki.linnakangas@i 271 :GIC 199689 : }
272 : :
273 : : /*
274 : : * smgrdestroy() -- Delete an SMgrRelation object.
275 : : */
276 : : static void
74 heikki.linnakangas@i 277 :GNC 293329 : smgrdestroy(SMgrRelation reln)
278 : : {
279 : : ForkNumber forknum;
280 : :
281 [ - + ]: 293329 : Assert(reln->pincount == 0);
282 : :
5725 heikki.linnakangas@i 283 [ + + ]:CBC 1466645 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
2411 peter_e@gmx.net 284 : 1173316 : smgrsw[reln->smgr_which].smgr_close(reln, forknum);
285 : :
74 heikki.linnakangas@i 286 :GNC 293329 : dlist_delete(&reln->node);
287 : :
7369 tgl@sss.pgh.pa.us 288 [ - + ]:CBC 293329 : if (hash_search(SMgrRelationHash,
433 peter@eisentraut.org 289 : 293329 : &(reln->smgr_rlocator),
290 : : HASH_REMOVE, NULL) == NULL)
7369 tgl@sss.pgh.pa.us 291 [ # # ]:UBC 0 : elog(ERROR, "SMgrRelation hashtable corrupted");
7369 tgl@sss.pgh.pa.us 292 :CBC 293329 : }
293 : :
294 : : /*
295 : : * smgrrelease() -- Release all resources used by this object.
296 : : *
297 : : * The object remains valid.
298 : : */
299 : : void
708 tmunro@postgresql.or 300 : 371764 : smgrrelease(SMgrRelation reln)
301 : : {
302 [ + + ]: 1858820 : for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++)
303 : : {
304 : 1487056 : smgrsw[reln->smgr_which].smgr_close(reln, forknum);
305 : 1487056 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
306 : : }
241 307 : 371764 : reln->smgr_targblock = InvalidBlockNumber;
708 308 : 371764 : }
309 : :
310 : : /*
311 : : * smgrclose() -- Close an SMgrRelation object.
312 : : *
313 : : * The SMgrRelation reference should not be used after this call. However,
314 : : * because we don't keep track of the references returned by smgropen(), we
315 : : * don't know if there are other references still pointing to the same object,
316 : : * so we cannot remove the SMgrRelation object yet. Therefore, this is just a
317 : : * synonym for smgrrelease() at the moment.
318 : : */
319 : : void
74 heikki.linnakangas@i 320 :GNC 275350 : smgrclose(SMgrRelation reln)
321 : : {
322 : 275350 : smgrrelease(reln);
323 : 275350 : }
324 : :
325 : : /*
326 : : * smgrdestroyall() -- Release resources used by all unpinned objects.
327 : : *
328 : : * It must be known that there are no pointers to SMgrRelations, other than
329 : : * those pinned with smgrpin().
330 : : */
331 : : void
332 : 434024 : smgrdestroyall(void)
333 : : {
334 : : dlist_mutable_iter iter;
335 : :
336 : : /*
337 : : * Zap all unpinned SMgrRelations. We rely on smgrdestroy() to remove
338 : : * each one from the list.
339 : : */
340 [ + + + + ]: 727353 : dlist_foreach_modify(iter, &unpinned_relns)
341 : : {
342 : 293329 : SMgrRelation rel = dlist_container(SMgrRelationData, node,
343 : : iter.cur);
344 : :
345 : 293329 : smgrdestroy(rel);
346 : : }
708 tmunro@postgresql.or 347 : 434024 : }
348 : :
349 : : /*
350 : : * smgrreleaseall() -- Release resources used by all objects.
351 : : */
352 : : void
74 heikki.linnakangas@i 353 :CBC 2610 : smgrreleaseall(void)
354 : : {
355 : : HASH_SEQ_STATUS status;
356 : : SMgrRelation reln;
357 : :
358 : : /* Nothing to do if hashtable not set up */
7369 tgl@sss.pgh.pa.us 359 [ + + ]: 2610 : if (SMgrRelationHash == NULL)
360 : 209 : return;
361 : :
362 : 2401 : hash_seq_init(&status, SMgrRelationHash);
363 : :
364 [ + + ]: 87450 : while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
365 : : {
74 heikki.linnakangas@i 366 : 85049 : smgrrelease(reln);
367 : : }
368 : : }
369 : :
370 : : /*
371 : : * smgrreleaserellocator() -- Release resources for given RelFileLocator, if
372 : : * it's open.
373 : : *
374 : : * This has the same effects as smgrrelease(smgropen(rlocator)), but avoids
375 : : * uselessly creating a hashtable entry only to drop it again when no
376 : : * such entry exists already.
377 : : */
378 : : void
74 heikki.linnakangas@i 379 :GNC 220012 : smgrreleaserellocator(RelFileLocatorBackend rlocator)
380 : : {
381 : : SMgrRelation reln;
382 : :
383 : : /* Nothing to do if hashtable not set up */
7369 tgl@sss.pgh.pa.us 384 [ + + ]:CBC 220012 : if (SMgrRelationHash == NULL)
385 : 92 : return;
386 : :
387 : 219920 : reln = (SMgrRelation) hash_search(SMgrRelationHash,
388 : : &rlocator,
389 : : HASH_FIND, NULL);
390 [ + + ]: 219920 : if (reln != NULL)
74 heikki.linnakangas@i 391 :GNC 11365 : smgrrelease(reln);
392 : : }
393 : :
394 : : /*
395 : : * smgrexists() -- Does the underlying file for a fork exist?
396 : : */
397 : : bool
398 : 1123709 : smgrexists(SMgrRelation reln, ForkNumber forknum)
399 : : {
400 : 1123709 : return smgrsw[reln->smgr_which].smgr_exists(reln, forknum);
401 : : }
402 : :
403 : : /*
404 : : * smgrcreate() -- Create a new relation.
405 : : *
406 : : * Given an already-created (but presumably unused) SMgrRelation,
407 : : * cause the underlying disk file or other storage for the fork
408 : : * to be created.
409 : : */
410 : : void
5625 heikki.linnakangas@i 411 :CBC 3127289 : smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
412 : : {
2411 peter_e@gmx.net 413 : 3127289 : smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo);
10141 scrappy@hub.org 414 : 3127289 : }
415 : :
416 : : /*
417 : : * smgrdosyncall() -- Immediately sync all forks of all given relations
418 : : *
419 : : * All forks of all given relations are synced out to the store.
420 : : *
421 : : * This is equivalent to FlushRelationBuffers() for each smgr relation,
422 : : * then calling smgrimmedsync() for all forks of each relation, but it's
423 : : * significantly quicker so should be preferred when possible.
424 : : */
425 : : void
1471 noah@leadboat.com 426 : 9 : smgrdosyncall(SMgrRelation *rels, int nrels)
427 : : {
428 : 9 : int i = 0;
429 : : ForkNumber forknum;
430 : :
431 [ - + ]: 9 : if (nrels == 0)
1471 noah@leadboat.com 432 :UBC 0 : return;
433 : :
1471 noah@leadboat.com 434 :CBC 9 : FlushRelationsAllBuffers(rels, nrels);
435 : :
436 : : /*
437 : : * Sync the physical file(s).
438 : : */
439 [ + + ]: 18 : for (i = 0; i < nrels; i++)
440 : : {
441 : 9 : int which = rels[i]->smgr_which;
442 : :
443 [ + + ]: 45 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
444 : : {
445 [ + + ]: 36 : if (smgrsw[which].smgr_exists(rels[i], forknum))
446 : 10 : smgrsw[which].smgr_immedsync(rels[i], forknum);
447 : : }
448 : : }
449 : : }
450 : :
451 : : /*
452 : : * smgrdounlinkall() -- Immediately unlink all forks of all given relations
453 : : *
454 : : * All forks of all given relations are removed from the store. This
455 : : * should not be used during transactional operations, since it can't be
456 : : * undone.
457 : : *
458 : : * If isRedo is true, it is okay for the underlying file(s) to be gone
459 : : * already.
460 : : */
461 : : void
4105 alvherre@alvh.no-ip. 462 : 12495 : smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
463 : : {
3973 bruce@momjian.us 464 : 12495 : int i = 0;
465 : : RelFileLocatorBackend *rlocators;
466 : : ForkNumber forknum;
467 : :
4105 alvherre@alvh.no-ip. 468 [ + + ]: 12495 : if (nrels == 0)
469 : 386 : return;
470 : :
471 : : /*
472 : : * Get rid of any remaining buffers for the relations. bufmgr will just
473 : : * drop them without bothering to write the contents.
474 : : */
642 rhaas@postgresql.org 475 : 12109 : DropRelationsAllBuffers(rels, nrels);
476 : :
477 : : /*
478 : : * create an array which contains all relations to be dropped, and close
479 : : * each relation's forks at the smgr level while at it
480 : : */
648 481 : 12109 : rlocators = palloc(sizeof(RelFileLocatorBackend) * nrels);
4105 alvherre@alvh.no-ip. 482 [ + + ]: 54590 : for (i = 0; i < nrels; i++)
483 : : {
648 rhaas@postgresql.org 484 : 42481 : RelFileLocatorBackend rlocator = rels[i]->smgr_rlocator;
4105 alvherre@alvh.no-ip. 485 : 42481 : int which = rels[i]->smgr_which;
486 : :
648 rhaas@postgresql.org 487 : 42481 : rlocators[i] = rlocator;
488 : :
489 : : /* Close the forks at smgr level */
4105 alvherre@alvh.no-ip. 490 [ + + ]: 212405 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
2411 peter_e@gmx.net 491 : 169924 : smgrsw[which].smgr_close(rels[i], forknum);
492 : : }
493 : :
494 : : /*
495 : : * Send a shared-inval message to force other backends to close any
496 : : * dangling smgr references they may have for these rels. We should do
497 : : * this before starting the actual unlinking, in case we fail partway
498 : : * through that step. Note that the sinval messages will eventually come
499 : : * back to this backend, too, and thereby provide a backstop that we
500 : : * closed our own smgr rel.
501 : : */
4105 alvherre@alvh.no-ip. 502 [ + + ]: 54590 : for (i = 0; i < nrels; i++)
648 rhaas@postgresql.org 503 : 42481 : CacheInvalidateSmgr(rlocators[i]);
504 : :
505 : : /*
506 : : * Delete the physical file(s).
507 : : *
508 : : * Note: smgr_unlink must treat deletion failure as a WARNING, not an
509 : : * ERROR, because we've already decided to commit or abort the current
510 : : * xact.
511 : : */
512 : :
4105 alvherre@alvh.no-ip. 513 [ + + ]: 54590 : for (i = 0; i < nrels; i++)
514 : : {
3973 bruce@momjian.us 515 : 42481 : int which = rels[i]->smgr_which;
516 : :
4105 alvherre@alvh.no-ip. 517 [ + + ]: 212405 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
648 rhaas@postgresql.org 518 : 169924 : smgrsw[which].smgr_unlink(rlocators[i], forknum, isRedo);
519 : : }
520 : :
521 : 12109 : pfree(rlocators);
522 : : }
523 : :
524 : :
525 : : /*
526 : : * smgrextend() -- Add a new block to a file.
527 : : *
528 : : * The semantics are nearly the same as smgrwrite(): write at the
529 : : * specified position. However, this is to be used for the case of
530 : : * extending a relation (i.e., blocknum is at or beyond the current
531 : : * EOF). Note that we assume writing a block beyond current EOF
532 : : * causes intervening file space to become filled with zeroes.
533 : : */
534 : : void
5421 bruce@momjian.us 535 : 106809 : smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
536 : : const void *buffer, bool skipFsync)
537 : : {
2411 peter_e@gmx.net 538 : 106809 : smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
539 : : buffer, skipFsync);
540 : :
541 : : /*
542 : : * Normally we expect this to increase nblocks by one, but if the cached
543 : : * value isn't as expected, just invalidate it so the next call asks the
544 : : * kernel.
545 : : */
1353 tmunro@postgresql.or 546 [ + + ]: 106809 : if (reln->smgr_cached_nblocks[forknum] == blocknum)
547 : 52912 : reln->smgr_cached_nblocks[forknum] = blocknum + 1;
548 : : else
549 : 53897 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
10141 scrappy@hub.org 550 : 106809 : }
551 : :
552 : : /*
553 : : * smgrzeroextend() -- Add new zeroed out blocks to a file.
554 : : *
555 : : * Similar to smgrextend(), except the relation can be extended by
556 : : * multiple blocks at once and the added blocks will be filled with
557 : : * zeroes.
558 : : */
559 : : void
375 andres@anarazel.de 560 : 195784 : smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
561 : : int nblocks, bool skipFsync)
562 : : {
563 : 195784 : smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum,
564 : : nblocks, skipFsync);
565 : :
566 : : /*
567 : : * Normally we expect this to increase the fork size by nblocks, but if
568 : : * the cached value isn't as expected, just invalidate it so the next call
569 : : * asks the kernel.
570 : : */
571 [ + - ]: 195784 : if (reln->smgr_cached_nblocks[forknum] == blocknum)
572 : 195784 : reln->smgr_cached_nblocks[forknum] = blocknum + nblocks;
573 : : else
375 andres@anarazel.de 574 :UBC 0 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
375 andres@anarazel.de 575 :CBC 195784 : }
576 : :
577 : : /*
578 : : * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation.
579 : : *
580 : : * In recovery only, this can return false to indicate that a file
581 : : * doesn't exist (presumably it has been dropped by a later WAL
582 : : * record).
583 : : */
584 : : bool
120 tmunro@postgresql.or 585 :GNC 137257 : smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
586 : : int nblocks)
587 : : {
588 : 137257 : return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks);
589 : : }
590 : :
591 : : /*
592 : : * smgrreadv() -- read a particular block range from a relation into the
593 : : * supplied buffers.
594 : : *
595 : : * This routine is called from the buffer manager in order to
596 : : * instantiate pages in the shared buffer cache. All storage managers
597 : : * return pages in the format that POSTGRES expects.
598 : : */
599 : : void
118 600 : 1105525 : smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
601 : : void **buffers, BlockNumber nblocks)
602 : : {
603 : 1105525 : smgrsw[reln->smgr_which].smgr_readv(reln, forknum, blocknum, buffers,
604 : : nblocks);
10141 scrappy@hub.org 605 :CBC 1105510 : }
606 : :
607 : : /*
608 : : * smgrwritev() -- Write the supplied buffers out.
609 : : *
610 : : * This is to be used only for updating already-existing blocks of a
611 : : * relation (ie, those before the current EOF). To extend a relation,
612 : : * use smgrextend().
613 : : *
614 : : * This is not a synchronous write -- the block is not necessarily
615 : : * on disk at return, only dumped out to the kernel. However,
616 : : * provisions will be made to fsync the write before the next checkpoint.
617 : : *
618 : : * NB: The mechanism to ensure fsync at next checkpoint assumes that there is
619 : : * something that prevents a concurrent checkpoint from "racing ahead" of the
620 : : * write. One way to prevent that is by holding a lock on the buffer; the
621 : : * buffer manager's writes are protected by that. The bulk writer facility
622 : : * in bulk_write.c checks the redo pointer and calls smgrimmedsync() if a
623 : : * checkpoint happened; that relies on the fact that no other backend can be
624 : : * concurrently modifying the page.
625 : : *
626 : : * skipFsync indicates that the caller will make other provisions to
627 : : * fsync the relation, so we needn't bother. Temporary relations also
628 : : * do not require fsync.
629 : : */
630 : : void
118 tmunro@postgresql.or 631 :GNC 531288 : smgrwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
632 : : const void **buffers, BlockNumber nblocks, bool skipFsync)
633 : : {
634 : 531288 : smgrsw[reln->smgr_which].smgr_writev(reln, forknum, blocknum,
635 : : buffers, nblocks, skipFsync);
8581 vadim4o@yahoo.com 636 :CBC 531288 : }
637 : :
638 : : /*
639 : : * smgrwriteback() -- Trigger kernel writeback for the supplied range of
640 : : * blocks.
641 : : */
642 : : void
2977 andres@anarazel.de 643 : 85655 : smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
644 : : BlockNumber nblocks)
645 : : {
2411 peter_e@gmx.net 646 : 85655 : smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum,
647 : : nblocks);
2977 andres@anarazel.de 648 : 85655 : }
649 : :
650 : : /*
651 : : * smgrnblocks() -- Calculate the number of blocks in the
652 : : * supplied relation.
653 : : */
654 : : BlockNumber
5725 heikki.linnakangas@i 655 : 5567708 : smgrnblocks(SMgrRelation reln, ForkNumber forknum)
656 : : {
657 : : BlockNumber result;
658 : :
659 : : /* Check and return if we get the cached value for the number of blocks. */
1188 akapila@postgresql.o 660 : 5567708 : result = smgrnblocks_cached(reln, forknum);
661 [ + + ]: 5567708 : if (result != InvalidBlockNumber)
662 : 3652660 : return result;
663 : :
664 : 1915048 : result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
665 : :
666 : 1915029 : reln->smgr_cached_nblocks[forknum] = result;
667 : :
668 : 1915029 : return result;
669 : : }
670 : :
671 : : /*
672 : : * smgrnblocks_cached() -- Get the cached number of blocks in the supplied
673 : : * relation.
674 : : *
675 : : * Returns an InvalidBlockNumber when not in recovery and when the relation
676 : : * fork size is not cached.
677 : : */
678 : : BlockNumber
679 : 5587245 : smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
680 : : {
681 : : /*
682 : : * For now, this function uses cached values only in recovery due to lack
683 : : * of a shared invalidation mechanism for changes in file size. Code
684 : : * elsewhere reads smgr_cached_nblocks and copes with stale data.
685 : : */
1353 tmunro@postgresql.or 686 [ + + + + ]: 5587245 : if (InRecovery && reln->smgr_cached_nblocks[forknum] != InvalidBlockNumber)
687 : 3655091 : return reln->smgr_cached_nblocks[forknum];
688 : :
1188 akapila@postgresql.o 689 : 1932154 : return InvalidBlockNumber;
690 : : }
691 : :
692 : : /*
693 : : * smgrtruncate() -- Truncate the given forks of supplied relation to
694 : : * each specified numbers of blocks
695 : : *
696 : : * The truncation is done immediately, so this can't be rolled back.
697 : : *
698 : : * The caller must hold AccessExclusiveLock on the relation, to ensure that
699 : : * other backends receive the smgr invalidation event that this function sends
700 : : * before they access any forks of the relation again.
701 : : */
702 : : void
1664 fujii@postgresql.org 703 : 578 : smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
704 : : {
705 : : int i;
706 : :
707 : : /*
708 : : * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
709 : : * just drop them without bothering to write the contents.
710 : : */
642 rhaas@postgresql.org 711 : 578 : DropRelationBuffers(reln, forknum, nforks, nblocks);
712 : :
713 : : /*
714 : : * Send a shared-inval message to force other backends to close any smgr
715 : : * references they may have for this rel. This is useful because they
716 : : * might have open file pointers to segments that got removed, and/or
717 : : * smgr_targblock variables pointing past the new rel end. (The inval
718 : : * message will come back to our backend, too, causing a
719 : : * probably-unnecessary local smgr flush. But we don't expect that this
720 : : * is a performance-critical path.) As in the unlink code, we want to be
721 : : * sure the message is sent before we start changing things on-disk.
722 : : */
648 723 : 578 : CacheInvalidateSmgr(reln->smgr_rlocator);
724 : :
725 : : /* Do the truncation */
1664 fujii@postgresql.org 726 [ + + ]: 1429 : for (i = 0; i < nforks; i++)
727 : : {
728 : : /* Make the cached size is invalid if we encounter an error. */
1353 tmunro@postgresql.or 729 : 851 : reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;
730 : :
1664 fujii@postgresql.org 731 : 851 : smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]);
732 : :
733 : : /*
734 : : * We might as well update the local smgr_cached_nblocks values. The
735 : : * smgr cache inval message that this function sent will cause other
736 : : * backends to invalidate their copies of smgr_fsm_nblocks and
737 : : * smgr_vm_nblocks, and these ones too at the next command boundary.
738 : : * But these ensure they aren't outright wrong until then.
739 : : */
1353 tmunro@postgresql.or 740 : 851 : reln->smgr_cached_nblocks[forknum[i]] = nblocks[i];
741 : : }
10000 vadim4o@yahoo.com 742 : 578 : }
743 : :
744 : : /*
745 : : * smgrregistersync() -- Request a relation to be sync'd at next checkpoint
746 : : *
747 : : * This can be used after calling smgrwrite() or smgrextend() with skipFsync =
748 : : * true, to register the fsyncs that were skipped earlier.
749 : : *
750 : : * Note: be mindful that a checkpoint could already have happened between the
751 : : * smgrwrite or smgrextend calls and this! In that case, the checkpoint
752 : : * already missed fsyncing this relation, and you should use smgrimmedsync
753 : : * instead. Most callers should use the bulk loading facility in bulk_write.c
754 : : * which handles all that.
755 : : */
756 : : void
51 heikki.linnakangas@i 757 :GNC 21978 : smgrregistersync(SMgrRelation reln, ForkNumber forknum)
758 : : {
759 : 21978 : smgrsw[reln->smgr_which].smgr_registersync(reln, forknum);
760 : 21978 : }
761 : :
762 : : /*
763 : : * smgrimmedsync() -- Force the specified relation to stable storage.
764 : : *
765 : : * Synchronously force all previous writes to the specified relation
766 : : * down to disk.
767 : : *
768 : : * This is useful for building completely new relations (eg, new
769 : : * indexes). Instead of incrementally WAL-logging the index build
770 : : * steps, we can just write completed index pages to disk with smgrwrite
771 : : * or smgrextend, and then fsync the completed index file before
772 : : * committing the transaction. (This is sufficient for purposes of
773 : : * crash recovery, since it effectively duplicates forcing a checkpoint
774 : : * for the completed index. But it is *not* sufficient if one wishes
775 : : * to use the WAL log for PITR or replication purposes: in that case
776 : : * we have to make WAL entries as well.)
777 : : *
778 : : * The preceding writes should specify skipFsync = true to avoid
779 : : * duplicative fsyncs.
780 : : *
781 : : * Note that you need to do FlushRelationBuffers() first if there is
782 : : * any possibility that there are dirty buffers for the relation;
783 : : * otherwise the sync is not very meaningful.
784 : : *
785 : : * Most callers should use the bulk loading facility in bulk_write.c
786 : : * instead of calling this directly.
787 : : */
788 : : void
5725 heikki.linnakangas@i 789 :CBC 1 : smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
790 : : {
2411 peter_e@gmx.net 791 : 1 : smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
7256 tgl@sss.pgh.pa.us 792 : 1 : }
793 : :
/*
 * AtEOXact_SMgr
 *
 * Called during transaction commit or abort (it doesn't particularly care
 * which).  Destroys all unpinned SMgrRelation objects.
 *
 * This is a compromise between wanting transient SMgrRelations to
 * live awhile (to amortize the costs of blind writes of multiple blocks)
 * and needing them to not live forever (since we're probably holding open
 * a kernel file descriptor for the underlying file, and we need to ensure
 * that gets closed reasonably soon if the file gets deleted).
 */
void
AtEOXact_SMgr(void)
{
	/* pinned objects are not on the unpinned list, so they survive */
	smgrdestroyall();
}
811 : :
812 : : /*
813 : : * This routine is called when we are ordered to release all open files by a
814 : : * ProcSignalBarrier.
815 : : */
816 : : bool
792 tmunro@postgresql.or 817 : 542 : ProcessBarrierSmgrRelease(void)
818 : : {
708 819 : 542 : smgrreleaseall();
792 820 : 542 : return true;
821 : : }
|