Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * smgr.c
4 : * public interface routines to storage manager switch.
5 : *
6 : * All file system operations in POSTGRES dispatch through these
7 : * routines.
8 : *
9 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
10 : * Portions Copyright (c) 1994, Regents of the University of California
11 : *
12 : *
13 : * IDENTIFICATION
14 : * src/backend/storage/smgr/smgr.c
15 : *
16 : *-------------------------------------------------------------------------
17 : */
18 : #include "postgres.h"
19 :
20 : #include "access/xlogutils.h"
21 : #include "lib/ilist.h"
22 : #include "storage/bufmgr.h"
23 : #include "storage/fd.h"
24 : #include "storage/ipc.h"
25 : #include "storage/md.h"
26 : #include "storage/smgr.h"
27 : #include "utils/hsearch.h"
28 : #include "utils/inval.h"
29 :
30 :
31 : /*
32 : * This struct of function pointers defines the API between smgr.c and
33 : * any individual storage manager module. Note that smgr subfunctions are
34 : * generally expected to report problems via elog(ERROR). An exception is
35 : * that smgr_unlink should use elog(WARNING), rather than erroring out,
36 : * because we normally unlink relations during post-commit/abort cleanup,
37 : * and so it's too late to raise an error. Also, various conditions that
38 : * would normally be errors should be allowed during bootstrap and/or WAL
39 : * recovery --- see comments in md.c for details.
40 : */
41 : typedef struct f_smgr
42 : {
43 : void (*smgr_init) (void); /* may be NULL */
44 : void (*smgr_shutdown) (void); /* may be NULL */
45 : void (*smgr_open) (SMgrRelation reln);
46 : void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
47 : void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
48 : bool isRedo);
49 : bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
50 : void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
51 : bool isRedo);
52 : void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
53 : BlockNumber blocknum, const void *buffer, bool skipFsync);
54 : void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
55 : BlockNumber blocknum, int nblocks, bool skipFsync);
56 : bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
57 : BlockNumber blocknum);
58 : void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
59 : BlockNumber blocknum, void *buffer);
60 : void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
61 : BlockNumber blocknum, const void *buffer, bool skipFsync);
62 : void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
63 : BlockNumber blocknum, BlockNumber nblocks);
64 : BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
65 : void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
66 : BlockNumber nblocks);
67 : void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
68 : } f_smgr;
69 :
70 : static const f_smgr smgrsw[] = {
71 : /* magnetic disk */
72 : {
73 : .smgr_init = mdinit,
74 : .smgr_shutdown = NULL,
75 : .smgr_open = mdopen,
76 : .smgr_close = mdclose,
77 : .smgr_create = mdcreate,
78 : .smgr_exists = mdexists,
79 : .smgr_unlink = mdunlink,
80 : .smgr_extend = mdextend,
81 : .smgr_zeroextend = mdzeroextend,
82 : .smgr_prefetch = mdprefetch,
83 : .smgr_read = mdread,
84 : .smgr_write = mdwrite,
85 : .smgr_writeback = mdwriteback,
86 : .smgr_nblocks = mdnblocks,
87 : .smgr_truncate = mdtruncate,
88 : .smgr_immedsync = mdimmedsync,
89 : }
90 : };
91 :
92 : static const int NSmgr = lengthof(smgrsw);
93 :
94 : /*
95 : * Each backend has a hashtable that stores all extant SMgrRelation objects.
96 : * In addition, "unowned" SMgrRelation objects are chained together in a list.
97 : */
98 : static HTAB *SMgrRelationHash = NULL;
99 :
100 : static dlist_head unowned_relns;
101 :
102 : /* local function prototypes */
103 : static void smgrshutdown(int code, Datum arg);
104 :
105 :
106 : /*
107 : * smgrinit(), smgrshutdown() -- Initialize or shut down storage
108 : * managers.
109 : *
110 : * Note: smgrinit is called during backend startup (normal or standalone
111 : * case), *not* during postmaster start. Therefore, any resources created
112 : * here or destroyed in smgrshutdown are backend-local.
113 : */
114 : void
7551 tgl 115 GIC 13296 : smgrinit(void)
116 : {
117 : int i;
118 :
9345 bruce 119 CBC 26592 : for (i = 0; i < NSmgr; i++)
120 : {
9345 bruce 121 GIC 13296 : if (smgrsw[i].smgr_init)
2040 peter_e 122 13296 : smgrsw[i].smgr_init();
9770 scrappy 123 ECB : }
124 :
9345 bruce 125 : /* register the shutdown proc */
8224 peter_e 126 CBC 13296 : on_proc_exit(smgrshutdown, 0);
9770 scrappy 127 GIC 13296 : }
128 :
129 : /*
6453 tgl 130 ECB : * on_proc_exit hook for smgr cleanup during backend shutdown
131 : */
132 : static void
7058 peter_e 133 GIC 13296 : smgrshutdown(int code, Datum arg)
134 : {
135 : int i;
136 :
9345 bruce 137 CBC 26592 : for (i = 0; i < NSmgr; i++)
138 : {
9345 bruce 139 GIC 13296 : if (smgrsw[i].smgr_shutdown)
2040 peter_e 140 UIC 0 : smgrsw[i].smgr_shutdown();
9770 scrappy 141 ECB : }
9770 scrappy 142 GIC 13296 : }
9770 scrappy 143 ECB :
6998 tgl 144 EUB : /*
145 : * smgropen() -- Return an SMgrRelation object, creating it if need be.
6998 tgl 146 ECB : *
147 : * This does not attempt to actually open the underlying file.
148 : */
149 : SMgrRelation
277 rhaas 150 GNC 8211764 : smgropen(RelFileLocator rlocator, BackendId backend)
151 : {
152 : RelFileLocatorBackend brlocator;
153 : SMgrRelation reln;
6998 tgl 154 ECB : bool found;
155 :
6998 tgl 156 GIC 8211764 : if (SMgrRelationHash == NULL)
157 : {
158 : /* First time through: initialize the hash table */
159 : HASHCTL ctl;
6998 tgl 160 ECB :
193 rhaas 161 GNC 11982 : ctl.keysize = sizeof(RelFileLocatorBackend);
6998 tgl 162 GIC 11982 : ctl.entrysize = sizeof(SMgrRelationData);
163 11982 : SMgrRelationHash = hash_create("smgr relation table", 400,
164 : &ctl, HASH_ELEM | HASH_BLOBS);
1474 tomas.vondra 165 CBC 11982 : dlist_init(&unowned_relns);
6998 tgl 166 ECB : }
167 :
168 : /* Look up or create an entry */
277 rhaas 169 GNC 8211764 : brlocator.locator = rlocator;
170 8211764 : brlocator.backend = backend;
6998 tgl 171 GIC 8211764 : reln = (SMgrRelation) hash_search(SMgrRelationHash,
172 : &brlocator,
6998 tgl 173 ECB : HASH_ENTER, &found);
174 :
175 : /* Initialize it if not present before */
6998 tgl 176 GIC 8211764 : if (!found)
177 : {
178 : /* hash_search already filled in the lookup key */
6663 179 1342539 : reln->smgr_owner = NULL;
4807 tgl 180 CBC 1342539 : reln->smgr_targblock = InvalidBlockNumber;
982 tmunro 181 GIC 6712695 : for (int i = 0; i <= MAX_FORKNUM; ++i)
182 5370156 : reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
6998 tgl 183 CBC 1342539 : reln->smgr_which = 0; /* we only have md.c at present */
5354 heikki.linnakangas 184 ECB :
1362 tmunro 185 : /* implementation-specific initialization */
1362 tmunro 186 CBC 1342539 : smgrsw[reln->smgr_which].smgr_open(reln);
3826 tgl 187 ECB :
188 : /* it has no owner yet */
1474 tomas.vondra 189 GIC 1342539 : dlist_push_tail(&unowned_relns, &reln->node);
6998 tgl 190 ECB : }
191 :
6998 tgl 192 GIC 8211764 : return reln;
6998 tgl 193 ECB : }
194 :
195 : /*
6663 196 : * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object
197 : *
198 : * There can be only one owner at a time; this is sufficient since currently
199 : * the only such owners exist in the relcache.
200 : */
201 : void
6663 tgl 202 GIC 1083249 : smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
203 : {
204 : /* We don't support "disowning" an SMgrRelation here, use smgrclearowner */
3826 205 1083249 : Assert(owner != NULL);
3826 tgl 206 ECB :
207 : /*
208 : * First, unhook any old owner. (Normally there shouldn't be any, but it
6385 bruce 209 : * seems possible that this can happen during swap_relation_files()
210 : * depending on the order of processing. It's ok to close the old
211 : * relcache entry early in that case.)
212 : *
213 : * If there isn't an old owner, then the reln should be in the unowned
214 : * list, and we need to remove it.
215 : */
6663 tgl 216 GIC 1083249 : if (reln->smgr_owner)
217 475 : *(reln->smgr_owner) = NULL;
218 : else
1474 tomas.vondra 219 1082774 : dlist_delete(&reln->node);
6663 tgl 220 ECB :
221 : /* Now establish the ownership relationship. */
6663 tgl 222 GIC 1083249 : reln->smgr_owner = owner;
6663 tgl 223 CBC 1083249 : *owner = reln;
6663 tgl 224 GIC 1083249 : }
225 :
3320 heikki.linnakangas 226 ECB : /*
227 : * smgrclearowner() -- Remove long-lived reference to an SMgrRelation object
228 : * if one exists
229 : */
230 : void
3320 heikki.linnakangas 231 GIC 23828 : smgrclearowner(SMgrRelation *owner, SMgrRelation reln)
232 : {
233 : /* Do nothing if the SMgrRelation object is not owned by the owner */
234 23828 : if (reln->smgr_owner != owner)
3320 heikki.linnakangas 235 LBC 0 : return;
236 :
237 : /* unset the owner's reference */
3320 heikki.linnakangas 238 CBC 23828 : *owner = NULL;
3320 heikki.linnakangas 239 EUB :
240 : /* unset our reference to the owner */
3320 heikki.linnakangas 241 GIC 23828 : reln->smgr_owner = NULL;
3320 heikki.linnakangas 242 ECB :
243 : /* add to list of unowned relations */
1474 tomas.vondra 244 GIC 23828 : dlist_push_tail(&unowned_relns, &reln->node);
3826 tgl 245 ECB : }
246 :
247 : /*
5354 heikki.linnakangas 248 : * smgrexists() -- Does the underlying file for a fork exist?
249 : */
250 : bool
5354 heikki.linnakangas 251 GIC 1061892 : smgrexists(SMgrRelation reln, ForkNumber forknum)
252 : {
2040 peter_e 253 1061892 : return smgrsw[reln->smgr_which].smgr_exists(reln, forknum);
254 : }
5354 heikki.linnakangas 255 ECB :
256 : /*
6663 tgl 257 : * smgrclose() -- Close and delete an SMgrRelation object.
258 : */
259 : void
6998 tgl 260 GIC 853901 : smgrclose(SMgrRelation reln)
261 : {
262 : SMgrRelation *owner;
263 : ForkNumber forknum;
6663 tgl 264 ECB :
5354 heikki.linnakangas 265 GIC 4269505 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
2040 peter_e 266 3415604 : smgrsw[reln->smgr_which].smgr_close(reln, forknum);
267 :
6663 tgl 268 853901 : owner = reln->smgr_owner;
6663 tgl 269 ECB :
3826 tgl 270 CBC 853901 : if (!owner)
1474 tomas.vondra 271 GIC 248727 : dlist_delete(&reln->node);
3826 tgl 272 ECB :
6998 tgl 273 GIC 853901 : if (hash_search(SMgrRelationHash,
62 peter 274 GNC 853901 : &(reln->smgr_rlocator),
6998 tgl 275 ECB : HASH_REMOVE, NULL) == NULL)
6998 tgl 276 UIC 0 : elog(ERROR, "SMgrRelation hashtable corrupted");
6663 tgl 277 ECB :
278 : /*
279 : * Unhook the owner pointer, if any. We do this last since in the remote
6385 bruce 280 EUB : * possibility of failure above, the SMgrRelation object will still exist.
281 : */
6663 tgl 282 GIC 853901 : if (owner)
283 605174 : *owner = NULL;
6998 284 853901 : }
285 :
337 tmunro 286 ECB : /*
287 : * smgrrelease() -- Release all resources used by this object.
288 : *
289 : * The object remains valid.
290 : */
291 : void
337 tmunro 292 GIC 10753 : smgrrelease(SMgrRelation reln)
293 : {
294 53765 : for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++)
295 : {
337 tmunro 296 CBC 43012 : smgrsw[reln->smgr_which].smgr_close(reln, forknum);
337 tmunro 297 GIC 43012 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
337 tmunro 298 ECB : }
337 tmunro 299 GIC 10753 : }
337 tmunro 300 ECB :
301 : /*
302 : * smgrreleaseall() -- Release resources used by all objects.
303 : *
304 : * This is called for PROCSIGNAL_BARRIER_SMGRRELEASE.
305 : */
306 : void
337 tmunro 307 GIC 248 : smgrreleaseall(void)
308 : {
309 : HASH_SEQ_STATUS status;
310 : SMgrRelation reln;
337 tmunro 311 ECB :
312 : /* Nothing to do if hashtable not set up */
337 tmunro 313 GIC 248 : if (SMgrRelationHash == NULL)
314 66 : return;
315 :
316 182 : hash_seq_init(&status, SMgrRelationHash);
337 tmunro 317 ECB :
337 tmunro 318 CBC 10935 : while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
337 tmunro 319 GIC 10753 : smgrrelease(reln);
337 tmunro 320 ECB : }
321 :
6998 tgl 322 : /*
323 : * smgrcloseall() -- Close all existing SMgrRelation objects.
324 : */
325 : void
6998 tgl 326 GIC 3604 : smgrcloseall(void)
327 : {
328 : HASH_SEQ_STATUS status;
329 : SMgrRelation reln;
6998 tgl 330 ECB :
331 : /* Nothing to do if hashtable not set up */
6998 tgl 332 GIC 3604 : if (SMgrRelationHash == NULL)
333 145 : return;
334 :
335 3459 : hash_seq_init(&status, SMgrRelationHash);
6998 tgl 336 ECB :
6998 tgl 337 CBC 83273 : while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
6998 tgl 338 GIC 79814 : smgrclose(reln);
6998 tgl 339 ECB : }
340 :
341 : /*
342 : * smgrcloserellocator() -- Close SMgrRelation object for given RelFileLocator,
343 : * if one exists.
344 : *
345 : * This has the same effects as smgrclose(smgropen(rlocator)), but it avoids
346 : * uselessly creating a hashtable entry only to drop it again when no
347 : * such entry exists already.
348 : */
349 : void
277 rhaas 350 GNC 285256 : smgrcloserellocator(RelFileLocatorBackend rlocator)
351 : {
352 : SMgrRelation reln;
353 :
6998 tgl 354 ECB : /* Nothing to do if hashtable not set up */
6998 tgl 355 GIC 285256 : if (SMgrRelationHash == NULL)
356 19 : return;
357 :
358 285237 : reln = (SMgrRelation) hash_search(SMgrRelationHash,
359 : &rlocator,
6998 tgl 360 ECB : HASH_FIND, NULL);
6998 tgl 361 GIC 285237 : if (reln != NULL)
6998 tgl 362 CBC 95946 : smgrclose(reln);
363 : }
364 :
9770 scrappy 365 ECB : /*
9345 bruce 366 : * smgrcreate() -- Create a new relation.
367 : *
368 : * Given an already-created (but presumably unused) SMgrRelation,
369 : * cause the underlying disk file or other storage for the fork
370 : * to be created.
371 : */
372 : void
5254 heikki.linnakangas 373 GIC 2721301 : smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
374 : {
2040 peter_e 375 2721301 : smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo);
9770 scrappy 376 2721301 : }
9770 scrappy 377 ECB :
378 : /*
1100 noah 379 : * smgrdosyncall() -- Immediately sync all forks of all given relations
380 : *
381 : * All forks of all given relations are synced out to the store.
382 : *
383 : * This is equivalent to FlushRelationBuffers() for each smgr relation,
384 : * then calling smgrimmedsync() for all forks of each relation, but it's
385 : * significantly quicker so should be preferred when possible.
386 : */
387 : void
1100 noah 388 GIC 9 : smgrdosyncall(SMgrRelation *rels, int nrels)
389 : {
390 9 : int i = 0;
391 : ForkNumber forknum;
1100 noah 392 ECB :
1100 noah 393 GIC 9 : if (nrels == 0)
1100 noah 394 LBC 0 : return;
395 :
1100 noah 396 GIC 9 : FlushRelationsAllBuffers(rels, nrels);
1100 noah 397 ECB :
1100 noah 398 EUB : /*
399 : * Sync the physical file(s).
1100 noah 400 ECB : */
1100 noah 401 GIC 18 : for (i = 0; i < nrels; i++)
402 : {
403 9 : int which = rels[i]->smgr_which;
404 :
1100 noah 405 CBC 45 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
406 : {
407 36 : if (smgrsw[which].smgr_exists(rels[i], forknum))
1100 noah 408 GIC 10 : smgrsw[which].smgr_immedsync(rels[i], forknum);
1100 noah 409 ECB : }
410 : }
411 : }
412 :
413 : /*
414 : * smgrdounlinkall() -- Immediately unlink all forks of all given relations
415 : *
416 : * All forks of all given relations are removed from the store. This
417 : * should not be used during transactional operations, since it can't be
418 : * undone.
419 : *
420 : * If isRedo is true, it is okay for the underlying file(s) to be gone
421 : * already.
422 : */
423 : void
3734 alvherre 424 GIC 11019 : smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
425 : {
3602 bruce 426 11019 : int i = 0;
427 : RelFileLocatorBackend *rlocators;
3602 bruce 428 ECB : ForkNumber forknum;
429 :
3734 alvherre 430 CBC 11019 : if (nrels == 0)
3734 alvherre 431 GIC 356 : return;
432 :
433 : /*
816 akapila 434 ECB : * Get rid of any remaining buffers for the relations. bufmgr will just
435 : * drop them without bothering to write the contents.
436 : */
271 rhaas 437 GNC 10663 : DropRelationsAllBuffers(rels, nrels);
438 :
439 : /*
440 : * create an array which contains all relations to be dropped, and close
3602 bruce 441 ECB : * each relation's forks at the smgr level while at it
442 : */
277 rhaas 443 GNC 10663 : rlocators = palloc(sizeof(RelFileLocatorBackend) * nrels);
3734 alvherre 444 GIC 48045 : for (i = 0; i < nrels; i++)
445 : {
277 rhaas 446 GNC 37382 : RelFileLocatorBackend rlocator = rels[i]->smgr_rlocator;
3734 alvherre 447 CBC 37382 : int which = rels[i]->smgr_which;
3734 alvherre 448 ECB :
277 rhaas 449 GNC 37382 : rlocators[i] = rlocator;
3734 alvherre 450 ECB :
451 : /* Close the forks at smgr level */
3734 alvherre 452 GIC 186910 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
2040 peter_e 453 CBC 149528 : smgrsw[which].smgr_close(rels[i], forknum);
454 : }
455 :
3734 alvherre 456 ECB : /*
457 : * Send a shared-inval message to force other backends to close any
458 : * dangling smgr references they may have for these rels. We should do
459 : * this before starting the actual unlinking, in case we fail partway
460 : * through that step. Note that the sinval messages will eventually come
461 : * back to this backend, too, and thereby provide a backstop that we
462 : * closed our own smgr rel.
463 : */
3734 alvherre 464 GIC 48045 : for (i = 0; i < nrels; i++)
277 rhaas 465 GNC 37382 : CacheInvalidateSmgr(rlocators[i]);
466 :
467 : /*
3734 alvherre 468 ECB : * Delete the physical file(s).
469 : *
470 : * Note: smgr_unlink must treat deletion failure as a WARNING, not an
471 : * ERROR, because we've already decided to commit or abort the current
472 : * xact.
473 : */
474 :
3734 alvherre 475 GIC 48045 : for (i = 0; i < nrels; i++)
476 : {
3602 bruce 477 37382 : int which = rels[i]->smgr_which;
478 :
3734 alvherre 479 CBC 186910 : for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
277 rhaas 480 GNC 149528 : smgrsw[which].smgr_unlink(rlocators[i], forknum, isRedo);
3734 alvherre 481 ECB : }
482 :
277 rhaas 483 GNC 10663 : pfree(rlocators);
3734 alvherre 484 ECB : }
485 :
486 :
9770 scrappy 487 : /*
488 : * smgrextend() -- Add a new block to a file.
489 : *
490 : * The semantics are nearly the same as smgrwrite(): write at the
491 : * specified position. However, this is to be used for the case of
492 : * extending a relation (i.e., blocknum is at or beyond the current
493 : * EOF). Note that we assume writing a block beyond current EOF
494 : * causes intervening file space to become filled with zeroes.
495 : */
496 : void
5050 bruce 497 GIC 177935 : smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
498 : const void *buffer, bool skipFsync)
499 : {
2040 peter_e 500 177935 : smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
1957 rhaas 501 ECB : buffer, skipFsync);
502 :
503 : /*
982 tmunro 504 : * Normally we expect this to increase nblocks by one, but if the cached
505 : * value isn't as expected, just invalidate it so the next call asks the
506 : * kernel.
507 : */
982 tmunro 508 GIC 177935 : if (reln->smgr_cached_nblocks[forknum] == blocknum)
509 132235 : reln->smgr_cached_nblocks[forknum] = blocknum + 1;
510 : else
511 45700 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
9770 scrappy 512 CBC 177935 : }
9770 scrappy 513 ECB :
514 : /*
515 : * smgrzeroextend() -- Add new zeroed out blocks to a file.
516 : *
517 : * Similar to smgrextend(), except the relation can be extended by
518 : * multiple blocks at once and the added blocks will be filled with
519 : * zeroes.
520 : */
521 : void
4 andres 522 GNC 343785 : smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
523 : int nblocks, bool skipFsync)
524 : {
525 343785 : smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum,
526 : nblocks, skipFsync);
527 :
528 : /*
529 : * Normally we expect this to increase the fork size by nblocks, but if
530 : * the cached value isn't as expected, just invalidate it so the next call
531 : * asks the kernel.
532 : */
533 343785 : if (reln->smgr_cached_nblocks[forknum] == blocknum)
534 343785 : reln->smgr_cached_nblocks[forknum] = blocknum + nblocks;
535 : else
4 andres 536 UNC 0 : reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
4 andres 537 GNC 343785 : }
538 :
539 : /*
5200 tgl 540 ECB : * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation.
1096 tmunro 541 : *
542 : * In recovery only, this can return false to indicate that a file
543 : * doesn't exist (presumably it has been dropped by a later WAL
544 : * record).
545 : */
546 : bool
5200 tgl 547 GIC 212103 : smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
548 : {
1096 tmunro 549 212103 : return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum);
550 : }
5200 tgl 551 ECB :
552 : /*
553 : * smgrread() -- read a particular block from a relation into the supplied
9345 bruce 554 : * buffer.
555 : *
556 : * This routine is called from the buffer manager in order to
557 : * instantiate pages in the shared buffer cache. All storage managers
558 : * return pages in the format that POSTGRES expects.
559 : */
560 : void
5050 bruce 561 GIC 1310557 : smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
562 : void *buffer)
9770 scrappy 563 ECB : {
2040 peter_e 564 GIC 1310557 : smgrsw[reln->smgr_which].smgr_read(reln, forknum, blocknum, buffer);
9770 scrappy 565 GBC 1310544 : }
9770 scrappy 566 ECB :
567 : /*
568 : * smgrwrite() -- Write the supplied buffer out.
569 : *
570 : * This is to be used only for updating already-existing blocks of a
571 : * relation (ie, those before the current EOF). To extend a relation,
572 : * use smgrextend().
573 : *
574 : * This is not a synchronous write -- the block is not necessarily
575 : * on disk at return, only dumped out to the kernel. However,
6887 tgl 576 : * provisions will be made to fsync the write before the next checkpoint.
577 : *
4622 rhaas 578 : * skipFsync indicates that the caller will make other provisions to
579 : * fsync the relation, so we needn't bother. Temporary relations also
580 : * do not require fsync.
581 : */
582 : void
5050 bruce 583 GIC 748947 : smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
584 : const void *buffer, bool skipFsync)
585 : {
2040 peter_e 586 748947 : smgrsw[reln->smgr_which].smgr_write(reln, forknum, blocknum,
587 : buffer, skipFsync);
8210 vadim4o 588 748947 : }
589 :
2606 andres 590 ECB :
591 : /*
592 : * smgrwriteback() -- Trigger kernel writeback for the supplied range of
593 : * blocks.
594 : */
595 : void
2606 andres 596 GIC 134299 : smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
597 : BlockNumber nblocks)
598 : {
2040 peter_e 599 134299 : smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum,
600 : nblocks);
2606 andres 601 134299 : }
602 :
603 : /*
604 : * smgrnblocks() -- Calculate the number of blocks in the
605 : * supplied relation.
606 : */
607 : BlockNumber
5354 heikki.linnakangas 608 5666199 : smgrnblocks(SMgrRelation reln, ForkNumber forknum)
609 : {
610 : BlockNumber result;
611 :
817 akapila 612 ECB : /* Check and return if we get the cached value for the number of blocks. */
817 akapila 613 GIC 5666199 : result = smgrnblocks_cached(reln, forknum);
614 5666199 : if (result != InvalidBlockNumber)
817 akapila 615 CBC 3043823 : return result;
616 :
617 2622376 : result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
618 :
817 akapila 619 GIC 2622358 : reln->smgr_cached_nblocks[forknum] = result;
620 :
621 2622358 : return result;
622 : }
623 :
624 : /*
817 akapila 625 ECB : * smgrnblocks_cached() -- Get the cached number of blocks in the supplied
626 : * relation.
627 : *
628 : * Returns an InvalidBlockNumber when not in recovery and when the relation
629 : * fork size is not cached.
630 : */
631 : BlockNumber
817 akapila 632 GIC 5682675 : smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum)
633 : {
634 : /*
635 : * For now, we only use cached values in recovery due to lack of a shared
636 : * invalidation mechanism for changes in file size.
982 tmunro 637 ECB : */
982 tmunro 638 GIC 5682675 : if (InRecovery && reln->smgr_cached_nblocks[forknum] != InvalidBlockNumber)
639 3045775 : return reln->smgr_cached_nblocks[forknum];
640 :
817 akapila 641 2636900 : return InvalidBlockNumber;
9770 scrappy 642 ECB : }
643 :
9629 vadim4o 644 : /*
645 : * smgrtruncate() -- Truncate the given forks of supplied relation to
1293 fujii 646 : * each specified numbers of blocks
647 : *
4813 tgl 648 : * The truncation is done immediately, so this can't be rolled back.
649 : *
1293 fujii 650 : * The caller must hold AccessExclusiveLock on the relation, to ensure that
651 : * other backends receive the smgr invalidation event that this function sends
652 : * before they access any forks of the relation again.
653 : */
654 : void
1293 fujii 655 GIC 494 : smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nblocks)
656 : {
657 : int i;
658 :
659 : /*
660 : * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
6385 bruce 661 ECB : * just drop them without bothering to write the contents.
662 : */
271 rhaas 663 GNC 494 : DropRelationBuffers(reln, forknum, nforks, nblocks);
664 :
665 : /*
666 : * Send a shared-inval message to force other backends to close any smgr
4813 tgl 667 ECB : * references they may have for this rel. This is useful because they
4807 668 : * might have open file pointers to segments that got removed, and/or
669 : * smgr_targblock variables pointing past the new rel end. (The inval
4813 670 : * message will come back to our backend, too, causing a
671 : * probably-unnecessary local smgr flush. But we don't expect that this
672 : * is a performance-critical path.) As in the unlink code, we want to be
673 : * sure the message is sent before we start changing things on-disk.
674 : */
277 rhaas 675 GNC 494 : CacheInvalidateSmgr(reln->smgr_rlocator);
676 :
677 : /* Do the truncation */
1293 fujii 678 GIC 1157 : for (i = 0; i < nforks; i++)
679 : {
680 : /* Make the cached size is invalid if we encounter an error. */
982 tmunro 681 663 : reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber;
682 :
1293 fujii 683 663 : smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], nblocks[i]);
1293 fujii 684 ECB :
685 : /*
686 : * We might as well update the local smgr_cached_nblocks values. The
687 : * smgr cache inval message that this function sent will cause other
688 : * backends to invalidate their copies of smgr_fsm_nblocks and
689 : * smgr_vm_nblocks, and these ones too at the next command boundary.
690 : * But these ensure they aren't outright wrong until then.
691 : */
982 tmunro 692 CBC 663 : reln->smgr_cached_nblocks[forknum[i]] = nblocks[i];
693 : }
9629 vadim4o 694 GIC 494 : }
695 :
696 : /*
697 : * smgrimmedsync() -- Force the specified relation to stable storage.
698 : *
699 : * Synchronously force all previous writes to the specified relation
700 : * down to disk.
701 : *
702 : * This is useful for building completely new relations (eg, new
703 : * indexes). Instead of incrementally WAL-logging the index build
6811 tgl 704 ECB : * steps, we can just write completed index pages to disk with smgrwrite
705 : * or smgrextend, and then fsync the completed index file before
706 : * committing the transaction. (This is sufficient for purposes of
6885 707 : * crash recovery, since it effectively duplicates forcing a checkpoint
708 : * for the completed index. But it is *not* sufficient if one wishes
709 : * to use the WAL log for PITR or replication purposes: in that case
6811 710 : * we have to make WAL entries as well.)
711 : *
4622 rhaas 712 : * The preceding writes should specify skipFsync = true to avoid
713 : * duplicative fsyncs.
714 : *
715 : * Note that you need to do FlushRelationBuffers() first if there is
716 : * any possibility that there are dirty buffers for the relation;
717 : * otherwise the sync is not very meaningful.
718 : */
719 : void
5354 heikki.linnakangas 720 GIC 59642 : smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
6885 tgl 721 ECB : {
2040 peter_e 722 GIC 59642 : smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum);
6885 tgl 723 CBC 59642 : }
724 :
725 : /*
726 : * AtEOXact_SMgr
727 : *
728 : * This routine is called during transaction commit or abort (it doesn't
729 : * particularly care which). All transient SMgrRelation objects are closed.
730 : *
731 : * We do this as a compromise between wanting transient SMgrRelations to
732 : * live awhile (to amortize the costs of blind writes of multiple blocks)
733 : * and needing them to not live forever (since we're probably holding open
734 : * a kernel file descriptor for the underlying file, and we need to ensure
735 : * that gets closed reasonably soon if the file gets deleted).
736 : */
737 : void
3826 tgl 738 GIC 486175 : AtEOXact_SMgr(void)
739 : {
740 : dlist_mutable_iter iter;
741 :
742 : /*
743 : * Zap all unowned SMgrRelations. We rely on smgrclose() to remove each
744 : * one from the list.
745 : */
1474 tomas.vondra 746 535163 : dlist_foreach_modify(iter, &unowned_relns)
747 : {
1418 tgl 748 48988 : SMgrRelation rel = dlist_container(SMgrRelationData, node,
1418 tgl 749 ECB : iter.cur);
750 :
1474 tomas.vondra 751 CBC 48988 : Assert(rel->smgr_owner == NULL);
1474 tomas.vondra 752 ECB :
1474 tomas.vondra 753 GIC 48988 : smgrclose(rel);
754 : }
3826 tgl 755 486175 : }
756 :
757 : /*
758 : * This routine is called when we are ordered to release all open files by a
759 : * ProcSignalBarrier.
760 : */
761 : bool
421 tmunro 762 248 : ProcessBarrierSmgrRelease(void)
763 : {
337 764 248 : smgrreleaseall();
421 765 248 : return true;
766 : }
|