Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * shmem.c
4 : * create shared memory and initialize shared memory data structures.
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/storage/ipc/shmem.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : /*
16 : * POSTGRES processes share one or more regions of shared memory.
17 : * The shared memory is created by a postmaster and is inherited
18 : * by each backend via fork() (or, in some ports, via other OS-specific
19 : * methods). The routines in this file are used for allocating and
20 : * binding to shared memory data structures.
21 : *
22 : * NOTES:
23 : * (a) There are three kinds of shared memory data structures
24 : * available to POSTGRES: fixed-size structures, queues and hash
25 : * tables. Fixed-size structures contain things like global variables
26 : * for a module and should never be allocated after the shared memory
27 : * initialization phase. Hash tables have a fixed maximum size, but
28 : * their actual size can vary dynamically. When entries are added
29 : * to the table, more space is allocated. Queues link data structures
30 : * that have been allocated either within fixed-size structures or as hash
31 : * buckets. Each shared data structure has a string name to identify
32 : * it (assigned in the module that declares it).
33 : *
34 : * (b) During initialization, each module looks for its
35 : * shared data structures in a hash table called the "Shmem Index".
36 : * If the data structure is not present, the caller can allocate
37 : * a new one and initialize it. If the data structure is present,
38 : * the caller "attaches" to the structure by initializing a pointer
39 : * in the local address space.
40 : * The shmem index has two purposes: first, it gives us
41 : * a simple model of how the world looks when a backend process
42 : * initializes. If something is present in the shmem index,
43 : * it is initialized. If it is not, it is uninitialized. Second,
44 : * the shmem index allows us to allocate shared memory on demand
45 : * instead of trying to preallocate structures and hard-wire the
46 : * sizes and locations in header files. If you are using a lot
47 : * of shared memory in a lot of different places (and changing
48 : * things during development), this is important.
49 : *
50 : * (c) In standard Unix-ish environments, individual backends do not
51 : * need to re-establish their local pointers into shared memory, because
52 : * they inherit correct values of those variables via fork() from the
53 : * postmaster. However, this does not work in the EXEC_BACKEND case.
54 : * In ports using EXEC_BACKEND, new backends have to set up their local
55 : * pointers using the method described in (b) above.
56 : *
57 : * (d) memory allocation model: shared memory can never be
58 : * freed, once allocated. Each hash table has its own free list,
59 : * so hash buckets can be reused when an item is deleted. However,
60 : * if one hash table grows very large and then shrinks, its space
61 : * cannot be redistributed to other tables. We could build a simple
62 : * hash bucket garbage collector if need be. Right now, it seems
63 : * unnecessary.
64 : */
65 :
66 : #include "postgres.h"
67 :
68 : #include "access/transam.h"
69 : #include "fmgr.h"
70 : #include "funcapi.h"
71 : #include "miscadmin.h"
72 : #include "storage/lwlock.h"
73 : #include "storage/pg_shmem.h"
74 : #include "storage/shmem.h"
75 : #include "storage/spin.h"
76 : #include "utils/builtins.h"
77 :
78 : static void *ShmemAllocRaw(Size size, Size *allocated_size);
79 :
80 : /* shared memory global variables */
81 :
82 : static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
83 :
84 : static void *ShmemBase; /* start address of shared memory */
85 :
86 : static void *ShmemEnd; /* end+1 address of shared memory */
87 :
88 : slock_t *ShmemLock; /* spinlock for shared memory and LWLock
89 : * allocation */
90 :
91 : static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem */
92 :
93 :
94 : /*
95 : * InitShmemAccess() --- set up basic pointers to shared memory.
96 : *
97 : * Note: the argument should be declared "PGShmemHeader *seghdr",
98 : * but we use void to avoid having to include ipc.h in shmem.h.
99 : */
100 : void
6304 tgl 101 CBC 1826 : InitShmemAccess(void *seghdr)
102 : {
7862 103 1826 : PGShmemHeader *shmhdr = (PGShmemHeader *) seghdr;
104 :
105 1826 : ShmemSegHdr = shmhdr;
5271 106 1826 : ShmemBase = (void *) shmhdr;
107 1826 : ShmemEnd = (char *) ShmemBase + shmhdr->totalsize;
6304 108 1826 : }
109 :
110 : /*
111 : * InitShmemAllocation() --- set up shared-memory space allocation.
112 : *
113 : * This should be called only in the postmaster or a standalone backend.
114 : */
115 : void
116 1826 : InitShmemAllocation(void)
117 : {
118 1826 : PGShmemHeader *shmhdr = ShmemSegHdr;
119 : char *aligned;
120 :
121 1826 : Assert(shmhdr != NULL);
122 :
123 : /*
124 : * Initialize the spinlock used by ShmemAlloc. We must use
125 : * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet.
126 : */
2309 127 1826 : ShmemLock = (slock_t *) ShmemAllocUnlocked(sizeof(slock_t));
128 :
129 1826 : SpinLockInit(ShmemLock);
130 :
131 : /*
132 : * Allocations after this point should go through ShmemAlloc, which
133 : * expects to allocate everything on cache line boundaries. Make sure the
134 : * first allocation begins on a cache line boundary.
135 : */
2544 rhaas 136 1826 : aligned = (char *)
137 1826 : (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset)));
138 1826 : shmhdr->freeoffset = aligned - (char *) shmhdr;
139 :
140 : /* ShmemIndex can't be set up yet (need LWLocks first) */
5271 tgl 141 1826 : shmhdr->index = NULL;
6304 142 1826 : ShmemIndex = (HTAB *) NULL;
143 :
144 : /*
145 : * Initialize ShmemVariableCache for transaction manager. (This doesn't
146 : * really belong here, but not worth moving.)
147 : */
148 1826 : ShmemVariableCache = (VariableCache)
6031 bruce 149 1826 : ShmemAlloc(sizeof(*ShmemVariableCache));
6304 tgl 150 1826 : memset(ShmemVariableCache, 0, sizeof(*ShmemVariableCache));
9770 scrappy 151 1826 : }
152 :
153 : /*
154 : * ShmemAlloc -- allocate max-aligned chunk from shared memory
155 : *
156 : * Throws error if request cannot be satisfied.
157 : *
158 : * Assumes ShmemLock and ShmemSegHdr are initialized.
159 : */
160 : void *
6020 tgl 161 18260 : ShmemAlloc(Size size)
162 : {
163 : void *newSpace;
164 : Size allocated_size;
165 :
1186 rhaas 166 18260 : newSpace = ShmemAllocRaw(size, &allocated_size);
2411 tgl 167 18260 : if (!newSpace)
2411 tgl 168 UBC 0 : ereport(ERROR,
169 : (errcode(ERRCODE_OUT_OF_MEMORY),
170 : errmsg("out of shared memory (%zu bytes requested)",
171 : size)));
2411 tgl 172 CBC 18260 : return newSpace;
173 : }
174 :
175 : /*
176 : * ShmemAllocNoError -- allocate max-aligned chunk from shared memory
177 : *
178 : * As ShmemAlloc, but returns NULL if out of space, rather than erroring.
179 : */
180 : void *
181 696871 : ShmemAllocNoError(Size size)
182 : {
183 : Size allocated_size;
184 :
1186 rhaas 185 696871 : return ShmemAllocRaw(size, &allocated_size);
186 : }
187 :
188 : /*
189 : * ShmemAllocRaw -- allocate align chunk and return allocated size
190 : *
191 : * Also sets *allocated_size to the number of bytes allocated, which will
192 : * be equal to the number requested plus any padding we choose to add.
193 : */
194 : static void *
195 819212 : ShmemAllocRaw(Size size, Size *allocated_size)
196 : {
197 : Size newStart;
198 : Size newFree;
199 : void *newSpace;
200 :
201 : /*
202 : * Ensure all space is adequately aligned. We used to only MAXALIGN this
203 : * space but experience has proved that on modern systems that is not good
204 : * enough. Many parts of the system are very sensitive to critical data
205 : * structures getting split across cache line boundaries. To avoid that,
206 : * attempt to align the beginning of the allocation to a cache line
207 : * boundary. The calling code will still need to be careful about how it
208 : * uses the allocated space - e.g. by padding each element in an array of
209 : * structures out to a power-of-two size - but without this, even that
210 : * won't be sufficient.
211 : */
2560 212 819212 : size = CACHELINEALIGN(size);
1186 213 819212 : *allocated_size = size;
214 :
2732 215 819212 : Assert(ShmemSegHdr != NULL);
216 :
7862 tgl 217 819212 : SpinLockAcquire(ShmemLock);
218 :
2732 rhaas 219 819212 : newStart = ShmemSegHdr->freeoffset;
220 :
7140 tgl 221 819212 : newFree = newStart + size;
2732 rhaas 222 819212 : if (newFree <= ShmemSegHdr->totalsize)
223 : {
5271 tgl 224 819212 : newSpace = (void *) ((char *) ShmemBase + newStart);
2732 rhaas 225 819212 : ShmemSegHdr->freeoffset = newFree;
226 : }
227 : else
9345 bruce 228 UBC 0 : newSpace = NULL;
229 :
7862 tgl 230 CBC 819212 : SpinLockRelease(ShmemLock);
231 :
232 : /* note this assert is okay with newSpace == NULL */
2544 rhaas 233 819212 : Assert(newSpace == (void *) CACHELINEALIGN(newSpace));
234 :
8986 bruce 235 819212 : return newSpace;
236 : }
237 :
238 : /*
239 : * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory
240 : *
241 : * Allocate space without locking ShmemLock. This should be used for,
242 : * and only for, allocations that must happen before ShmemLock is ready.
243 : *
244 : * We consider maxalign, rather than cachealign, sufficient here.
245 : */
246 : void *
2309 tgl 247 3652 : ShmemAllocUnlocked(Size size)
248 : {
249 : Size newStart;
250 : Size newFree;
251 : void *newSpace;
252 :
253 : /*
254 : * Ensure allocated space is adequately aligned.
255 : */
256 3652 : size = MAXALIGN(size);
257 :
258 3652 : Assert(ShmemSegHdr != NULL);
259 :
260 3652 : newStart = ShmemSegHdr->freeoffset;
261 :
262 3652 : newFree = newStart + size;
263 3652 : if (newFree > ShmemSegHdr->totalsize)
2309 tgl 264 UBC 0 : ereport(ERROR,
265 : (errcode(ERRCODE_OUT_OF_MEMORY),
266 : errmsg("out of shared memory (%zu bytes requested)",
267 : size)));
2309 tgl 268 CBC 3652 : ShmemSegHdr->freeoffset = newFree;
269 :
270 3652 : newSpace = (void *) ((char *) ShmemBase + newStart);
271 :
272 3652 : Assert(newSpace == (void *) MAXALIGN(newSpace));
273 :
274 3652 : return newSpace;
275 : }
276 :
277 : /*
278 : * ShmemAddrIsValid -- test if an address refers to shared memory
279 : *
280 : * Returns true if the pointer points within the shared memory segment.
281 : */
282 : bool
4444 heikki.linnakangas 283 105724 : ShmemAddrIsValid(const void *addr)
284 : {
5271 tgl 285 105724 : return (addr >= ShmemBase) && (addr < ShmemEnd);
286 : }
287 :
288 : /*
289 : * InitShmemIndex() --- set up or attach to shmem index table.
290 : */
291 : void
7862 292 1826 : InitShmemIndex(void)
293 : {
294 : HASHCTL info;
295 :
296 : /*
297 : * Create the shared memory shmem index.
298 : *
299 : * Since ShmemInitHash calls ShmemInitStruct, which expects the ShmemIndex
300 : * hashtable to exist already, we have a bit of a circularity problem in
301 : * initializing the ShmemIndex itself. The special "ShmemIndex" hash
302 : * table name will tell ShmemInitStruct to fake it.
303 : */
304 1826 : info.keysize = SHMEM_INDEX_KEYSIZE;
7860 305 1826 : info.entrysize = sizeof(ShmemIndexEnt);
306 :
7862 307 1826 : ShmemIndex = ShmemInitHash("ShmemIndex",
308 : SHMEM_INDEX_SIZE, SHMEM_INDEX_SIZE,
309 : &info,
310 : HASH_ELEM | HASH_STRINGS);
311 1826 : }
312 :
313 : /*
314 : * ShmemInitHash -- Create and initialize, or attach to, a
315 : * shared memory hash table.
316 : *
317 : * We assume caller is doing some kind of synchronization
318 : * so that two processes don't try to create/initialize the same
319 : * table at once. (In practice, all creations are done in the postmaster
320 : * process; child processes should always be attaching to existing tables.)
321 : *
322 : * max_size is the estimated maximum number of hashtable entries. This is
323 : * not a hard limit, but the access efficiency will degrade if it is
324 : * exceeded substantially (since it's used to compute directory size and
325 : * the hash table buckets will get overfull).
326 : *
327 : * init_size is the number of hashtable entries to preallocate. For a table
328 : * whose maximum size is certain, this should be equal to max_size; that
329 : * ensures that no run-time out-of-shared-memory failures can occur.
330 : *
331 : * *infoP and hash_flags must specify at least the entry sizes and key
332 : * comparison semantics (see hash_create()). Flag bits and values specific
333 : * to shared-memory hash tables are added here, except that callers may
334 : * choose to specify HASH_PARTITION and/or HASH_FIXED_SIZE.
335 : *
336 : * Note: before Postgres 9.0, this function returned NULL for some failure
337 : * cases. Now, it always throws error instead, so callers need not check
338 : * for NULL.
339 : */
340 : HTAB *
2118 341 12785 : ShmemInitHash(const char *name, /* table string name for shmem index */
342 : long init_size, /* initial table size */
343 : long max_size, /* max size of the table */
344 : HASHCTL *infoP, /* info about key and bucket size */
345 : int hash_flags) /* info about infoP */
346 : {
347 : bool found;
348 : void *location;
349 :
350 : /*
351 : * Hash tables allocated in shared memory have a fixed directory; it can't
352 : * grow or other backends wouldn't be able to find it. So, make sure we
353 : * make it big enough to start with.
354 : *
355 : * The shared memory allocator must be specified too.
356 : */
8443 357 12785 : infoP->dsize = infoP->max_dsize = hash_select_dirsize(max_size);
2411 358 12785 : infoP->alloc = ShmemAllocNoError;
7040 359 12785 : hash_flags |= HASH_SHARED_MEM | HASH_ALLOC | HASH_DIRSIZE;
360 :
361 : /* look it up in the shmem index */
8812 362 12785 : location = ShmemInitStruct(name,
363 : hash_get_shared_size(infoP, hash_flags),
364 : &found);
365 :
366 : /*
367 : * if it already exists, attach to it rather than allocate and initialize
368 : * new space
369 : */
9345 bruce 370 12785 : if (found)
9345 bruce 371 UBC 0 : hash_flags |= HASH_ATTACH;
372 :
373 : /* Pass location of hashtable header to hash_create */
7860 tgl 374 CBC 12785 : infoP->hctl = (HASHHDR *) location;
375 :
7856 376 12785 : return hash_create(name, init_size, infoP, hash_flags);
377 : }
378 :
379 : /*
380 : * ShmemInitStruct -- Create/attach to a structure in shared memory.
381 : *
382 : * This is called during initialization to find or allocate
383 : * a data structure in shared memory. If no other process
384 : * has created the structure, this routine allocates space
385 : * for it. If it exists already, a pointer to the existing
386 : * structure is returned.
387 : *
388 : * Returns: pointer to the object. *foundPtr is set true if the object was
389 : * already in the shmem index (hence, already initialized).
390 : *
391 : * Note: before Postgres 9.0, this function returned NULL for some failure
392 : * cases. Now, it always throws error instead, so callers need not check
393 : * for NULL.
394 : */
395 : void *
396 105909 : ShmemInitStruct(const char *name, Size size, bool *foundPtr)
397 : {
398 : ShmemIndexEnt *result;
399 : void *structPtr;
400 :
6304 401 105909 : LWLockAcquire(ShmemIndexLock, LW_EXCLUSIVE);
402 :
9052 bruce 403 105909 : if (!ShmemIndex)
404 : {
6304 tgl 405 1826 : PGShmemHeader *shmemseghdr = ShmemSegHdr;
406 :
407 : /* Must be trying to create/attach to ShmemIndex itself */
6579 neilc 408 1826 : Assert(strcmp(name, "ShmemIndex") == 0);
409 :
7050 bruce 410 1826 : if (IsUnderPostmaster)
411 : {
412 : /* Must be initializing a (non-standalone) backend */
5271 tgl 413 UBC 0 : Assert(shmemseghdr->index != NULL);
414 0 : structPtr = shmemseghdr->index;
2062 peter_e 415 0 : *foundPtr = true;
416 : }
417 : else
418 : {
419 : /*
420 : * If the shmem index doesn't exist, we are bootstrapping: we must
421 : * be trying to init the shmem index itself.
422 : *
423 : * Notice that the ShmemIndexLock is released before the shmem
424 : * index has been initialized. This should be OK because no other
425 : * process can be accessing shared memory yet.
426 : */
5271 tgl 427 CBC 1826 : Assert(shmemseghdr->index == NULL);
6304 428 1826 : structPtr = ShmemAlloc(size);
5271 429 1826 : shmemseghdr->index = structPtr;
2062 peter_e 430 1826 : *foundPtr = false;
431 : }
6105 tgl 432 1826 : LWLockRelease(ShmemIndexLock);
6304 433 1826 : return structPtr;
434 : }
435 :
436 : /* look it up in the shmem index */
437 : result = (ShmemIndexEnt *)
6038 438 104083 : hash_search(ShmemIndex, name, HASH_ENTER_NULL, foundPtr);
439 :
9345 bruce 440 104083 : if (!result)
441 : {
6304 tgl 442 UBC 0 : LWLockRelease(ShmemIndexLock);
7199 443 0 : ereport(ERROR,
444 : (errcode(ERRCODE_OUT_OF_MEMORY),
445 : errmsg("could not create ShmemIndex entry for data structure \"%s\"",
446 : name)));
447 : }
448 :
8167 tgl 449 CBC 104083 : if (*foundPtr)
450 : {
451 : /*
452 : * Structure is in the shmem index so someone else has allocated it
453 : * already. The size better be the same as the size we are trying to
454 : * initialize to, or there is a name conflict (or worse).
455 : */
9345 bruce 456 2 : if (result->size != size)
457 : {
6304 tgl 458 UBC 0 : LWLockRelease(ShmemIndexLock);
4729 459 0 : ereport(ERROR,
460 : (errmsg("ShmemIndex entry size is wrong for data structure"
461 : " \"%s\": expected %zu, actual %zu",
462 : name, size, result->size)));
463 : }
5271 tgl 464 CBC 2 : structPtr = result->location;
465 : }
466 : else
467 : {
468 : Size allocated_size;
469 :
470 : /* It isn't in the table yet. allocate and initialize it */
1186 rhaas 471 104081 : structPtr = ShmemAllocRaw(size, &allocated_size);
4729 tgl 472 104081 : if (structPtr == NULL)
473 : {
474 : /* out of memory; remove the failed ShmemIndex entry */
6038 tgl 475 UBC 0 : hash_search(ShmemIndex, name, HASH_REMOVE, NULL);
6304 476 0 : LWLockRelease(ShmemIndexLock);
4729 477 0 : ereport(ERROR,
478 : (errcode(ERRCODE_OUT_OF_MEMORY),
479 : errmsg("not enough shared memory for data structure"
480 : " \"%s\" (%zu bytes requested)",
481 : name, size)));
482 : }
9345 bruce 483 CBC 104081 : result->size = size;
1186 rhaas 484 104081 : result->allocated_size = allocated_size;
5271 tgl 485 104081 : result->location = structPtr;
486 : }
487 :
6304 488 104083 : LWLockRelease(ShmemIndexLock);
489 :
4729 490 104083 : Assert(ShmemAddrIsValid(structPtr));
491 :
2544 rhaas 492 104083 : Assert(structPtr == (void *) CACHELINEALIGN(structPtr));
493 :
8986 bruce 494 104083 : return structPtr;
495 : }
496 :
497 :
498 : /*
499 : * Add two Size values, checking for overflow
500 : */
501 : Size
6441 tgl 502 548370 : add_size(Size s1, Size s2)
503 : {
504 : Size result;
505 :
506 548370 : result = s1 + s2;
507 : /* We are assuming Size is an unsigned type here... */
508 548370 : if (result < s1 || result < s2)
6441 tgl 509 UBC 0 : ereport(ERROR,
510 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
511 : errmsg("requested shared memory size overflows size_t")));
6441 tgl 512 CBC 548370 : return result;
513 : }
514 :
515 : /*
516 : * Multiply two Size values, checking for overflow
517 : */
518 : Size
519 271649 : mul_size(Size s1, Size s2)
520 : {
521 : Size result;
522 :
523 271649 : if (s1 == 0 || s2 == 0)
524 14672 : return 0;
525 256977 : result = s1 * s2;
526 : /* We are assuming Size is an unsigned type here... */
527 256977 : if (result / s2 != s1)
6441 tgl 528 UBC 0 : ereport(ERROR,
529 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
530 : errmsg("requested shared memory size overflows size_t")));
6441 tgl 531 CBC 256977 : return result;
532 : }
533 :
534 : /* SQL SRF showing allocated shared memory */
535 : Datum
1186 rhaas 536 3 : pg_get_shmem_allocations(PG_FUNCTION_ARGS)
537 : {
538 : #define PG_GET_SHMEM_SIZES_COLS 4
539 3 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
540 : HASH_SEQ_STATUS hstat;
541 : ShmemIndexEnt *ent;
1060 tgl 542 3 : Size named_allocated = 0;
543 : Datum values[PG_GET_SHMEM_SIZES_COLS];
544 : bool nulls[PG_GET_SHMEM_SIZES_COLS];
545 :
173 michael 546 3 : InitMaterializedSRF(fcinfo, 0);
547 :
1186 rhaas 548 3 : LWLockAcquire(ShmemIndexLock, LW_SHARED);
549 :
550 3 : hash_seq_init(&hstat, ShmemIndex);
551 :
552 : /* output all allocated entries */
553 3 : memset(nulls, 0, sizeof(nulls));
554 176 : while ((ent = (ShmemIndexEnt *) hash_seq_search(&hstat)) != NULL)
555 : {
556 173 : values[0] = CStringGetTextDatum(ent->key);
557 173 : values[1] = Int64GetDatum((char *) ent->location - (char *) ShmemSegHdr);
558 173 : values[2] = Int64GetDatum(ent->size);
559 173 : values[3] = Int64GetDatum(ent->allocated_size);
560 173 : named_allocated += ent->allocated_size;
561 :
398 michael 562 173 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
563 : values, nulls);
564 : }
565 :
566 : /* output shared memory allocated but not counted via the shmem index */
1186 rhaas 567 3 : values[0] = CStringGetTextDatum("<anonymous>");
568 3 : nulls[1] = true;
569 3 : values[2] = Int64GetDatum(ShmemSegHdr->freeoffset - named_allocated);
570 3 : values[3] = values[2];
398 michael 571 3 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
572 :
573 : /* output as-of-yet unused shared memory */
1186 rhaas 574 3 : nulls[0] = true;
575 3 : values[1] = Int64GetDatum(ShmemSegHdr->freeoffset);
576 3 : nulls[1] = false;
577 3 : values[2] = Int64GetDatum(ShmemSegHdr->totalsize - ShmemSegHdr->freeoffset);
578 3 : values[3] = values[2];
398 michael 579 3 : tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
580 :
1186 rhaas 581 3 : LWLockRelease(ShmemIndexLock);
582 :
583 3 : return (Datum) 0;
584 : }
|