Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * dsm.c
4 : * manage dynamic shared memory segments
5 : *
6 : * This file provides a set of services to make programming with dynamic
7 : * shared memory segments more convenient. Unlike the low-level
8 : * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
9 : * created using this module will be cleaned up automatically. Mappings
10 : * will be removed when the resource owner under which they were created
11 : * is cleaned up, unless dsm_pin_mapping() is used, in which case they
12 : * have session lifespan. Segments will be removed when there are no
13 : * remaining mappings, or at postmaster shutdown in any case. After a
14 : * hard postmaster crash, remaining segments will be removed, if they
15 : * still exist, at the next postmaster startup.
16 : *
17 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
18 : * Portions Copyright (c) 1994, Regents of the University of California
19 : *
20 : *
21 : * IDENTIFICATION
22 : * src/backend/storage/ipc/dsm.c
23 : *
24 : *-------------------------------------------------------------------------
25 : */
26 :
27 : #include "postgres.h"
28 :
29 : #include <fcntl.h>
30 : #include <unistd.h>
31 : #ifndef WIN32
32 : #include <sys/mman.h>
33 : #endif
34 : #include <sys/stat.h>
35 :
36 : #include "common/pg_prng.h"
37 : #include "lib/ilist.h"
38 : #include "miscadmin.h"
39 : #include "port/pg_bitutils.h"
40 : #include "storage/dsm.h"
41 : #include "storage/ipc.h"
42 : #include "storage/lwlock.h"
43 : #include "storage/pg_shmem.h"
44 : #include "utils/freepage.h"
45 : #include "utils/guc.h"
46 : #include "utils/memutils.h"
47 : #include "utils/resowner_private.h"
48 :
49 : #define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32
50 :
51 : #define PG_DYNSHMEM_FIXED_SLOTS 64
52 : #define PG_DYNSHMEM_SLOTS_PER_BACKEND 5
53 :
54 : #define INVALID_CONTROL_SLOT ((uint32) -1)
55 :
/* Backend-local tracking for on-detach callbacks. */
typedef struct dsm_segment_detach_callback
{
	on_dsm_detach_callback function;	/* callback to run when detaching */
	Datum		arg;			/* opaque argument passed to the callback */
	slist_node	node;			/* link in dsm_segment->on_detach */
} dsm_segment_detach_callback;

/* Backend-local state for a dynamic shared memory segment. */
struct dsm_segment
{
	dlist_node	node;			/* List link in dsm_segment_list. */
	ResourceOwner resowner;		/* Resource owner. */
	dsm_handle	handle;			/* Segment name. */
	uint32		control_slot;	/* Slot in control segment. */
	void	   *impl_private;	/* Implementation-specific private data. */
	void	   *mapped_address; /* Mapping address, or NULL if unmapped. */
	Size		mapped_size;	/* Size of our mapping. */
	slist_head	on_detach;		/* On-detach callbacks. */
};

/* Shared-memory state for a dynamic shared memory segment. */
typedef struct dsm_control_item
{
	dsm_handle	handle;			/* segment identifier */
	uint32		refcnt;			/* 2+ = active, 1 = moribund, 0 = gone */
	size_t		first_page;		/* used only for main-region segments */
	size_t		npages;			/* used only for main-region segments */
	void	   *impl_private_pm_handle; /* only needed on Windows */
	bool		pinned;			/* survives when mapping count drops? */
} dsm_control_item;

/* Layout of the dynamic shared memory control segment. */
typedef struct dsm_control_header
{
	uint32		magic;			/* always PG_DYNSHMEM_CONTROL_MAGIC */
	uint32		nitems;			/* number of entries in item[] in use */
	uint32		maxitems;		/* allocated capacity of item[] */
	dsm_control_item item[FLEXIBLE_ARRAY_MEMBER];
} dsm_control_header;
96 :
97 : static void dsm_cleanup_for_mmap(void);
98 : static void dsm_postmaster_shutdown(int code, Datum arg);
99 : static dsm_segment *dsm_create_descriptor(void);
100 : static bool dsm_control_segment_sane(dsm_control_header *control,
101 : Size mapped_size);
102 : static uint64 dsm_control_bytes_needed(uint32 nitems);
103 : static inline dsm_handle make_main_region_dsm_handle(int slot);
104 : static inline bool is_main_region_dsm_handle(dsm_handle handle);
105 :
106 : /* Has this backend initialized the dynamic shared memory system yet? */
107 : static bool dsm_init_done = false;
108 :
109 : /* Preallocated DSM space in the main shared memory region. */
110 : static void *dsm_main_space_begin = NULL;
111 :
112 : /*
113 : * List of dynamic shared memory segments used by this backend.
114 : *
115 : * At process exit time, we must decrement the reference count of each
116 : * segment we have attached; this list makes it possible to find all such
117 : * segments.
118 : *
119 : * This list should always be empty in the postmaster. We could probably
120 : * allow the postmaster to map dynamic shared memory segments before it
121 : * begins to start child processes, provided that each process adjusted
122 : * the reference counts for those segments in the control segment at
123 : * startup time, but there's no obvious need for such a facility, which
124 : * would also be complex to handle in the EXEC_BACKEND case. Once the
125 : * postmaster has begun spawning children, there's an additional problem:
126 : * each new mapping would require an update to the control segment,
127 : * which requires locking, in which the postmaster must not be involved.
128 : */
129 : static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);
130 :
131 : /*
132 : * Control segment information.
133 : *
134 : * Unlike ordinary shared memory segments, the control segment is not
135 : * reference counted; instead, it lasts for the postmaster's entire
136 : * life cycle. For simplicity, it doesn't have a dsm_segment object either.
137 : */
138 : static dsm_handle dsm_control_handle;
139 : static dsm_control_header *dsm_control;
140 : static Size dsm_control_mapped_size = 0;
141 : static void *dsm_control_impl_private = NULL;
142 :
143 : /*
144 : * Start up the dynamic shared memory system.
145 : *
146 : * This is called just once during each cluster lifetime, at postmaster
147 : * startup time.
148 : */
void
dsm_postmaster_startup(PGShmemHeader *shim)
{
	void	   *dsm_control_address = NULL;
	uint32		maxitems;
	Size		segsize;

	/* Only the postmaster may run this. */
	Assert(!IsUnderPostmaster);

	/*
	 * If we're using the mmap implementations, clean up any leftovers.
	 * Cleanup isn't needed on Windows, and happens earlier in startup for
	 * POSIX and System V shared memory, via a direct call to
	 * dsm_cleanup_using_control_segment.
	 */
	if (dynamic_shared_memory_type == DSM_IMPL_MMAP)
		dsm_cleanup_for_mmap();

	/* Determine size for new control segment. */
	maxitems = PG_DYNSHMEM_FIXED_SLOTS
		+ PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends;
	elog(DEBUG2, "dynamic shared memory system will support %u segments",
		 maxitems);
	segsize = dsm_control_bytes_needed(maxitems);

	/*
	 * Loop until we find an unused identifier for the new control segment. We
	 * sometimes use DSM_HANDLE_INVALID as a sentinel value indicating "no
	 * control segment", so avoid generating that value for a real handle.
	 */
	for (;;)
	{
		Assert(dsm_control_address == NULL);
		Assert(dsm_control_mapped_size == 0);
		/* Use even numbers only */
		dsm_control_handle = pg_prng_uint32(&pg_global_prng_state) << 1;
		if (dsm_control_handle == DSM_HANDLE_INVALID)
			continue;
		/* DSM_OP_CREATE returns false on handle collision; retry then. */
		if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
						&dsm_control_impl_private, &dsm_control_address,
						&dsm_control_mapped_size, ERROR))
			break;
	}
	dsm_control = dsm_control_address;
	/* Arrange to destroy all remaining segments at postmaster exit. */
	on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim));
	elog(DEBUG2,
		 "created dynamic shared memory control segment %u (%zu bytes)",
		 dsm_control_handle, segsize);
	/* Publish the handle in the main shmem header for child processes. */
	shim->dsm_control = dsm_control_handle;

	/* Initialize control segment. */
	dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
	dsm_control->nitems = 0;
	dsm_control->maxitems = maxitems;
}
204 :
205 : /*
206 : * Determine whether the control segment from the previous postmaster
207 : * invocation still exists. If so, remove the dynamic shared memory
208 : * segments to which it refers, and then the control segment itself.
209 : */
void
dsm_cleanup_using_control_segment(dsm_handle old_control_handle)
{
	void	   *mapped_address = NULL;
	void	   *junk_mapped_address = NULL;
	void	   *impl_private = NULL;
	void	   *junk_impl_private = NULL;
	Size		mapped_size = 0;
	Size		junk_mapped_size = 0;
	uint32		nitems;
	uint32		i;
	dsm_control_header *old_control;

	/*
	 * Try to attach the segment. If this fails, it probably just means that
	 * the operating system has been rebooted and the segment no longer
	 * exists, or an unrelated process has used the same shm ID. So just fall
	 * out quietly.
	 */
	if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
					 &mapped_address, &mapped_size, DEBUG1))
		return;

	/*
	 * We've managed to reattach it, but the contents might not be sane. If
	 * they aren't, we disregard the segment after all.
	 */
	old_control = (dsm_control_header *) mapped_address;
	if (!dsm_control_segment_sane(old_control, mapped_size))
	{
		dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
					&mapped_address, &mapped_size, LOG);
		return;
	}

	/*
	 * OK, the control segment looks basically valid, so we can use it to get
	 * a list of segments that need to be removed.  Failures below are merely
	 * logged (LOG level), since this is best-effort cleanup.
	 */
	nitems = old_control->nitems;
	for (i = 0; i < nitems; ++i)
	{
		dsm_handle	handle;
		uint32		refcnt;

		/* If the reference count is 0, the slot is actually unused. */
		refcnt = old_control->item[i].refcnt;
		if (refcnt == 0)
			continue;

		/* If it was using the main shmem area, there is nothing to do. */
		handle = old_control->item[i].handle;
		if (is_main_region_dsm_handle(handle))
			continue;

		/* Log debugging information. */
		elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
			 handle, refcnt);

		/* Destroy the referenced segment. */
		dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
					&junk_mapped_address, &junk_mapped_size, LOG);
	}

	/* Destroy the old control segment, too. */
	elog(DEBUG2,
		 "cleaning up dynamic shared memory control segment with ID %u",
		 old_control_handle);
	dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
				&mapped_address, &mapped_size, LOG);
}
281 :
282 : /*
283 : * When we're using the mmap shared memory implementation, "shared memory"
284 : * segments might even manage to survive an operating system reboot.
285 : * But there's no guarantee as to exactly what will survive: some segments
286 : * may survive, and others may not, and the contents of some may be out
287 : * of date. In particular, the control segment may be out of date, so we
288 : * can't rely on it to figure out what to remove. However, since we know
289 : * what directory contains the files we used as shared memory, we can simply
290 : * scan the directory and blow everything away that shouldn't be there.
291 : */
3469 rhaas 292 EUB : static void
3469 rhaas 293 UIC 0 : dsm_cleanup_for_mmap(void)
294 : {
295 : DIR *dir;
296 : struct dirent *dent;
297 :
1952 tgl 298 EUB : /* Scan the directory for something with a name of the correct format. */
1952 tgl 299 UIC 0 : dir = AllocateDir(PG_DYNSHMEM_DIR);
1952 tgl 300 EUB :
3408 rhaas 301 UIC 0 : while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL)
3469 rhaas 302 EUB : {
3469 rhaas 303 UIC 0 : if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
304 : strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
305 : {
306 : char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)];
3260 bruce 307 EUB :
2189 peter_e 308 UIC 0 : snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name);
3469 rhaas 309 EUB :
3469 rhaas 310 UIC 0 : elog(DEBUG2, "removing file \"%s\"", buf);
311 :
3469 rhaas 312 EUB : /* We found a matching file; so remove it. */
3469 rhaas 313 UBC 0 : if (unlink(buf) != 0)
3469 rhaas 314 UIC 0 : ereport(ERROR,
315 : (errcode_for_file_access(),
316 : errmsg("could not remove file \"%s\": %m", buf)));
317 : }
318 : }
319 :
3469 rhaas 320 EUB : /* Cleanup complete. */
3408 rhaas 321 UBC 0 : FreeDir(dir);
3469 rhaas 322 UIC 0 : }
323 :
324 : /*
325 : * At shutdown time, we iterate over the control segment and remove all
326 : * remaining dynamic shared memory segments. We avoid throwing errors here;
327 : * the postmaster is shutting down either way, and this is just non-critical
328 : * resource cleanup.
329 : */
static void
dsm_postmaster_shutdown(int code, Datum arg)
{
	uint32		nitems;
	uint32		i;
	void	   *dsm_control_address;
	void	   *junk_mapped_address = NULL;
	void	   *junk_impl_private = NULL;
	Size		junk_mapped_size = 0;
	PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg);

	/*
	 * If some other backend exited uncleanly, it might have corrupted the
	 * control segment while it was dying. In that case, we warn and ignore
	 * the contents of the control segment. This may end up leaving behind
	 * stray shared memory segments, but there's not much we can do about that
	 * if the metadata is gone.
	 */
	nitems = dsm_control->nitems;
	if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
	{
		ereport(LOG,
				(errmsg("dynamic shared memory control segment is corrupt")));
		return;
	}

	/* Remove any remaining segments.  Errors here are logged, not raised. */
	for (i = 0; i < nitems; ++i)
	{
		dsm_handle	handle;

		/* If the reference count is 0, the slot is actually unused. */
		if (dsm_control->item[i].refcnt == 0)
			continue;

		/* Main-region segments vanish with the main shmem segment itself. */
		handle = dsm_control->item[i].handle;
		if (is_main_region_dsm_handle(handle))
			continue;

		/* Log debugging information. */
		elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
			 handle);

		/* Destroy the segment. */
		dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
					&junk_mapped_address, &junk_mapped_size, LOG);
	}

	/* Remove the control segment itself. */
	elog(DEBUG2,
		 "cleaning up dynamic shared memory control segment with ID %u",
		 dsm_control_handle);
	dsm_control_address = dsm_control;
	dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0,
				&dsm_control_impl_private, &dsm_control_address,
				&dsm_control_mapped_size, LOG);
	dsm_control = dsm_control_address;
	/* Clear the published handle so a restart won't chase a stale value. */
	shim->dsm_control = 0;
}
389 :
390 : /*
391 : * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND,
392 : * we must reread the state file and map the control segment; in other cases,
393 : * we'll have inherited the postmaster's mapping and global variables.
394 : */
static void
dsm_backend_startup(void)
{
#ifdef EXEC_BACKEND
	if (IsUnderPostmaster)
	{
		void	   *control_address = NULL;

		/* Attach control segment. */
		Assert(dsm_control_handle != 0);
		dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0,
					&dsm_control_impl_private, &control_address,
					&dsm_control_mapped_size, ERROR);
		dsm_control = control_address;
		/* If control segment doesn't look sane, something is badly wrong. */
		if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
		{
			/* Detach before dying so we don't leak the mapping. */
			dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
						&dsm_control_impl_private, &control_address,
						&dsm_control_mapped_size, WARNING);
			ereport(FATAL,
					(errcode(ERRCODE_INTERNAL_ERROR),
					 errmsg("dynamic shared memory control segment is not valid")));
		}
	}
#endif

	/* In non-EXEC_BACKEND builds the mapping was inherited via fork(). */
	dsm_init_done = true;
}
424 :
425 : #ifdef EXEC_BACKEND
426 : /*
427 : * When running under EXEC_BACKEND, we get a callback here when the main
428 : * shared memory segment is re-attached, so that we can record the control
429 : * handle retrieved from it.
430 : */
void
dsm_set_control_handle(dsm_handle h)
{
	/* May be set only once per process, and only to a valid handle. */
	Assert(dsm_control_handle == 0 && h != 0);
	dsm_control_handle = h;
}
437 : #endif
438 :
439 : /*
440 : * Reserve some space in the main shared memory segment for DSM segments.
441 : */
982 tmunro 442 ECB : size_t
982 tmunro 443 GIC 4564 : dsm_estimate_size(void)
982 tmunro 444 ECB : {
982 tmunro 445 GIC 4564 : return 1024 * 1024 * (size_t) min_dynamic_shared_memory;
446 : }
447 :
448 : /*
449 : * Initialize space in the main shared memory segment for DSM segments.
450 : */
982 tmunro 451 ECB : void
982 tmunro 452 GIC 1826 : dsm_shmem_init(void)
982 tmunro 453 ECB : {
982 tmunro 454 GIC 1826 : size_t size = dsm_estimate_size();
455 : bool found;
982 tmunro 456 ECB :
982 tmunro 457 CBC 1826 : if (size == 0)
982 tmunro 458 GIC 1826 : return;
982 tmunro 459 EUB :
982 tmunro 460 UBC 0 : dsm_main_space_begin = ShmemInitStruct("Preallocated DSM", size, &found);
982 tmunro 461 UIC 0 : if (!found)
982 tmunro 462 EUB : {
982 tmunro 463 UBC 0 : FreePageManager *fpm = (FreePageManager *) dsm_main_space_begin;
982 tmunro 464 UIC 0 : size_t first_page = 0;
465 : size_t pages;
466 :
982 tmunro 467 EUB : /* Reserve space for the FreePageManager. */
982 tmunro 468 UBC 0 : while (first_page * FPM_PAGE_SIZE < sizeof(FreePageManager))
982 tmunro 469 UIC 0 : ++first_page;
470 :
982 tmunro 471 EUB : /* Initialize it and give it all the rest of the space. */
982 tmunro 472 UBC 0 : FreePageManagerInitialize(fpm, dsm_main_space_begin);
473 0 : pages = (size / FPM_PAGE_SIZE) - first_page;
982 tmunro 474 UIC 0 : FreePageManagerPut(fpm, first_page, pages);
475 : }
476 : }
477 :
478 : /*
479 : * Create a new dynamic shared memory segment.
480 : *
481 : * If there is a non-NULL CurrentResourceOwner, the new segment is associated
482 : * with it and must be detached before the resource owner releases, or a
483 : * warning will be logged. If CurrentResourceOwner is NULL, the segment
484 : * remains attached until explicitly detached or the session ends.
485 : * Creating with a NULL CurrentResourceOwner is equivalent to creating
486 : * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping.
487 : */
dsm_segment *
dsm_create(Size size, int flags)
{
	dsm_segment *seg;
	uint32		i;
	uint32		nitems;
	size_t		npages = 0;
	size_t		first_page = 0;
	FreePageManager *dsm_main_space_fpm = dsm_main_space_begin;
	bool		using_main_dsm_region = false;

	/*
	 * Unsafe in postmaster. It might seem pointless to allow use of dsm in
	 * single user mode, but otherwise some subsystems will need dedicated
	 * single user mode code paths.
	 */
	Assert(IsUnderPostmaster || !IsPostmasterEnvironment);

	if (!dsm_init_done)
		dsm_backend_startup();

	/* Create a new segment descriptor. */
	seg = dsm_create_descriptor();

	/*
	 * Lock the control segment while we try to allocate from the main shared
	 * memory area, if configured.
	 */
	if (dsm_main_space_fpm)
	{
		/* Round the request up to a whole number of FPM pages. */
		npages = size / FPM_PAGE_SIZE;
		if (size % FPM_PAGE_SIZE > 0)
			++npages;

		LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
		if (FreePageManagerGet(dsm_main_space_fpm, npages, &first_page))
		{
			/* We can carve out a piece of the main shared memory segment. */
			seg->mapped_address = (char *) dsm_main_space_begin +
				first_page * FPM_PAGE_SIZE;
			seg->mapped_size = npages * FPM_PAGE_SIZE;
			using_main_dsm_region = true;
			/* We'll choose a handle below. */
		}
	}

	if (!using_main_dsm_region)
	{
		/*
		 * We need to create a new memory segment. Loop until we find an
		 * unused segment identifier.  Note: the control lock is NOT held
		 * while creating the OS-level segment, only reacquired afterwards.
		 */
		if (dsm_main_space_fpm)
			LWLockRelease(DynamicSharedMemoryControlLock);
		for (;;)
		{
			Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
			/* Use even numbers only */
			seg->handle = pg_prng_uint32(&pg_global_prng_state) << 1;
			if (seg->handle == DSM_HANDLE_INVALID)	/* Reserve sentinel */
				continue;
			if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
							&seg->mapped_address, &seg->mapped_size, ERROR))
				break;
		}
		LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
	}

	/* Search the control segment for an unused slot. */
	nitems = dsm_control->nitems;
	for (i = 0; i < nitems; ++i)
	{
		if (dsm_control->item[i].refcnt == 0)
		{
			if (using_main_dsm_region)
			{
				/* Main-region handles encode the slot number. */
				seg->handle = make_main_region_dsm_handle(i);
				dsm_control->item[i].first_page = first_page;
				dsm_control->item[i].npages = npages;
			}
			else
				Assert(!is_main_region_dsm_handle(seg->handle));
			dsm_control->item[i].handle = seg->handle;
			/* refcnt of 1 triggers destruction, so start at 2 */
			dsm_control->item[i].refcnt = 2;
			dsm_control->item[i].impl_private_pm_handle = NULL;
			dsm_control->item[i].pinned = false;
			seg->control_slot = i;
			LWLockRelease(DynamicSharedMemoryControlLock);
			return seg;
		}
	}

	/* Verify that we can support an additional mapping. */
	if (nitems >= dsm_control->maxitems)
	{
		/* Back out everything we did above before reporting the failure. */
		if (using_main_dsm_region)
			FreePageManagerPut(dsm_main_space_fpm, first_page, npages);
		LWLockRelease(DynamicSharedMemoryControlLock);
		if (!using_main_dsm_region)
			dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
						&seg->mapped_address, &seg->mapped_size, WARNING);
		if (seg->resowner != NULL)
			ResourceOwnerForgetDSM(seg->resowner, seg);
		dlist_delete(&seg->node);
		pfree(seg);

		if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0)
			return NULL;
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
				 errmsg("too many dynamic shared memory segments")));
	}

	/*
	 * Enter the handle into a new array slot.  Here i == nitems (the scan
	 * above fell through), so item[i] and item[nitems] are the same slot.
	 */
	if (using_main_dsm_region)
	{
		seg->handle = make_main_region_dsm_handle(nitems);
		dsm_control->item[i].first_page = first_page;
		dsm_control->item[i].npages = npages;
	}
	dsm_control->item[nitems].handle = seg->handle;
	/* refcnt of 1 triggers destruction, so start at 2 */
	dsm_control->item[nitems].refcnt = 2;
	dsm_control->item[nitems].impl_private_pm_handle = NULL;
	dsm_control->item[nitems].pinned = false;
	seg->control_slot = nitems;
	dsm_control->nitems++;
	LWLockRelease(DynamicSharedMemoryControlLock);

	return seg;
}
620 :
621 : /*
622 : * Attach a dynamic shared memory segment.
623 : *
624 : * See comments for dsm_segment_handle() for an explanation of how this
625 : * is intended to be used.
626 : *
627 : * This function will return NULL if the segment isn't known to the system.
628 : * This can happen if we're asked to attach the segment, but then everyone
629 : * else detaches it (causing it to be destroyed) before we get around to
630 : * attaching it.
631 : *
632 : * If there is a non-NULL CurrentResourceOwner, the attached segment is
633 : * associated with it and must be detached before the resource owner releases,
634 : * or a warning will be logged. Otherwise the segment remains attached until
635 : * explicitly detached or the session ends. See the note atop dsm_create().
636 : */
dsm_segment *
dsm_attach(dsm_handle h)
{
	dsm_segment *seg;
	dlist_iter	iter;
	uint32		i;
	uint32		nitems;

	/* Unsafe in postmaster (and pointless in a stand-alone backend). */
	Assert(IsUnderPostmaster);

	if (!dsm_init_done)
		dsm_backend_startup();

	/*
	 * Since this is just a debugging cross-check, we could leave it out
	 * altogether, or include it only in assert-enabled builds. But since the
	 * list of attached segments should normally be very short, let's include
	 * it always for right now.
	 *
	 * If you're hitting this error, you probably want to attempt to find an
	 * existing mapping via dsm_find_mapping() before calling dsm_attach() to
	 * create a new one.
	 */
	dlist_foreach(iter, &dsm_segment_list)
	{
		seg = dlist_container(dsm_segment, node, iter.cur);
		if (seg->handle == h)
			elog(ERROR, "can't attach the same segment more than once");
	}

	/* Create a new segment descriptor. */
	seg = dsm_create_descriptor();
	seg->handle = h;

	/* Bump reference count for this segment in shared memory. */
	LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
	nitems = dsm_control->nitems;
	for (i = 0; i < nitems; ++i)
	{
		/*
		 * If the reference count is 0, the slot is actually unused. If the
		 * reference count is 1, the slot is still in use, but the segment is
		 * in the process of going away; even if the handle matches, another
		 * slot may already have started using the same handle value by
		 * coincidence so we have to keep searching.
		 */
		if (dsm_control->item[i].refcnt <= 1)
			continue;

		/* If the handle doesn't match, it's not the slot we want. */
		if (dsm_control->item[i].handle != seg->handle)
			continue;

		/* Otherwise we've found a match. */
		dsm_control->item[i].refcnt++;
		seg->control_slot = i;
		if (is_main_region_dsm_handle(seg->handle))
		{
			/* Main-region segments are already mapped; just compute bounds. */
			seg->mapped_address = (char *) dsm_main_space_begin +
				dsm_control->item[i].first_page * FPM_PAGE_SIZE;
			seg->mapped_size = dsm_control->item[i].npages * FPM_PAGE_SIZE;
		}
		break;
	}
	LWLockRelease(DynamicSharedMemoryControlLock);

	/*
	 * If we didn't find the handle we're looking for in the control segment,
	 * it probably means that everyone else who had it mapped, including the
	 * original creator, died before we got to this point. It's up to the
	 * caller to decide what to do about that.
	 */
	if (seg->control_slot == INVALID_CONTROL_SLOT)
	{
		dsm_detach(seg);
		return NULL;
	}

	/* Here's where we actually try to map the segment. */
	if (!is_main_region_dsm_handle(seg->handle))
		dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
					&seg->mapped_address, &seg->mapped_size, ERROR);

	return seg;
}
723 :
724 : /*
725 : * At backend shutdown time, detach any segments that are still attached.
726 : * (This is similar to dsm_detach_all, except that there's no reason to
727 : * unmap the control segment before exiting, so we don't bother.)
728 : */
3399 rhaas 729 ECB : void
3399 rhaas 730 GIC 29153 : dsm_backend_shutdown(void)
3469 rhaas 731 ECB : {
3469 rhaas 732 GIC 29345 : while (!dlist_is_empty(&dsm_segment_list))
733 : {
734 : dsm_segment *seg;
3469 rhaas 735 ECB :
3469 rhaas 736 CBC 192 : seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
3469 rhaas 737 GIC 192 : dsm_detach(seg);
3469 rhaas 738 ECB : }
3469 rhaas 739 GIC 29153 : }
740 :
741 : /*
742 : * Detach all shared memory segments, including the control segments. This
743 : * should be called, along with PGSharedMemoryDetach, in processes that
744 : * might inherit mappings but are not intended to be connected to dynamic
745 : * shared memory.
746 : */
3309 rhaas 747 ECB : void
3309 rhaas 748 GIC 1 : dsm_detach_all(void)
3309 rhaas 749 ECB : {
3260 bruce 750 GIC 1 : void *control_address = dsm_control;
3309 rhaas 751 ECB :
3309 rhaas 752 GIC 1 : while (!dlist_is_empty(&dsm_segment_list))
753 : {
754 : dsm_segment *seg;
3309 rhaas 755 EUB :
3309 rhaas 756 UBC 0 : seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
3309 rhaas 757 UIC 0 : dsm_detach(seg);
758 : }
3309 rhaas 759 ECB :
3309 rhaas 760 CBC 1 : if (control_address != NULL)
3309 rhaas 761 GIC 1 : dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0,
762 : &dsm_control_impl_private, &control_address,
3309 rhaas 763 ECB : &dsm_control_mapped_size, ERROR);
3309 rhaas 764 GIC 1 : }
765 :
/*
 * Detach from a shared memory segment, destroying the segment if we
 * remove the last reference.
 *
 * This function should never fail.  It will often be invoked when aborting
 * a transaction, and a further error won't serve any purpose.  It's not a
 * complete disaster if we fail to unmap or destroy the segment; it means a
 * resource leak, but that doesn't necessarily preclude further operations.
 */
void
dsm_detach(dsm_segment *seg)
{
	/*
	 * Invoke registered callbacks.  Just in case one of those callbacks
	 * throws a further error that brings us back here, pop the callback
	 * before invoking it, to avoid infinite error recursion.  Don't allow
	 * interrupts while running the individual callbacks in non-error code
	 * paths, to avoid leaving cleanup work unfinished if we're interrupted by
	 * a statement timeout or similar.
	 */
	HOLD_INTERRUPTS();
	while (!slist_is_empty(&seg->on_detach))
	{
		slist_node *node;
		dsm_segment_detach_callback *cb;
		on_dsm_detach_callback function;
		Datum		arg;

		/* Copy out the function and argument before freeing the node. */
		node = slist_pop_head_node(&seg->on_detach);
		cb = slist_container(dsm_segment_detach_callback, node, node);
		function = cb->function;
		arg = cb->arg;
		pfree(cb);

		function(seg, arg);
	}
	RESUME_INTERRUPTS();

	/*
	 * Try to remove the mapping, if one exists.  Normally, there will be, but
	 * maybe not, if we failed partway through a create or attach operation.
	 * We remove the mapping before decrementing the reference count so that
	 * the process that sees a zero reference count can be certain that no
	 * remaining mappings exist.  Even if this fails, we pretend that it
	 * works, because retrying is likely to fail in the same way.
	 */
	if (seg->mapped_address != NULL)
	{
		/* Main-region pseudo-segments have no impl-level mapping to undo. */
		if (!is_main_region_dsm_handle(seg->handle))
			dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
						&seg->mapped_address, &seg->mapped_size, WARNING);
		seg->impl_private = NULL;
		seg->mapped_address = NULL;
		seg->mapped_size = 0;
	}

	/* Reduce reference count, if we previously increased it. */
	if (seg->control_slot != INVALID_CONTROL_SLOT)
	{
		uint32		refcnt;
		uint32		control_slot = seg->control_slot;

		LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
		Assert(dsm_control->item[control_slot].handle == seg->handle);
		Assert(dsm_control->item[control_slot].refcnt > 1);
		refcnt = --dsm_control->item[control_slot].refcnt;
		seg->control_slot = INVALID_CONTROL_SLOT;
		LWLockRelease(DynamicSharedMemoryControlLock);

		/*
		 * If new reference count is 1, try to destroy the segment.  (In this
		 * scheme refcnt == 1 means "slot allocated but no remaining
		 * references"; see dsm_unpin_segment's note that 0 means unused.)
		 */
		if (refcnt == 1)
		{
			/* A pinned segment should never reach 1. */
			Assert(!dsm_control->item[control_slot].pinned);

			/*
			 * If we fail to destroy the segment here, or are killed before we
			 * finish doing so, the reference count will remain at 1, which
			 * will mean that nobody else can attach to the segment.  At
			 * postmaster shutdown time, or when a new postmaster is started
			 * after a hard kill, another attempt will be made to remove the
			 * segment.
			 *
			 * The main case we're worried about here is being killed by a
			 * signal before we can finish removing the segment.  In that
			 * case, it's important to be sure that the segment still gets
			 * removed.  If we actually fail to remove the segment for some
			 * other reason, the postmaster may not have any better luck than
			 * we did.  There's not much we can do about that, though.
			 */
			if (is_main_region_dsm_handle(seg->handle) ||
				dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
							&seg->mapped_address, &seg->mapped_size, WARNING))
			{
				/* Only mark the slot free once destruction has succeeded. */
				LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
				if (is_main_region_dsm_handle(seg->handle))
					FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
									   dsm_control->item[control_slot].first_page,
									   dsm_control->item[control_slot].npages);
				Assert(dsm_control->item[control_slot].handle == seg->handle);
				Assert(dsm_control->item[control_slot].refcnt == 1);
				dsm_control->item[control_slot].refcnt = 0;
				LWLockRelease(DynamicSharedMemoryControlLock);
			}
		}
	}

	/* Clean up our remaining backend-private data structures. */
	if (seg->resowner != NULL)
		ResourceOwnerForgetDSM(seg->resowner, seg);
	dlist_delete(&seg->node);
	pfree(seg);
}
879 :
880 : /*
881 : * Keep a dynamic shared memory mapping until end of session.
882 : *
883 : * By default, mappings are owned by the current resource owner, which
884 : * typically means they stick around for the duration of the current query
885 : * only.
886 : */
3469 rhaas 887 ECB : void
3083 rhaas 888 GIC 13935 : dsm_pin_mapping(dsm_segment *seg)
3469 rhaas 889 ECB : {
3469 rhaas 890 GIC 13935 : if (seg->resowner != NULL)
3469 rhaas 891 ECB : {
3469 rhaas 892 CBC 13875 : ResourceOwnerForgetDSM(seg->resowner, seg);
3469 rhaas 893 GIC 13875 : seg->resowner = NULL;
3469 rhaas 894 ECB : }
3469 rhaas 895 GIC 13935 : }
896 :
897 : /*
898 : * Arrange to remove a dynamic shared memory mapping at cleanup time.
899 : *
900 : * dsm_pin_mapping() can be used to preserve a mapping for the entire
901 : * lifetime of a process; this function reverses that decision, making
902 : * the segment owned by the current resource owner. This may be useful
903 : * just before performing some operation that will invalidate the segment
904 : * for future use by this backend.
905 : */
3083 rhaas 906 EUB : void
3083 rhaas 907 UIC 0 : dsm_unpin_mapping(dsm_segment *seg)
3083 rhaas 908 EUB : {
3083 rhaas 909 UBC 0 : Assert(seg->resowner == NULL);
910 0 : ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
911 0 : seg->resowner = CurrentResourceOwner;
912 0 : ResourceOwnerRememberDSM(seg->resowner, seg);
3083 rhaas 913 UIC 0 : }
914 :
/*
 * Keep a dynamic shared memory segment until postmaster shutdown, or until
 * dsm_unpin_segment is called.
 *
 * This function should not be called more than once per segment, unless the
 * segment is explicitly unpinned with dsm_unpin_segment in between calls.
 *
 * Note that this function does not arrange for the current process to
 * keep the segment mapped indefinitely; if that behavior is desired,
 * dsm_pin_mapping() should be used from each process that needs to
 * retain the mapping.
 */
void
dsm_pin_segment(dsm_segment *seg)
{
	void	   *handle;

	/*
	 * Bump reference count for this segment in shared memory.  This will
	 * ensure that even if there is no session which is attached to this
	 * segment, it will remain until postmaster shutdown or an explicit call
	 * to unpin.
	 *
	 * The whole sequence -- pinned-flag check, implementation-specific pin,
	 * and refcount bump -- happens under the control lock so a concurrent
	 * pin/unpin cannot interleave.
	 */
	LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
	if (dsm_control->item[seg->control_slot].pinned)
		elog(ERROR, "cannot pin a segment that is already pinned");
	/* May return an implementation-private handle via 'handle'. */
	dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle);
	dsm_control->item[seg->control_slot].pinned = true;
	dsm_control->item[seg->control_slot].refcnt++;
	/* Stash the impl handle so dsm_impl_unpin_segment can find it later. */
	dsm_control->item[seg->control_slot].impl_private_pm_handle = handle;
	LWLockRelease(DynamicSharedMemoryControlLock);
}
947 :
/*
 * Unpin a dynamic shared memory segment that was previously pinned with
 * dsm_pin_segment.  This function should not be called unless dsm_pin_segment
 * was previously called for this segment.
 *
 * The argument is a dsm_handle rather than a dsm_segment in case you want
 * to unpin a segment to which you haven't attached.  This turns out to be
 * useful if, for example, a reference to one shared memory segment is stored
 * within another shared memory segment.  You might want to unpin the
 * referenced segment before destroying the referencing segment.
 */
void
dsm_unpin_segment(dsm_handle handle)
{
	uint32		control_slot = INVALID_CONTROL_SLOT;
	bool		destroy = false;
	uint32		i;

	/* Find the control slot for the given handle. */
	LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
	for (i = 0; i < dsm_control->nitems; ++i)
	{
		/* Skip unused slots and segments that are concurrently going away. */
		if (dsm_control->item[i].refcnt <= 1)
			continue;

		/* If we've found our handle, we can stop searching. */
		if (dsm_control->item[i].handle == handle)
		{
			control_slot = i;
			break;
		}
	}

	/*
	 * We should definitely have found the slot, and it should not already be
	 * in the process of going away, because this function should only be
	 * called on a segment which is pinned.
	 */
	if (control_slot == INVALID_CONTROL_SLOT)
		elog(ERROR, "cannot unpin unknown segment handle");
	if (!dsm_control->item[control_slot].pinned)
		elog(ERROR, "cannot unpin a segment that is not pinned");
	Assert(dsm_control->item[control_slot].refcnt > 1);

	/*
	 * Allow implementation-specific code to run.  We have to do this before
	 * releasing the lock, because impl_private_pm_handle may get modified by
	 * dsm_impl_unpin_segment.
	 */
	dsm_impl_unpin_segment(handle,
						   &dsm_control->item[control_slot].impl_private_pm_handle);

	/* Note that 1 means no references (0 means unused slot). */
	if (--dsm_control->item[control_slot].refcnt == 1)
		destroy = true;
	dsm_control->item[control_slot].pinned = false;

	/* Now we can release the lock. */
	LWLockRelease(DynamicSharedMemoryControlLock);

	/* Clean up resources if that was the last reference. */
	if (destroy)
	{
		void	   *junk_impl_private = NULL;
		void	   *junk_mapped_address = NULL;
		Size		junk_mapped_size = 0;

		/*
		 * For an explanation of how error handling works in this case, see
		 * comments in dsm_detach.  Note that if we reach this point, the
		 * current process certainly does not have the segment mapped, because
		 * if it did, the reference count would have still been greater than 1
		 * even after releasing the reference count held by the pin.  The fact
		 * that there can't be a dsm_segment for this handle makes it OK to
		 * pass the mapped size, mapped address, and private data as NULL
		 * here.
		 */
		if (is_main_region_dsm_handle(handle) ||
			dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
						&junk_mapped_address, &junk_mapped_size, WARNING))
		{
			/* Re-take the lock to mark the control slot as free. */
			LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
			if (is_main_region_dsm_handle(handle))
				FreePageManagerPut((FreePageManager *) dsm_main_space_begin,
								   dsm_control->item[control_slot].first_page,
								   dsm_control->item[control_slot].npages);
			Assert(dsm_control->item[control_slot].handle == handle);
			Assert(dsm_control->item[control_slot].refcnt == 1);
			dsm_control->item[control_slot].refcnt = 0;
			LWLockRelease(DynamicSharedMemoryControlLock);
		}
	}
}
1042 :
1043 : /*
1044 : * Find an existing mapping for a shared memory segment, if there is one.
1045 : */
3469 rhaas 1046 EUB : dsm_segment *
202 pg 1047 UNC 0 : dsm_find_mapping(dsm_handle handle)
1048 : {
1049 : dlist_iter iter;
1050 : dsm_segment *seg;
3469 rhaas 1051 EUB :
3469 rhaas 1052 UIC 0 : dlist_foreach(iter, &dsm_segment_list)
3469 rhaas 1053 EUB : {
3469 rhaas 1054 UBC 0 : seg = dlist_container(dsm_segment, node, iter.cur);
202 pg 1055 UNC 0 : if (seg->handle == handle)
3469 rhaas 1056 UIC 0 : return seg;
1057 : }
3469 rhaas 1058 EUB :
3469 rhaas 1059 UIC 0 : return NULL;
1060 : }
1061 :
1062 : /*
1063 : * Get the address at which a dynamic shared memory segment is mapped.
1064 : */
3469 rhaas 1065 ECB : void *
3469 rhaas 1066 GIC 16216 : dsm_segment_address(dsm_segment *seg)
3469 rhaas 1067 ECB : {
3469 rhaas 1068 CBC 16216 : Assert(seg->mapped_address != NULL);
3469 rhaas 1069 GIC 16216 : return seg->mapped_address;
1070 : }
1071 :
1072 : /*
1073 : * Get the size of a mapping.
1074 : */
3450 rhaas 1075 EUB : Size
3469 rhaas 1076 UIC 0 : dsm_segment_map_length(dsm_segment *seg)
3469 rhaas 1077 EUB : {
3469 rhaas 1078 UBC 0 : Assert(seg->mapped_address != NULL);
3469 rhaas 1079 UIC 0 : return seg->mapped_size;
1080 : }
1081 :
1082 : /*
1083 : * Get a handle for a mapping.
1084 : *
1085 : * To establish communication via dynamic shared memory between two backends,
1086 : * one of them should first call dsm_create() to establish a new shared
1087 : * memory mapping. That process should then call dsm_segment_handle() to
1088 : * obtain a handle for the mapping, and pass that handle to the
1089 : * coordinating backend via some means (e.g. bgw_main_arg, or via the
1090 : * main shared memory segment). The recipient, once in possession of the
1091 : * handle, should call dsm_attach().
1092 : */
3469 rhaas 1093 ECB : dsm_handle
3469 rhaas 1094 GIC 2303 : dsm_segment_handle(dsm_segment *seg)
3469 rhaas 1095 ECB : {
3469 rhaas 1096 GIC 2303 : return seg->handle;
1097 : }
1098 :
1099 : /*
1100 : * Register an on-detach callback for a dynamic shared memory segment.
1101 : */
3399 rhaas 1102 ECB : void
3399 rhaas 1103 GIC 11237 : on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg)
1104 : {
1105 : dsm_segment_detach_callback *cb;
3399 rhaas 1106 ECB :
3399 rhaas 1107 GIC 11237 : cb = MemoryContextAlloc(TopMemoryContext,
3399 rhaas 1108 ECB : sizeof(dsm_segment_detach_callback));
3399 rhaas 1109 CBC 11237 : cb->function = function;
1110 11237 : cb->arg = arg;
1111 11237 : slist_push_head(&seg->on_detach, &cb->node);
3399 rhaas 1112 GIC 11237 : }
1113 :
1114 : /*
1115 : * Unregister an on-detach callback for a dynamic shared memory segment.
1116 : */
3399 rhaas 1117 ECB : void
3399 rhaas 1118 GIC 3770 : cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function,
1119 : Datum arg)
1120 : {
1121 : slist_mutable_iter iter;
3399 rhaas 1122 ECB :
3399 rhaas 1123 GIC 11766 : slist_foreach_modify(iter, &seg->on_detach)
1124 : {
1125 : dsm_segment_detach_callback *cb;
3399 rhaas 1126 ECB :
3399 rhaas 1127 CBC 11766 : cb = slist_container(dsm_segment_detach_callback, node, iter.cur);
3399 rhaas 1128 GIC 11766 : if (cb->function == function && cb->arg == arg)
3399 rhaas 1129 ECB : {
3399 rhaas 1130 CBC 3770 : slist_delete_current(&iter);
1131 3770 : pfree(cb);
3399 rhaas 1132 GIC 3770 : break;
1133 : }
3399 rhaas 1134 ECB : }
3399 rhaas 1135 GIC 3770 : }
1136 :
1137 : /*
1138 : * Discard all registered on-detach callbacks without executing them.
1139 : */
3317 rhaas 1140 ECB : void
3317 rhaas 1141 GIC 12732 : reset_on_dsm_detach(void)
1142 : {
1143 : dlist_iter iter;
3317 rhaas 1144 ECB :
3317 rhaas 1145 GIC 12732 : dlist_foreach(iter, &dsm_segment_list)
3317 rhaas 1146 EUB : {
3317 rhaas 1147 UIC 0 : dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur);
1148 :
3317 rhaas 1149 EUB : /* Throw away explicit on-detach actions one by one. */
3317 rhaas 1150 UIC 0 : while (!slist_is_empty(&seg->on_detach))
1151 : {
1152 : slist_node *node;
1153 : dsm_segment_detach_callback *cb;
3317 rhaas 1154 EUB :
3317 rhaas 1155 UBC 0 : node = slist_pop_head_node(&seg->on_detach);
1156 0 : cb = slist_container(dsm_segment_detach_callback, node, node);
3317 rhaas 1157 UIC 0 : pfree(cb);
1158 : }
1159 :
1160 : /*
1161 : * Decrementing the reference count is a sort of implicit on-detach
1162 : * action; make sure we don't do that, either.
3317 rhaas 1163 EUB : */
3317 rhaas 1164 UIC 0 : seg->control_slot = INVALID_CONTROL_SLOT;
3317 rhaas 1165 ECB : }
3317 rhaas 1166 GIC 12732 : }
1167 :
1168 : /*
1169 : * Create a segment descriptor.
1170 : */
3469 rhaas 1171 ECB : static dsm_segment *
3469 rhaas 1172 GIC 16099 : dsm_create_descriptor(void)
1173 : {
1174 : dsm_segment *seg;
3469 rhaas 1175 ECB :
2207 alvherre 1176 CBC 16099 : if (CurrentResourceOwner)
2207 alvherre 1177 GIC 14712 : ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
3469 rhaas 1178 ECB :
3469 rhaas 1179 CBC 16099 : seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment));
3469 rhaas 1180 GIC 16099 : dlist_push_head(&dsm_segment_list, &seg->node);
1181 :
3469 rhaas 1182 ECB : /* seg->handle must be initialized by the caller */
3469 rhaas 1183 CBC 16099 : seg->control_slot = INVALID_CONTROL_SLOT;
1184 16099 : seg->impl_private = NULL;
1185 16099 : seg->mapped_address = NULL;
3469 rhaas 1186 GIC 16099 : seg->mapped_size = 0;
3469 rhaas 1187 ECB :
3469 rhaas 1188 CBC 16099 : seg->resowner = CurrentResourceOwner;
2207 alvherre 1189 16099 : if (CurrentResourceOwner)
2207 alvherre 1190 GIC 14712 : ResourceOwnerRememberDSM(CurrentResourceOwner, seg);
3469 rhaas 1191 ECB :
3399 rhaas 1192 GIC 16099 : slist_init(&seg->on_detach);
3399 rhaas 1193 ECB :
3469 rhaas 1194 GIC 16099 : return seg;
1195 : }
1196 :
1197 : /*
1198 : * Sanity check a control segment.
1199 : *
1200 : * The goal here isn't to detect everything that could possibly be wrong with
1201 : * the control segment; there's not enough information for that. Rather, the
1202 : * goal is to make sure that someone can iterate over the items in the segment
1203 : * without overrunning the end of the mapping and crashing. We also check
1204 : * the magic number since, if that's messed up, this may not even be one of
1205 : * our segments at all.
1206 : */
3469 rhaas 1207 ECB : static bool
3450 rhaas 1208 GIC 1828 : dsm_control_segment_sane(dsm_control_header *control, Size mapped_size)
3469 rhaas 1209 ECB : {
3469 rhaas 1210 GBC 1828 : if (mapped_size < offsetof(dsm_control_header, item))
3469 rhaas 1211 LBC 0 : return false; /* Mapped size too short to read header. */
3469 rhaas 1212 GBC 1828 : if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
3469 rhaas 1213 LBC 0 : return false; /* Magic number doesn't match. */
3469 rhaas 1214 GBC 1828 : if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
3469 rhaas 1215 LBC 0 : return false; /* Max item count won't fit in map. */
3469 rhaas 1216 GBC 1828 : if (control->nitems > control->maxitems)
3469 rhaas 1217 LBC 0 : return false; /* Overfull. */
3469 rhaas 1218 GIC 1828 : return true;
1219 : }
1220 :
1221 : /*
1222 : * Compute the number of control-segment bytes needed to store a given
1223 : * number of items.
1224 : */
3469 rhaas 1225 ECB : static uint64
3469 rhaas 1226 GIC 3654 : dsm_control_bytes_needed(uint32 nitems)
1227 : {
3469 rhaas 1228 ECB : return offsetof(dsm_control_header, item)
2118 tgl 1229 GIC 3654 : + sizeof(dsm_control_item) * (uint64) nitems;
1230 : }
1231 :
982 tmunro 1232 EUB : static inline dsm_handle
982 tmunro 1233 UIC 0 : make_main_region_dsm_handle(int slot)
1234 : {
1235 : dsm_handle handle;
1236 :
1237 : /*
1238 : * We need to create a handle that doesn't collide with any existing extra
1239 : * segment created by dsm_impl_op(), so we'll make it odd. It also
1240 : * mustn't collide with any other main area pseudo-segment, so we'll
1241 : * include the slot number in some of the bits. We also want to make an
1242 : * effort to avoid newly created and recently destroyed handles from being
1243 : * confused, so we'll make the rest of the bits random.
982 tmunro 1244 EUB : */
982 tmunro 1245 UBC 0 : handle = 1;
1246 0 : handle |= slot << 1;
497 tgl 1247 0 : handle |= pg_prng_uint32(&pg_global_prng_state) << (pg_leftmost_one_pos32(dsm_control->maxitems) + 1);
982 tmunro 1248 UIC 0 : return handle;
1249 : }
1250 :
982 tmunro 1251 ECB : static inline bool
982 tmunro 1252 GIC 47635 : is_main_region_dsm_handle(dsm_handle handle)
982 tmunro 1253 ECB : {
982 tmunro 1254 GIC 47635 : return handle & 1;
1255 : }
|