Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * dsm_impl.c
4 : : * manage dynamic shared memory segments
5 : : *
6 : : * This file provides low-level APIs for creating and destroying shared
7 : : * memory segments using several different possible techniques. We refer
8 : : * to these segments as dynamic because they can be created, altered, and
9 : : * destroyed at any point during the server life cycle. This is unlike
10 : : * the main shared memory segment, of which there is always exactly one
11 : : * and which is always mapped at a fixed address in every PostgreSQL
12 : : * background process.
13 : : *
14 : : * Because not all systems provide the same primitives in this area, nor
15 : : * do all primitives behave the same way on all systems, we provide
16 : : * several implementations of this facility. Many systems implement
17 : : * POSIX shared memory (shm_open etc.), which is well-suited to our needs
18 : : * in this area, with the exception that shared memory identifiers live
19 : : * in a flat system-wide namespace, raising the uncomfortable prospect of
20 : : * name collisions with other processes (including other copies of
21 : : * PostgreSQL) running on the same system. Some systems only support
22 : : * the older System V shared memory interface (shmget etc.) which is
23 : : * also usable; however, the default allocation limits are often quite
24 : : * small, and the namespace is even more restricted.
25 : : *
26 : : * We also provide an mmap-based shared memory implementation. This may
27 : : * be useful on systems that provide shared memory via a special-purpose
28 : : * filesystem; by opting for this implementation, the user can even
29 : : * control precisely where their shared memory segments are placed. It
30 : : * can also be used as a fallback for systems where shm_open and shmget
31 : : * are not available or can't be used for some reason. Of course,
32 : : * mapping a file residing on an actual spinning disk is a fairly poor
33 : : * approximation for shared memory because writeback may hurt performance
34 : : * substantially, but there should be few systems where we must make do
35 : : * with such poor tools.
36 : : *
37 : : * As ever, Windows requires its own implementation.
38 : : *
39 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
40 : : * Portions Copyright (c) 1994, Regents of the University of California
41 : : *
42 : : *
43 : : * IDENTIFICATION
44 : : * src/backend/storage/ipc/dsm_impl.c
45 : : *
46 : : *-------------------------------------------------------------------------
47 : : */
48 : :
49 : : #include "postgres.h"
50 : :
51 : : #include <fcntl.h>
52 : : #include <signal.h>
53 : : #include <unistd.h>
54 : : #ifndef WIN32
55 : : #include <sys/mman.h>
56 : : #include <sys/ipc.h>
57 : : #include <sys/shm.h>
58 : : #include <sys/stat.h>
59 : : #endif
60 : :
61 : : #include "common/file_perm.h"
62 : : #include "libpq/pqsignal.h"
63 : : #include "miscadmin.h"
64 : : #include "pgstat.h"
65 : : #include "portability/mem.h"
66 : : #include "postmaster/postmaster.h"
67 : : #include "storage/dsm_impl.h"
68 : : #include "storage/fd.h"
69 : : #include "utils/guc.h"
70 : : #include "utils/memutils.h"
71 : :
/*
 * Forward declarations.
 *
 * One entry point per shared memory implementation, each declared only when
 * the matching USE_DSM_* symbol is defined.  All of them share the argument
 * list of dsm_impl_op().
 */
#ifdef USE_DSM_POSIX
static bool dsm_impl_posix(dsm_op op, dsm_handle handle,
						   Size request_size, void **impl_private,
						   void **mapped_address, Size *mapped_size,
						   int elevel);
static int	dsm_impl_posix_resize(int fd, off_t size);
#endif

#ifdef USE_DSM_SYSV
static bool dsm_impl_sysv(dsm_op op, dsm_handle handle,
						  Size request_size, void **impl_private,
						  void **mapped_address, Size *mapped_size,
						  int elevel);
#endif

#ifdef USE_DSM_WINDOWS
static bool dsm_impl_windows(dsm_op op, dsm_handle handle,
							 Size request_size, void **impl_private,
							 void **mapped_address, Size *mapped_size,
							 int elevel);
#endif

#ifdef USE_DSM_MMAP
static bool dsm_impl_mmap(dsm_op op, dsm_handle handle,
						  Size request_size, void **impl_private,
						  void **mapped_address, Size *mapped_size,
						  int elevel);
#endif

/* Supplies the error code attached to every report made in this file. */
static int	errcode_for_dynamic_shared_memory(void);
94 : :
95 : : const struct config_enum_entry dynamic_shared_memory_options[] = {
96 : : #ifdef USE_DSM_POSIX
97 : : {"posix", DSM_IMPL_POSIX, false},
98 : : #endif
99 : : #ifdef USE_DSM_SYSV
100 : : {"sysv", DSM_IMPL_SYSV, false},
101 : : #endif
102 : : #ifdef USE_DSM_WINDOWS
103 : : {"windows", DSM_IMPL_WINDOWS, false},
104 : : #endif
105 : : #ifdef USE_DSM_MMAP
106 : : {"mmap", DSM_IMPL_MMAP, false},
107 : : #endif
108 : : {NULL, 0, false}
109 : : };
110 : :
111 : : /* Implementation selector. */
112 : : int dynamic_shared_memory_type = DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE;
113 : :
114 : : /* Amount of space reserved for DSM segments in the main area. */
115 : : int min_dynamic_shared_memory;
116 : :
117 : : /* Size of buffer to be used for zero-filling. */
118 : : #define ZBUFFER_SIZE 8192
119 : :
120 : : #define SEGMENT_NAME_PREFIX "Global/PostgreSQL"
121 : :
122 : : /*------
123 : : * Perform a low-level shared memory operation in a platform-specific way,
124 : : * as dictated by the selected implementation. Each implementation is
125 : : * required to implement the following primitives.
126 : : *
127 : : * DSM_OP_CREATE. Create a segment whose size is the request_size and
128 : : * map it.
129 : : *
130 : : * DSM_OP_ATTACH. Map the segment, whose size must be the request_size.
131 : : *
132 : : * DSM_OP_DETACH. Unmap the segment.
133 : : *
134 : : * DSM_OP_DESTROY. Unmap the segment, if it is mapped. Destroy the
135 : : * segment.
136 : : *
137 : : * Arguments:
138 : : * op: The operation to be performed.
139 : : * handle: The handle of an existing object, or for DSM_OP_CREATE, the
140 : : * identifier for the new handle the caller wants created.
141 : : * request_size: For DSM_OP_CREATE, the requested size. Otherwise, 0.
142 : : * impl_private: Private, implementation-specific data. Will be a pointer
143 : : * to NULL for the first operation on a shared memory segment within this
144 : : * backend; thereafter, it will point to the value to which it was set
145 : : * on the previous call.
146 : : * mapped_address: Pointer to start of current mapping; pointer to NULL
147 : : * if none. Updated with new mapping address.
148 : : * mapped_size: Pointer to size of current mapping; pointer to 0 if none.
149 : : * Updated with new mapped size.
150 : : * elevel: Level at which to log errors.
151 : : *
152 : : * Return value: true on success, false on failure. When false is returned,
153 : : * a message should first be logged at the specified elevel, except in the
154 : : * case where DSM_OP_CREATE experiences a name collision, which should
155 : : * silently return false.
156 : : *-----
157 : : */
158 : : bool
3821 rhaas@postgresql.org 159 :CBC 43828 : dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size,
160 : : void **impl_private, void **mapped_address, Size *mapped_size,
161 : : int elevel)
162 : : {
1986 tmunro@postgresql.or 163 [ + + - + ]: 43828 : Assert(op == DSM_OP_CREATE || request_size == 0);
3840 rhaas@postgresql.org 164 [ + + + + : 43828 : Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
+ - - + ]
165 : : (*mapped_address == NULL && *mapped_size == 0));
166 : :
167 [ + - - - ]: 43828 : switch (dynamic_shared_memory_type)
168 : : {
169 : : #ifdef USE_DSM_POSIX
170 : 43828 : case DSM_IMPL_POSIX:
171 : 43828 : return dsm_impl_posix(op, handle, request_size, impl_private,
172 : : mapped_address, mapped_size, elevel);
173 : : #endif
174 : : #ifdef USE_DSM_SYSV
3840 rhaas@postgresql.org 175 :UBC 0 : case DSM_IMPL_SYSV:
176 : 0 : return dsm_impl_sysv(op, handle, request_size, impl_private,
177 : : mapped_address, mapped_size, elevel);
178 : : #endif
179 : : #ifdef USE_DSM_WINDOWS
180 : : case DSM_IMPL_WINDOWS:
181 : : return dsm_impl_windows(op, handle, request_size, impl_private,
182 : : mapped_address, mapped_size, elevel);
183 : : #endif
184 : : #ifdef USE_DSM_MMAP
185 : 0 : case DSM_IMPL_MMAP:
186 : 0 : return dsm_impl_mmap(op, handle, request_size, impl_private,
187 : : mapped_address, mapped_size, elevel);
188 : : #endif
3827 tgl@sss.pgh.pa.us 189 : 0 : default:
190 [ # # ]: 0 : elog(ERROR, "unexpected dynamic shared memory type: %d",
191 : : dynamic_shared_memory_type);
192 : : return false;
193 : : }
194 : : }
195 : :
196 : : #ifdef USE_DSM_POSIX
197 : : /*
198 : : * Operating system primitives to support POSIX shared memory.
199 : : *
200 : : * POSIX shared memory segments are created and attached using shm_open()
201 : : * and shm_unlink(); other operations, such as sizing or mapping the
202 : : * segment, are performed as if the shared memory segments were files.
203 : : *
204 : : * Indeed, on some platforms, they may be implemented that way. While
205 : : * POSIX shared memory segments seem intended to exist in a flat namespace,
206 : : * some operating systems may implement them as files, even going so far
207 : : * to treat a request for /xyz as a request to create a file by that name
208 : : * in the root directory. Users of such broken platforms should select
209 : : * a different shared memory implementation.
210 : : */
211 : : static bool
3821 rhaas@postgresql.org 212 :CBC 43828 : dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
213 : : void **impl_private, void **mapped_address, Size *mapped_size,
214 : : int elevel)
215 : : {
216 : : char name[64];
217 : : int flags;
218 : : int fd;
219 : : char *address;
220 : :
3840 221 : 43828 : snprintf(name, 64, "/PostgreSQL.%u", handle);
222 : :
223 : : /* Handle teardown cases. */
224 [ + + + + ]: 43828 : if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
225 : : {
226 [ + + ]: 22373 : if (*mapped_address != NULL
227 [ - + ]: 20871 : && munmap(*mapped_address, *mapped_size) != 0)
228 : : {
3840 rhaas@postgresql.org 229 [ # # ]:UBC 0 : ereport(elevel,
230 : : (errcode_for_dynamic_shared_memory(),
231 : : errmsg("could not unmap shared memory segment \"%s\": %m",
232 : : name)));
233 : 0 : return false;
234 : : }
3840 rhaas@postgresql.org 235 :CBC 22373 : *mapped_address = NULL;
236 : 22373 : *mapped_size = 0;
237 [ + + - + ]: 22373 : if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
238 : : {
3840 rhaas@postgresql.org 239 [ # # ]:UBC 0 : ereport(elevel,
240 : : (errcode_for_dynamic_shared_memory(),
241 : : errmsg("could not remove shared memory segment \"%s\": %m",
242 : : name)));
243 : 0 : return false;
244 : : }
3840 rhaas@postgresql.org 245 :CBC 22373 : return true;
246 : : }
247 : :
248 : : /*
249 : : * Create new segment or open an existing one for attach.
250 : : *
251 : : * Even though we will close the FD before returning, it seems desirable
252 : : * to use Reserve/ReleaseExternalFD, to reduce the probability of EMFILE
253 : : * failure. The fact that we won't hold the FD open long justifies using
254 : : * ReserveExternalFD rather than AcquireExternalFD, though.
255 : : */
1511 tgl@sss.pgh.pa.us 256 : 21455 : ReserveExternalFD();
257 : :
3840 rhaas@postgresql.org 258 [ + + ]: 21455 : flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
2199 sfrost@snowman.net 259 [ - + ]: 21455 : if ((fd = shm_open(name, flags, PG_FILE_MODE_OWNER)) == -1)
260 : : {
1511 tgl@sss.pgh.pa.us 261 :UBC 0 : ReleaseExternalFD();
653 tmunro@postgresql.or 262 [ # # # # ]: 0 : if (op == DSM_OP_ATTACH || errno != EEXIST)
3840 rhaas@postgresql.org 263 [ # # ]: 0 : ereport(elevel,
264 : : (errcode_for_dynamic_shared_memory(),
265 : : errmsg("could not open shared memory segment \"%s\": %m",
266 : : name)));
267 : 0 : return false;
268 : : }
269 : :
270 : : /*
271 : : * If we're attaching the segment, determine the current size; if we are
272 : : * creating the segment, set the size to the requested value.
273 : : */
3840 rhaas@postgresql.org 274 [ + + ]:CBC 21455 : if (op == DSM_OP_ATTACH)
275 : : {
276 : : struct stat st;
277 : :
278 [ - + ]: 19059 : if (fstat(fd, &st) != 0)
279 : : {
280 : : int save_errno;
281 : :
282 : : /* Back out what's already been done. */
3840 rhaas@postgresql.org 283 :UBC 0 : save_errno = errno;
284 : 0 : close(fd);
1511 tgl@sss.pgh.pa.us 285 : 0 : ReleaseExternalFD();
3840 rhaas@postgresql.org 286 : 0 : errno = save_errno;
287 : :
288 [ # # ]: 0 : ereport(elevel,
289 : : (errcode_for_dynamic_shared_memory(),
290 : : errmsg("could not stat shared memory segment \"%s\": %m",
291 : : name)));
292 : 0 : return false;
293 : : }
3840 rhaas@postgresql.org 294 :CBC 19059 : request_size = st.st_size;
295 : : }
1986 tmunro@postgresql.or 296 [ - + ]: 2396 : else if (dsm_impl_posix_resize(fd, request_size) != 0)
297 : : {
298 : : int save_errno;
299 : :
300 : : /* Back out what's already been done. */
3840 rhaas@postgresql.org 301 :UBC 0 : save_errno = errno;
302 : 0 : close(fd);
1511 tgl@sss.pgh.pa.us 303 : 0 : ReleaseExternalFD();
1986 tmunro@postgresql.or 304 : 0 : shm_unlink(name);
3840 rhaas@postgresql.org 305 : 0 : errno = save_errno;
306 : :
307 [ # # ]: 0 : ereport(elevel,
308 : : (errcode_for_dynamic_shared_memory(),
309 : : errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
310 : : name, request_size)));
311 : 0 : return false;
312 : : }
313 : :
314 : : /* Map it. */
3631 bruce@momjian.us 315 :CBC 21455 : address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
316 : : MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
3840 rhaas@postgresql.org 317 [ - + ]: 21455 : if (address == MAP_FAILED)
318 : : {
319 : : int save_errno;
320 : :
321 : : /* Back out what's already been done. */
3840 rhaas@postgresql.org 322 :UBC 0 : save_errno = errno;
323 : 0 : close(fd);
1511 tgl@sss.pgh.pa.us 324 : 0 : ReleaseExternalFD();
3840 rhaas@postgresql.org 325 [ # # ]: 0 : if (op == DSM_OP_CREATE)
326 : 0 : shm_unlink(name);
327 : 0 : errno = save_errno;
328 : :
329 [ # # ]: 0 : ereport(elevel,
330 : : (errcode_for_dynamic_shared_memory(),
331 : : errmsg("could not map shared memory segment \"%s\": %m",
332 : : name)));
333 : 0 : return false;
334 : : }
3840 rhaas@postgresql.org 335 :CBC 21455 : *mapped_address = address;
336 : 21455 : *mapped_size = request_size;
337 : 21455 : close(fd);
1511 tgl@sss.pgh.pa.us 338 : 21455 : ReleaseExternalFD();
339 : :
3840 rhaas@postgresql.org 340 : 21455 : return true;
341 : : }
342 : :
343 : : /*
344 : : * Set the size of a virtual memory region associated with a file descriptor.
345 : : * If necessary, also ensure that virtual memory is actually allocated by the
346 : : * operating system, to avoid nasty surprises later.
347 : : *
348 : : * Returns non-zero if either truncation or allocation fails, and sets errno.
349 : : */
350 : : static int
2393 tgl@sss.pgh.pa.us 351 : 2396 : dsm_impl_posix_resize(int fd, off_t size)
352 : : {
353 : : int rc;
354 : : int save_errno;
355 : : sigset_t save_sigmask;
356 : :
357 : : /*
358 : : * Block all blockable signals, except SIGQUIT. posix_fallocate() can run
359 : : * for quite a long time, and is an all-or-nothing operation. If we
360 : : * allowed SIGUSR1 to interrupt us repeatedly (for example, due to
361 : : * recovery conflicts), the retry loop might never succeed.
362 : : */
639 tmunro@postgresql.or 363 [ + + ]: 2396 : if (IsUnderPostmaster)
638 364 : 1411 : sigprocmask(SIG_SETMASK, &BlockSig, &save_sigmask);
365 : :
640 366 : 2396 : pgstat_report_wait_start(WAIT_EVENT_DSM_ALLOCATE);
367 : : #if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__)
368 : :
369 : : /*
370 : : * On Linux, a shm_open fd is backed by a tmpfs file. If we were to use
371 : : * ftruncate, the file would contain a hole. Accessing memory backed by a
372 : : * hole causes tmpfs to allocate pages, which fails with SIGBUS if there
373 : : * is no more tmpfs space available. So we ask tmpfs to allocate pages
374 : : * here, so we can fail gracefully with ENOSPC now rather than risking
375 : : * SIGBUS later.
376 : : *
377 : : * We still use a traditional EINTR retry loop to handle SIGCONT.
378 : : * posix_fallocate() doesn't restart automatically, and we don't want this
379 : : * to fail if you attach a debugger.
380 : : */
381 : : do
382 : : {
383 : 2396 : rc = posix_fallocate(fd, 0, size);
384 [ - + ]: 2396 : } while (rc == EINTR);
385 : :
386 : : /*
387 : : * The caller expects errno to be set, but posix_fallocate() doesn't set
388 : : * it. Instead it returns error numbers directly. So set errno, even
389 : : * though we'll also return rc to indicate success or failure.
390 : : */
391 : 2396 : errno = rc;
392 : : #else
393 : : /* Extend the file to the requested size. */
394 : : do
395 : : {
396 : : rc = ftruncate(fd, size);
397 : : } while (rc < 0 && errno == EINTR);
398 : : #endif
399 : 2396 : pgstat_report_wait_end();
400 : :
639 401 [ + + ]: 2396 : if (IsUnderPostmaster)
402 : : {
403 : 1411 : save_errno = errno;
638 404 : 1411 : sigprocmask(SIG_SETMASK, &save_sigmask, NULL);
639 405 : 1411 : errno = save_errno;
406 : : }
407 : :
2393 tgl@sss.pgh.pa.us 408 : 2396 : return rc;
409 : : }
410 : :
411 : : #endif /* USE_DSM_POSIX */
412 : :
413 : : #ifdef USE_DSM_SYSV
414 : : /*
415 : : * Operating system primitives to support System V shared memory.
416 : : *
417 : : * System V shared memory segments are manipulated using shmget(), shmat(),
418 : : * shmdt(), and shmctl(). As the default allocation limits for System V
419 : : * shared memory are usually quite low, the POSIX facilities may be
420 : : * preferable; but those are not supported everywhere.
421 : : */
422 : : static bool
3821 rhaas@postgresql.org 423 :UBC 0 : dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
424 : : void **impl_private, void **mapped_address, Size *mapped_size,
425 : : int elevel)
426 : : {
427 : : key_t key;
428 : : int ident;
429 : : char *address;
430 : : char name[64];
431 : : int *ident_cache;
432 : :
433 : : /*
434 : : * POSIX shared memory and mmap-based shared memory identify segments with
435 : : * names. To avoid needless error message variation, we use the handle as
436 : : * the name.
437 : : */
3840 438 : 0 : snprintf(name, 64, "%u", handle);
439 : :
440 : : /*
441 : : * The System V shared memory namespace is very restricted; names are of
442 : : * type key_t, which is expected to be some sort of integer data type, but
443 : : * not necessarily the same one as dsm_handle. Since we use dsm_handle to
444 : : * identify shared memory segments across processes, this might seem like
445 : : * a problem, but it's really not. If dsm_handle is bigger than key_t,
446 : : * the cast below might truncate away some bits from the handle the
447 : : * user-provided, but it'll truncate exactly the same bits away in exactly
448 : : * the same fashion every time we use that handle, which is all that
449 : : * really matters. Conversely, if dsm_handle is smaller than key_t, we
450 : : * won't use the full range of available key space, but that's no big deal
451 : : * either.
452 : : *
453 : : * We do make sure that the key isn't negative, because that might not be
454 : : * portable.
455 : : */
456 : 0 : key = (key_t) handle;
3631 bruce@momjian.us 457 [ # # ]: 0 : if (key < 1) /* avoid compiler warning if type is unsigned */
3840 rhaas@postgresql.org 458 : 0 : key = -key;
459 : :
460 : : /*
461 : : * There's one special key, IPC_PRIVATE, which can't be used. If we end
462 : : * up with that value by chance during a create operation, just pretend it
463 : : * already exists, so that caller will retry. If we run into it anywhere
464 : : * else, the caller has passed a handle that doesn't correspond to
465 : : * anything we ever created, which should not happen.
466 : : */
467 [ # # ]: 0 : if (key == IPC_PRIVATE)
468 : : {
469 [ # # ]: 0 : if (op != DSM_OP_CREATE)
470 [ # # ]: 0 : elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
471 : 0 : errno = EEXIST;
472 : 0 : return false;
473 : : }
474 : :
475 : : /*
476 : : * Before we can do anything with a shared memory segment, we have to map
477 : : * the shared memory key to a shared memory identifier using shmget(). To
478 : : * avoid repeated lookups, we store the key using impl_private.
479 : : */
480 [ # # ]: 0 : if (*impl_private != NULL)
481 : : {
482 : 0 : ident_cache = *impl_private;
483 : 0 : ident = *ident_cache;
484 : : }
485 : : else
486 : : {
3631 bruce@momjian.us 487 : 0 : int flags = IPCProtection;
488 : : size_t segsize;
489 : :
490 : : /*
491 : : * Allocate the memory BEFORE acquiring the resource, so that we don't
492 : : * leak the resource if memory allocation fails.
493 : : */
3840 rhaas@postgresql.org 494 : 0 : ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
495 : :
496 : : /*
497 : : * When using shmget to find an existing segment, we must pass the
498 : : * size as 0. Passing a non-zero size which is greater than the
499 : : * actual size will result in EINVAL.
500 : : */
501 : 0 : segsize = 0;
502 : :
503 [ # # ]: 0 : if (op == DSM_OP_CREATE)
504 : : {
505 : 0 : flags |= IPC_CREAT | IPC_EXCL;
506 : 0 : segsize = request_size;
507 : : }
508 : :
509 [ # # ]: 0 : if ((ident = shmget(key, segsize, flags)) == -1)
510 : : {
653 tmunro@postgresql.or 511 [ # # # # ]: 0 : if (op == DSM_OP_ATTACH || errno != EEXIST)
512 : : {
3631 bruce@momjian.us 513 : 0 : int save_errno = errno;
514 : :
3840 rhaas@postgresql.org 515 : 0 : pfree(ident_cache);
516 : 0 : errno = save_errno;
517 [ # # ]: 0 : ereport(elevel,
518 : : (errcode_for_dynamic_shared_memory(),
519 : : errmsg("could not get shared memory segment: %m")));
520 : : }
521 : 0 : return false;
522 : : }
523 : :
524 : 0 : *ident_cache = ident;
525 : 0 : *impl_private = ident_cache;
526 : : }
527 : :
528 : : /* Handle teardown cases. */
529 [ # # # # ]: 0 : if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
530 : : {
531 : 0 : pfree(ident_cache);
532 : 0 : *impl_private = NULL;
533 [ # # # # ]: 0 : if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
534 : : {
535 [ # # ]: 0 : ereport(elevel,
536 : : (errcode_for_dynamic_shared_memory(),
537 : : errmsg("could not unmap shared memory segment \"%s\": %m",
538 : : name)));
539 : 0 : return false;
540 : : }
541 : 0 : *mapped_address = NULL;
542 : 0 : *mapped_size = 0;
543 [ # # # # ]: 0 : if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
544 : : {
545 [ # # ]: 0 : ereport(elevel,
546 : : (errcode_for_dynamic_shared_memory(),
547 : : errmsg("could not remove shared memory segment \"%s\": %m",
548 : : name)));
549 : 0 : return false;
550 : : }
551 : 0 : return true;
552 : : }
553 : :
554 : : /* If we're attaching it, we must use IPC_STAT to determine the size. */
555 [ # # ]: 0 : if (op == DSM_OP_ATTACH)
556 : : {
557 : : struct shmid_ds shm;
558 : :
559 [ # # ]: 0 : if (shmctl(ident, IPC_STAT, &shm) != 0)
560 : : {
561 [ # # ]: 0 : ereport(elevel,
562 : : (errcode_for_dynamic_shared_memory(),
563 : : errmsg("could not stat shared memory segment \"%s\": %m",
564 : : name)));
565 : 0 : return false;
566 : : }
567 : 0 : request_size = shm.shm_segsz;
568 : : }
569 : :
570 : : /* Map it. */
571 : 0 : address = shmat(ident, NULL, PG_SHMAT_FLAGS);
572 [ # # ]: 0 : if (address == (void *) -1)
573 : : {
574 : : int save_errno;
575 : :
576 : : /* Back out what's already been done. */
577 : 0 : save_errno = errno;
578 [ # # ]: 0 : if (op == DSM_OP_CREATE)
579 : 0 : shmctl(ident, IPC_RMID, NULL);
580 : 0 : errno = save_errno;
581 : :
582 [ # # ]: 0 : ereport(elevel,
583 : : (errcode_for_dynamic_shared_memory(),
584 : : errmsg("could not map shared memory segment \"%s\": %m",
585 : : name)));
586 : 0 : return false;
587 : : }
588 : 0 : *mapped_address = address;
589 : 0 : *mapped_size = request_size;
590 : :
591 : 0 : return true;
592 : : }
593 : : #endif
594 : :
595 : : #ifdef USE_DSM_WINDOWS
596 : : /*
597 : : * Operating system primitives to support Windows shared memory.
598 : : *
599 : : * Windows shared memory implementation is done using file mapping
600 : : * which can be backed by either physical file or system paging file.
601 : : * Current implementation uses system paging file as other effects
602 : : * like performance are not clear for physical file and it is used in similar
603 : : * way for main shared memory in windows.
604 : : *
605 : : * A memory mapping object is a kernel object - they always get deleted when
606 : : * the last reference to them goes away, either explicitly via a CloseHandle or
607 : : * when the process containing the reference exits.
608 : : */
609 : : static bool
610 : : dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
611 : : void **impl_private, void **mapped_address,
612 : : Size *mapped_size, int elevel)
613 : : {
614 : : char *address;
615 : : HANDLE hmap;
616 : : char name[64];
617 : : MEMORY_BASIC_INFORMATION info;
618 : :
619 : : /*
620 : : * Storing the shared memory segment in the Global\ namespace, can allow
621 : : * any process running in any session to access that file mapping object
622 : : * provided that the caller has the required access rights. But to avoid
623 : : * issues faced in main shared memory, we are using the naming convention
624 : : * similar to main shared memory. We can change here once issue mentioned
625 : : * in GetSharedMemName is resolved.
626 : : */
627 : : snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
628 : :
629 : : /*
630 : : * Handle teardown cases. Since Windows automatically destroys the object
631 : : * when no references remain, we can treat it the same as detach.
632 : : */
633 : : if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
634 : : {
635 : : if (*mapped_address != NULL
636 : : && UnmapViewOfFile(*mapped_address) == 0)
637 : : {
638 : : _dosmaperr(GetLastError());
639 : : ereport(elevel,
640 : : (errcode_for_dynamic_shared_memory(),
641 : : errmsg("could not unmap shared memory segment \"%s\": %m",
642 : : name)));
643 : : return false;
644 : : }
645 : : if (*impl_private != NULL
646 : : && CloseHandle(*impl_private) == 0)
647 : : {
648 : : _dosmaperr(GetLastError());
649 : : ereport(elevel,
650 : : (errcode_for_dynamic_shared_memory(),
651 : : errmsg("could not remove shared memory segment \"%s\": %m",
652 : : name)));
653 : : return false;
654 : : }
655 : :
656 : : *impl_private = NULL;
657 : : *mapped_address = NULL;
658 : : *mapped_size = 0;
659 : : return true;
660 : : }
661 : :
662 : : /* Create new segment or open an existing one for attach. */
663 : : if (op == DSM_OP_CREATE)
664 : : {
665 : : DWORD size_high;
666 : : DWORD size_low;
667 : : DWORD errcode;
668 : :
669 : : /* Shifts >= the width of the type are undefined. */
670 : : #ifdef _WIN64
671 : : size_high = request_size >> 32;
672 : : #else
673 : : size_high = 0;
674 : : #endif
675 : : size_low = (DWORD) request_size;
676 : :
677 : : /* CreateFileMapping might not clear the error code on success */
678 : : SetLastError(0);
679 : :
680 : : hmap = CreateFileMapping(INVALID_HANDLE_VALUE, /* Use the pagefile */
681 : : NULL, /* Default security attrs */
682 : : PAGE_READWRITE, /* Memory is read/write */
683 : : size_high, /* Upper 32 bits of size */
684 : : size_low, /* Lower 32 bits of size */
685 : : name);
686 : :
687 : : errcode = GetLastError();
688 : : if (errcode == ERROR_ALREADY_EXISTS || errcode == ERROR_ACCESS_DENIED)
689 : : {
690 : : /*
691 : : * On Windows, when the segment already exists, a handle for the
692 : : * existing segment is returned. We must close it before
693 : : * returning. However, if the existing segment is created by a
694 : : * service, then it returns ERROR_ACCESS_DENIED. We don't do
695 : : * _dosmaperr here, so errno won't be modified.
696 : : */
697 : : if (hmap)
698 : : CloseHandle(hmap);
699 : : return false;
700 : : }
701 : :
702 : : if (!hmap)
703 : : {
704 : : _dosmaperr(errcode);
705 : : ereport(elevel,
706 : : (errcode_for_dynamic_shared_memory(),
707 : : errmsg("could not create shared memory segment \"%s\": %m",
708 : : name)));
709 : : return false;
710 : : }
711 : : }
712 : : else
713 : : {
714 : : hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
715 : : FALSE, /* do not inherit the name */
716 : : name); /* name of mapping object */
717 : : if (!hmap)
718 : : {
719 : : _dosmaperr(GetLastError());
720 : : ereport(elevel,
721 : : (errcode_for_dynamic_shared_memory(),
722 : : errmsg("could not open shared memory segment \"%s\": %m",
723 : : name)));
724 : : return false;
725 : : }
726 : : }
727 : :
728 : : /* Map it. */
729 : : address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
730 : : 0, 0, 0);
731 : : if (!address)
732 : : {
733 : : int save_errno;
734 : :
735 : : _dosmaperr(GetLastError());
736 : : /* Back out what's already been done. */
737 : : save_errno = errno;
738 : : CloseHandle(hmap);
739 : : errno = save_errno;
740 : :
741 : : ereport(elevel,
742 : : (errcode_for_dynamic_shared_memory(),
743 : : errmsg("could not map shared memory segment \"%s\": %m",
744 : : name)));
745 : : return false;
746 : : }
747 : :
748 : : /*
749 : : * VirtualQuery gives size in page_size units, which is 4K for Windows. We
750 : : * need size only when we are attaching, but it's better to get the size
751 : : * when creating new segment to keep size consistent both for
752 : : * DSM_OP_CREATE and DSM_OP_ATTACH.
753 : : */
754 : : if (VirtualQuery(address, &info, sizeof(info)) == 0)
755 : : {
756 : : int save_errno;
757 : :
758 : : _dosmaperr(GetLastError());
759 : : /* Back out what's already been done. */
760 : : save_errno = errno;
761 : : UnmapViewOfFile(address);
762 : : CloseHandle(hmap);
763 : : errno = save_errno;
764 : :
765 : : ereport(elevel,
766 : : (errcode_for_dynamic_shared_memory(),
767 : : errmsg("could not stat shared memory segment \"%s\": %m",
768 : : name)));
769 : : return false;
770 : : }
771 : :
772 : : *mapped_address = address;
773 : : *mapped_size = info.RegionSize;
774 : : *impl_private = hmap;
775 : :
776 : : return true;
777 : : }
778 : : #endif
779 : :
780 : : #ifdef USE_DSM_MMAP
781 : : /*
782 : : * Operating system primitives to support mmap-based shared memory.
783 : : *
784 : : * Calling this "shared memory" is somewhat of a misnomer, because what
785 : : * we're really doing is creating a bunch of files and mapping them into
786 : : * our address space. The operating system may feel obliged to
787 : : * synchronize the contents to disk even if nothing is being paged out,
788 : : * which will not serve us well. The user can relocate the pg_dynshmem
789 : : * directory to a ramdisk to avoid this problem, if available.
790 : : */
791 : : static bool
3821 792 : 0 : dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
793 : : void **impl_private, void **mapped_address, Size *mapped_size,
794 : : int elevel)
795 : : {
796 : : char name[64];
797 : : int flags;
798 : : int fd;
799 : : char *address;
800 : :
3840 801 : 0 : snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
802 : : handle);
803 : :
804 : : /* Handle teardown cases. */
805 [ # # # # ]: 0 : if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
806 : : {
807 [ # # ]: 0 : if (*mapped_address != NULL
808 [ # # ]: 0 : && munmap(*mapped_address, *mapped_size) != 0)
809 : : {
810 [ # # ]: 0 : ereport(elevel,
811 : : (errcode_for_dynamic_shared_memory(),
812 : : errmsg("could not unmap shared memory segment \"%s\": %m",
813 : : name)));
814 : 0 : return false;
815 : : }
816 : 0 : *mapped_address = NULL;
817 : 0 : *mapped_size = 0;
818 [ # # # # ]: 0 : if (op == DSM_OP_DESTROY && unlink(name) != 0)
819 : : {
820 [ # # ]: 0 : ereport(elevel,
821 : : (errcode_for_dynamic_shared_memory(),
822 : : errmsg("could not remove shared memory segment \"%s\": %m",
823 : : name)));
824 : 0 : return false;
825 : : }
826 : 0 : return true;
827 : : }
828 : :
829 : : /* Create new segment or open an existing one for attach. */
830 [ # # ]: 0 : flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
2395 peter_e@gmx.net 831 [ # # ]: 0 : if ((fd = OpenTransientFile(name, flags)) == -1)
832 : : {
653 tmunro@postgresql.or 833 [ # # # # ]: 0 : if (op == DSM_OP_ATTACH || errno != EEXIST)
3840 rhaas@postgresql.org 834 [ # # ]: 0 : ereport(elevel,
835 : : (errcode_for_dynamic_shared_memory(),
836 : : errmsg("could not open shared memory segment \"%s\": %m",
837 : : name)));
838 : 0 : return false;
839 : : }
840 : :
841 : : /*
842 : : * If we're attaching the segment, determine the current size; if we are
843 : : * creating the segment, set the size to the requested value.
844 : : */
845 [ # # ]: 0 : if (op == DSM_OP_ATTACH)
846 : : {
847 : : struct stat st;
848 : :
849 [ # # ]: 0 : if (fstat(fd, &st) != 0)
850 : : {
851 : : int save_errno;
852 : :
853 : : /* Back out what's already been done. */
854 : 0 : save_errno = errno;
855 : 0 : CloseTransientFile(fd);
856 : 0 : errno = save_errno;
857 : :
858 [ # # ]: 0 : ereport(elevel,
859 : : (errcode_for_dynamic_shared_memory(),
860 : : errmsg("could not stat shared memory segment \"%s\": %m",
861 : : name)));
862 : 0 : return false;
863 : : }
864 : 0 : request_size = st.st_size;
865 : : }
866 : : else
867 : : {
868 : : /*
869 : : * Allocate a buffer full of zeros.
870 : : *
871 : : * Note: palloc zbuffer, instead of just using a local char array, to
872 : : * ensure it is reasonably well-aligned; this may save a few cycles
873 : : * transferring data to the kernel.
874 : : */
3631 bruce@momjian.us 875 : 0 : char *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
61 heikki.linnakangas@i 876 : 0 : Size remaining = request_size;
3631 bruce@momjian.us 877 : 0 : bool success = true;
878 : :
879 : : /*
880 : : * Zero-fill the file. We have to do this the hard way to ensure that
881 : : * all the file space has really been allocated, so that we don't
882 : : * later seg fault when accessing the memory mapping. This is pretty
883 : : * pessimal.
884 : : */
3840 rhaas@postgresql.org 885 [ # # # # ]: 0 : while (success && remaining > 0)
886 : : {
3631 bruce@momjian.us 887 : 0 : Size goal = remaining;
888 : :
3840 rhaas@postgresql.org 889 [ # # ]: 0 : if (goal > ZBUFFER_SIZE)
890 : 0 : goal = ZBUFFER_SIZE;
2584 891 : 0 : pgstat_report_wait_start(WAIT_EVENT_DSM_FILL_ZERO_WRITE);
3840 892 [ # # ]: 0 : if (write(fd, zbuffer, goal) == goal)
893 : 0 : remaining -= goal;
894 : : else
895 : 0 : success = false;
2584 896 : 0 : pgstat_report_wait_end();
897 : : }
898 : :
3840 899 [ # # ]: 0 : if (!success)
900 : : {
901 : : int save_errno;
902 : :
903 : : /* Back out what's already been done. */
904 : 0 : save_errno = errno;
905 : 0 : CloseTransientFile(fd);
1986 tmunro@postgresql.or 906 : 0 : unlink(name);
3840 rhaas@postgresql.org 907 [ # # ]: 0 : errno = save_errno ? save_errno : ENOSPC;
908 : :
909 [ # # ]: 0 : ereport(elevel,
910 : : (errcode_for_dynamic_shared_memory(),
911 : : errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
912 : : name, request_size)));
913 : 0 : return false;
914 : : }
915 : : }
916 : :
917 : : /* Map it. */
3631 bruce@momjian.us 918 : 0 : address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
919 : : MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
3840 rhaas@postgresql.org 920 [ # # ]: 0 : if (address == MAP_FAILED)
921 : : {
922 : : int save_errno;
923 : :
924 : : /* Back out what's already been done. */
925 : 0 : save_errno = errno;
926 : 0 : CloseTransientFile(fd);
927 [ # # ]: 0 : if (op == DSM_OP_CREATE)
928 : 0 : unlink(name);
929 : 0 : errno = save_errno;
930 : :
931 [ # # ]: 0 : ereport(elevel,
932 : : (errcode_for_dynamic_shared_memory(),
933 : : errmsg("could not map shared memory segment \"%s\": %m",
934 : : name)));
935 : 0 : return false;
936 : : }
937 : 0 : *mapped_address = address;
938 : 0 : *mapped_size = request_size;
939 : :
1744 peter@eisentraut.org 940 [ # # ]: 0 : if (CloseTransientFile(fd) != 0)
941 : : {
1863 michael@paquier.xyz 942 [ # # ]: 0 : ereport(elevel,
943 : : (errcode_for_file_access(),
944 : : errmsg("could not close shared memory segment \"%s\": %m",
945 : : name)));
946 : 0 : return false;
947 : : }
948 : :
3840 rhaas@postgresql.org 949 : 0 : return true;
950 : : }
951 : : #endif
952 : :
953 : : /*
954 : : * Implementation-specific actions that must be performed when a segment is to
955 : : * be preserved even when no backend has it attached.
956 : : *
957 : : * Except on Windows, we don't need to do anything at all. But since Windows
958 : : * cleans up segments automatically when no references remain, we duplicate
959 : : * the segment handle into the postmaster process. The postmaster needn't
960 : : * do anything to receive the handle; Windows transfers it automatically.
961 : : */
962 : : void
2791 rhaas@postgresql.org 963 :CBC 1008 : dsm_impl_pin_segment(dsm_handle handle, void *impl_private,
964 : : void **impl_private_pm_handle)
965 : : {
3688 966 : 1008 : switch (dynamic_shared_memory_type)
967 : : {
968 : : #ifdef USE_DSM_WINDOWS
969 : : case DSM_IMPL_WINDOWS:
970 : : if (IsUnderPostmaster)
971 : : {
972 : : HANDLE hmap;
973 : :
974 : : if (!DuplicateHandle(GetCurrentProcess(), impl_private,
975 : : PostmasterHandle, &hmap, 0, FALSE,
976 : : DUPLICATE_SAME_ACCESS))
977 : : {
978 : : char name[64];
979 : :
980 : : snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
981 : : _dosmaperr(GetLastError());
982 : : ereport(ERROR,
983 : : (errcode_for_dynamic_shared_memory(),
984 : : errmsg("could not duplicate handle for \"%s\": %m",
985 : : name)));
986 : : }
987 : :
988 : : /*
989 : : * Here, we remember the handle that we created in the
990 : : * postmaster process. This handle isn't actually usable in
991 : : * any process other than the postmaster, but that doesn't
992 : : * matter. We're just holding onto it so that, if the segment
993 : : * is unpinned, dsm_impl_unpin_segment can close it.
994 : : */
995 : : *impl_private_pm_handle = hmap;
996 : : }
997 : : break;
998 : : #endif
999 : : default:
2791 1000 : 1008 : break;
1001 : : }
1002 : 1008 : }
1003 : :
1004 : : /*
1005 : : * Implementation-specific actions that must be performed when a segment is no
1006 : : * longer to be preserved, so that it will be cleaned up when all backends
1007 : : * have detached from it.
1008 : : *
1009 : : * Except on Windows, we don't need to do anything at all. For Windows, we
1010 : : * close the extra handle that dsm_impl_pin_segment created in the
1011 : : * postmaster's process space.
1012 : : */
1013 : : void
1014 : 139 : dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
1015 : : {
1016 : 139 : switch (dynamic_shared_memory_type)
1017 : : {
1018 : : #ifdef USE_DSM_WINDOWS
1019 : : case DSM_IMPL_WINDOWS:
1020 : : if (IsUnderPostmaster)
1021 : : {
1022 : : if (*impl_private &&
1023 : : !DuplicateHandle(PostmasterHandle, *impl_private,
1024 : : NULL, NULL, 0, FALSE,
1025 : : DUPLICATE_CLOSE_SOURCE))
1026 : : {
1027 : : char name[64];
1028 : :
1029 : : snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
1030 : : _dosmaperr(GetLastError());
1031 : : ereport(ERROR,
1032 : : (errcode_for_dynamic_shared_memory(),
1033 : : errmsg("could not duplicate handle for \"%s\": %m",
1034 : : name)));
1035 : : }
1036 : :
1037 : : *impl_private = NULL;
1038 : : }
1039 : : break;
1040 : : #endif
1041 : : default:
3688 1042 : 139 : break;
1043 : : }
1044 : 139 : }
1045 : :
1046 : : static int
3165 andres@anarazel.de 1047 :UBC 0 : errcode_for_dynamic_shared_memory(void)
1048 : : {
3840 rhaas@postgresql.org 1049 [ # # # # ]: 0 : if (errno == EFBIG || errno == ENOMEM)
1481 tgl@sss.pgh.pa.us 1050 : 0 : return errcode(ERRCODE_OUT_OF_MEMORY);
1051 : : else
1052 : 0 : return errcode_for_file_access();
1053 : : }
|