LCOV - differential code coverage report
Current view: top level - src/backend/storage/ipc - dsm_impl.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 24.4 % 205 50 4 95 56 6 34 5 5 93 36 5
Current Date: 2023-04-08 15:15:32 Functions: 62.5 % 8 5 3 5 3 5
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * dsm_impl.c
       4                 :  *    manage dynamic shared memory segments
       5                 :  *
       6                 :  * This file provides low-level APIs for creating and destroying shared
       7                 :  * memory segments using several different possible techniques.  We refer
       8                 :  * to these segments as dynamic because they can be created, altered, and
       9                 :  * destroyed at any point during the server life cycle.  This is unlike
      10                 :  * the main shared memory segment, of which there is always exactly one
      11                 :  * and which is always mapped at a fixed address in every PostgreSQL
      12                 :  * background process.
      13                 :  *
      14                 :  * Because not all systems provide the same primitives in this area, nor
      15                 :  * do all primitives behave the same way on all systems, we provide
      16                 :  * several implementations of this facility.  Many systems implement
      17                 :  * POSIX shared memory (shm_open etc.), which is well-suited to our needs
      18                 :  * in this area, with the exception that shared memory identifiers live
      19                 :  * in a flat system-wide namespace, raising the uncomfortable prospect of
      20                 :  * name collisions with other processes (including other copies of
      21                 :  * PostgreSQL) running on the same system.  Some systems only support
      22                 :  * the older System V shared memory interface (shmget etc.) which is
      23                 :  * also usable; however, the default allocation limits are often quite
      24                 :  * small, and the namespace is even more restricted.
      25                 :  *
      26                 :  * We also provide an mmap-based shared memory implementation.  This may
      27                 :  * be useful on systems that provide shared memory via a special-purpose
      28                 :  * filesystem; by opting for this implementation, the user can even
      29                 :  * control precisely where their shared memory segments are placed.  It
      30                 :  * can also be used as a fallback for systems where shm_open and shmget
      31                 :  * are not available or can't be used for some reason.  Of course,
      32                 :  * mapping a file residing on an actual spinning disk is a fairly poor
      33                 :  * approximation for shared memory because writeback may hurt performance
      34                 :  * substantially, but there should be few systems where we must make do
      35                 :  * with such poor tools.
      36                 :  *
      37                 :  * As ever, Windows requires its own implementation.
      38                 :  *
      39                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      40                 :  * Portions Copyright (c) 1994, Regents of the University of California
      41                 :  *
      42                 :  *
      43                 :  * IDENTIFICATION
      44                 :  *    src/backend/storage/ipc/dsm_impl.c
      45                 :  *
      46                 :  *-------------------------------------------------------------------------
      47                 :  */
      48                 : 
      49                 : #include "postgres.h"
      50                 : 
      51                 : #include <fcntl.h>
      52                 : #include <signal.h>
      53                 : #include <unistd.h>
      54                 : #ifndef WIN32
      55                 : #include <sys/mman.h>
      56                 : #include <sys/ipc.h>
      57                 : #include <sys/shm.h>
      58                 : #include <sys/stat.h>
      59                 : #endif
      60                 : 
      61                 : #include "common/file_perm.h"
      62                 : #include "libpq/pqsignal.h"
      63                 : #include "miscadmin.h"
      64                 : #include "pgstat.h"
      65                 : #include "portability/mem.h"
      66                 : #include "postmaster/postmaster.h"
      67                 : #include "storage/dsm_impl.h"
      68                 : #include "storage/fd.h"
      69                 : #include "utils/guc.h"
      70                 : #include "utils/memutils.h"
      71                 : 
      72                 : #ifdef USE_DSM_POSIX
      73                 : static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
      74                 :                            void **impl_private, void **mapped_address,
      75                 :                            Size *mapped_size, int elevel);
      76                 : static int  dsm_impl_posix_resize(int fd, off_t size);
      77                 : #endif
      78                 : #ifdef USE_DSM_SYSV
      79                 : static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
      80                 :                           void **impl_private, void **mapped_address,
      81                 :                           Size *mapped_size, int elevel);
      82                 : #endif
      83                 : #ifdef USE_DSM_WINDOWS
      84                 : static bool dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
      85                 :                              void **impl_private, void **mapped_address,
      86                 :                              Size *mapped_size, int elevel);
      87                 : #endif
      88                 : #ifdef USE_DSM_MMAP
      89                 : static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
      90                 :                           void **impl_private, void **mapped_address,
      91                 :                           Size *mapped_size, int elevel);
      92                 : #endif
      93                 : static int  errcode_for_dynamic_shared_memory(void);
      94                 : 
      95                 : const struct config_enum_entry dynamic_shared_memory_options[] = {
      96                 : #ifdef USE_DSM_POSIX
      97                 :     {"posix", DSM_IMPL_POSIX, false},
      98                 : #endif
      99                 : #ifdef USE_DSM_SYSV
     100                 :     {"sysv", DSM_IMPL_SYSV, false},
     101                 : #endif
     102                 : #ifdef USE_DSM_WINDOWS
     103                 :     {"windows", DSM_IMPL_WINDOWS, false},
     104                 : #endif
     105                 : #ifdef USE_DSM_MMAP
     106                 :     {"mmap", DSM_IMPL_MMAP, false},
     107                 : #endif
     108                 :     {NULL, 0, false}
     109                 : };
     110                 : 
     111                 : /* Implementation selector. */
     112                 : int         dynamic_shared_memory_type = DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE;
     113                 : 
     114                 : /* Amount of space reserved for DSM segments in the main area. */
     115                 : int         min_dynamic_shared_memory;
     116                 : 
     117                 : /* Size of buffer to be used for zero-filling. */
     118                 : #define ZBUFFER_SIZE                8192
     119                 : 
     120                 : #define SEGMENT_NAME_PREFIX         "Global/PostgreSQL"
     121                 : 
     122                 : /*------
     123                 :  * Perform a low-level shared memory operation in a platform-specific way,
     124                 :  * as dictated by the selected implementation.  Each implementation is
     125                 :  * required to implement the following primitives.
     126                 :  *
     127                 :  * DSM_OP_CREATE.  Create a segment whose size is the request_size and
     128                 :  * map it.
     129                 :  *
     130                 :  * DSM_OP_ATTACH.  Map the existing segment; request_size must be 0.
     131                 :  *
     132                 :  * DSM_OP_DETACH.  Unmap the segment.
     133                 :  *
     134                 :  * DSM_OP_DESTROY.  Unmap the segment, if it is mapped.  Destroy the
     135                 :  * segment.
     136                 :  *
     137                 :  * Arguments:
     138                 :  *   op: The operation to be performed.
     139                 :  *   handle: The handle of an existing object, or for DSM_OP_CREATE, the
     140                 :  *     new handle the caller wants created.
     141                 :  *   request_size: For DSM_OP_CREATE, the requested size.  Otherwise, 0.
     142                 :  *   impl_private: Private, implementation-specific data.  Will be a pointer
     143                 :  *     to NULL for the first operation on a shared memory segment within this
     144                 :  *     backend; thereafter, it will point to the value to which it was set
     145                 :  *     on the previous call.
     146                 :  *   mapped_address: Pointer to start of current mapping; pointer to NULL
     147                 :  *     if none.  Updated with new mapping address.
     148                 :  *   mapped_size: Pointer to size of current mapping; pointer to 0 if none.
     149                 :  *     Updated with new mapped size.
     150                 :  *   elevel: Level at which to log errors.
     151                 :  *
     152                 :  * Return value: true on success, false on failure.  When false is returned,
     153                 :  * a message should first be logged at the specified elevel, except in the
     154                 :  * case where DSM_OP_CREATE experiences a name collision, which should
     155 ECB             :  * silently return false.
     156                 :  *-----
     157                 :  */
     158                 : bool
     159 CBC       37705 : dsm_impl_op(dsm_op op, dsm_handle handle, Size request_size,
     160 ECB             :             void **impl_private, void **mapped_address, Size *mapped_size,
     161                 :             int elevel)
     162                 : {
     163 CBC       37705 :     Assert(op == DSM_OP_CREATE || request_size == 0);
     164 GIC       37705 :     Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
     165                 :            (*mapped_address == NULL && *mapped_size == 0));
     166 ECB             : 
     167 CBC       37705 :     switch (dynamic_shared_memory_type)
     168                 :     {
     169                 : #ifdef USE_DSM_POSIX
     170 GIC       37705 :         case DSM_IMPL_POSIX:
     171 GBC       37705 :             return dsm_impl_posix(op, handle, request_size, impl_private,
     172 EUB             :                                   mapped_address, mapped_size, elevel);
     173                 : #endif
     174                 : #ifdef USE_DSM_SYSV
     175 UIC           0 :         case DSM_IMPL_SYSV:
     176               0 :             return dsm_impl_sysv(op, handle, request_size, impl_private,
     177                 :                                  mapped_address, mapped_size, elevel);
     178                 : #endif
     179                 : #ifdef USE_DSM_WINDOWS
     180                 :         case DSM_IMPL_WINDOWS:
     181 EUB             :             return dsm_impl_windows(op, handle, request_size, impl_private,
     182                 :                                     mapped_address, mapped_size, elevel);
     183                 : #endif
     184                 : #ifdef USE_DSM_MMAP
     185 UBC           0 :         case DSM_IMPL_MMAP:
     186               0 :             return dsm_impl_mmap(op, handle, request_size, impl_private,
     187                 :                                  mapped_address, mapped_size, elevel);
     188                 : #endif
     189 UIC           0 :         default:
     190               0 :             elog(ERROR, "unexpected dynamic shared memory type: %d",
     191                 :                  dynamic_shared_memory_type);
     192                 :             return false;
     193                 :     }
     194                 : }
     195                 : 
     196                 : #ifdef USE_DSM_POSIX
     197                 : /*
     198                 :  * Operating system primitives to support POSIX shared memory.
     199                 :  *
     200                 :  * POSIX shared memory segments are created and attached using shm_open()
     201                 :  * and shm_unlink(); other operations, such as sizing or mapping the
     202                 :  * segment, are performed as if the shared memory segments were files.
     203                 :  *
     204                 :  * Indeed, on some platforms, they may be implemented that way.  While
     205                 :  * POSIX shared memory segments seem intended to exist in a flat namespace,
     206                 :  * some operating systems may implement them as files, even going so far
     207                 :  * as to treat a request for /xyz as a request to create a file by that name
     208 ECB             :  * in the root directory.  Users of such broken platforms should select
     209                 :  * a different shared memory implementation.
     210                 :  */
     211                 : static bool
     212 GIC       37705 : dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
     213                 :                void **impl_private, void **mapped_address, Size *mapped_size,
     214                 :                int elevel)
     215                 : {
     216                 :     char        name[64];
     217 ECB             :     int         flags;
     218                 :     int         fd;
     219                 :     char       *address;
     220                 : 
     221 GIC       37705 :     snprintf(name, 64, "/PostgreSQL.%u", handle);
     222 ECB             : 
     223                 :     /* Handle teardown cases. */
     224 GIC       37705 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     225 EUB             :     {
     226 GIC       19778 :         if (*mapped_address != NULL
     227           17928 :             && munmap(*mapped_address, *mapped_size) != 0)
     228                 :         {
     229 UBC           0 :             ereport(elevel,
     230                 :                     (errcode_for_dynamic_shared_memory(),
     231 ECB             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     232                 :                             name)));
     233 LBC           0 :             return false;
     234                 :         }
     235 GBC       19778 :         *mapped_address = NULL;
     236 GIC       19778 :         *mapped_size = 0;
     237           19778 :         if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
     238                 :         {
     239 UBC           0 :             ereport(elevel,
     240                 :                     (errcode_for_dynamic_shared_memory(),
     241 ECB             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     242                 :                             name)));
     243 UIC           0 :             return false;
     244                 :         }
     245 GIC       19778 :         return true;
     246                 :     }
     247                 : 
     248                 :     /*
     249                 :      * Create new segment or open an existing one for attach.
     250                 :      *
     251                 :      * Even though we will close the FD before returning, it seems desirable
     252 ECB             :      * to use Reserve/ReleaseExternalFD, to reduce the probability of EMFILE
     253                 :      * failure.  The fact that we won't hold the FD open long justifies using
     254                 :      * ReserveExternalFD rather than AcquireExternalFD, though.
     255                 :      */
     256 GIC       17927 :     ReserveExternalFD();
     257 EUB             : 
     258 GBC       17927 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
     259           17927 :     if ((fd = shm_open(name, flags, PG_FILE_MODE_OWNER)) == -1)
     260                 :     {
     261 UIC           0 :         ReleaseExternalFD();
     262               0 :         if (op == DSM_OP_ATTACH || errno != EEXIST)
     263 UBC           0 :             ereport(elevel,
     264                 :                     (errcode_for_dynamic_shared_memory(),
     265                 :                      errmsg("could not open shared memory segment \"%s\": %m",
     266                 :                             name)));
     267 UIC           0 :         return false;
     268                 :     }
     269                 : 
     270 ECB             :     /*
     271                 :      * If we're attaching the segment, determine the current size; if we are
     272                 :      * creating the segment, set the size to the requested value.
     273                 :      */
     274 CBC       17927 :     if (op == DSM_OP_ATTACH)
     275                 :     {
     276                 :         struct stat st;
     277                 : 
     278 GIC       14277 :         if (fstat(fd, &st) != 0)
     279 EUB             :         {
     280                 :             int         save_errno;
     281                 : 
     282                 :             /* Back out what's already been done. */
     283 UIC           0 :             save_errno = errno;
     284 UBC           0 :             close(fd);
     285 UIC           0 :             ReleaseExternalFD();
     286               0 :             errno = save_errno;
     287                 : 
     288 UBC           0 :             ereport(elevel,
     289                 :                     (errcode_for_dynamic_shared_memory(),
     290 ECB             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     291                 :                             name)));
     292 LBC           0 :             return false;
     293                 :         }
     294 GIC       14277 :         request_size = st.st_size;
     295                 :     }
     296            3650 :     else if (dsm_impl_posix_resize(fd, request_size) != 0)
     297 EUB             :     {
     298                 :         int         save_errno;
     299                 : 
     300                 :         /* Back out what's already been done. */
     301 UBC           0 :         save_errno = errno;
     302 UIC           0 :         close(fd);
     303 UBC           0 :         ReleaseExternalFD();
     304 UIC           0 :         shm_unlink(name);
     305               0 :         errno = save_errno;
     306                 : 
     307 UBC           0 :         ereport(elevel,
     308                 :                 (errcode_for_dynamic_shared_memory(),
     309                 :                  errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     310                 :                         name, request_size)));
     311 LBC           0 :         return false;
     312                 :     }
     313 ECB             : 
     314                 :     /* Map it. */
     315 GIC       17927 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     316                 :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     317           17927 :     if (address == MAP_FAILED)
     318 EUB             :     {
     319                 :         int         save_errno;
     320                 : 
     321                 :         /* Back out what's already been done. */
     322 UBC           0 :         save_errno = errno;
     323               0 :         close(fd);
     324 UIC           0 :         ReleaseExternalFD();
     325 UBC           0 :         if (op == DSM_OP_CREATE)
     326 UIC           0 :             shm_unlink(name);
     327               0 :         errno = save_errno;
     328                 : 
     329 UBC           0 :         ereport(elevel,
     330                 :                 (errcode_for_dynamic_shared_memory(),
     331 ECB             :                  errmsg("could not map shared memory segment \"%s\": %m",
     332                 :                         name)));
     333 LBC           0 :         return false;
     334 ECB             :     }
     335 GIC       17927 :     *mapped_address = address;
     336 CBC       17927 :     *mapped_size = request_size;
     337 GIC       17927 :     close(fd);
     338           17927 :     ReleaseExternalFD();
     339                 : 
     340           17927 :     return true;
     341                 : }
     342                 : 
     343                 : /*
     344                 :  * Set the size of a virtual memory region associated with a file descriptor.
     345                 :  * If necessary, also ensure that virtual memory is actually allocated by the
     346                 :  * operating system, to avoid nasty surprises later.
     347 ECB             :  *
     348                 :  * Returns non-zero if either truncation or allocation fails, and sets errno.
     349                 :  */
     350                 : static int
     351 GIC        3650 : dsm_impl_posix_resize(int fd, off_t size)
     352                 : {
     353                 :     int         rc;
     354                 :     int         save_errno;
     355                 :     sigset_t    save_sigmask;
     356                 : 
     357                 :     /*
     358                 :      * Block all blockable signals, except SIGQUIT.  posix_fallocate() can run
     359 ECB             :      * for quite a long time, and is an all-or-nothing operation.  If we
     360                 :      * allowed SIGUSR1 to interrupt us repeatedly (for example, due to recovery
     361                 :      * conflicts), the retry loop might never succeed.
     362                 :      */
     363 GIC        3650 :     if (IsUnderPostmaster)
     364            1205 :         sigprocmask(SIG_SETMASK, &BlockSig, &save_sigmask);
     365                 : 
     366 GNC        3650 :     pgstat_report_wait_start(WAIT_EVENT_DSM_ALLOCATE);
     367                 : #if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__)
     368                 :     /*
     369                 :      * On Linux, a shm_open fd is backed by a tmpfs file.  If we were to use
     370                 :      * ftruncate, the file would contain a hole.  Accessing memory backed by a
     371                 :      * hole causes tmpfs to allocate pages, which fails with SIGBUS if there
     372                 :      * is no more tmpfs space available.  So we ask tmpfs to allocate pages
     373                 :      * here, so we can fail gracefully with ENOSPC now rather than risking
     374 ECB             :      * SIGBUS later.
     375                 :      *
     376                 :      * We still use a traditional EINTR retry loop to handle SIGCONT.
     377                 :      * posix_fallocate() doesn't restart automatically, and we don't want
     378                 :      * this to fail if you attach a debugger.
     379                 :      */
     380                 :     do
     381                 :     {
     382 GNC        3650 :         rc = posix_fallocate(fd, 0, size);
     383            3650 :     } while (rc == EINTR);
     384 ECB             : 
     385                 :     /*
     386                 :      * The caller expects errno to be set, but posix_fallocate() doesn't
     387                 :      * set it.  Instead it returns error numbers directly.  So set errno,
     388                 :      * even though we'll also return rc to indicate success or failure.
     389                 :      */
     390 GNC        3650 :     errno = rc;
     391                 : #else
     392                 :     /* Extend the file to the requested size. */
     393                 :     do
     394                 :     {
     395                 :         rc = ftruncate(fd, size);
     396                 :     } while (rc < 0 && errno == EINTR);
     397                 : #endif
     398            3650 :     pgstat_report_wait_end();
     399 ECB             : 
     400 GIC        3650 :     if (IsUnderPostmaster)
     401                 :     {
     402            1205 :         save_errno = errno;
     403            1205 :         sigprocmask(SIG_SETMASK, &save_sigmask, NULL);
     404            1205 :         errno = save_errno;
     405                 :     }
     406                 : 
     407            3650 :     return rc;
     408                 : }
     409                 : 
     410                 : #endif                          /* USE_DSM_POSIX */
     411                 : 
     412                 : #ifdef USE_DSM_SYSV
     413                 : /*
     414 EUB             :  * Operating system primitives to support System V shared memory.
     415                 :  *
     416                 :  * System V shared memory segments are manipulated using shmget(), shmat(),
     417                 :  * shmdt(), and shmctl().  As the default allocation limits for System V
     418                 :  * shared memory are usually quite low, the POSIX facilities may be
     419                 :  * preferable; but those are not supported everywhere.
     420                 :  */
     421                 : static bool
     422 UIC           0 : dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
     423                 :               void **impl_private, void **mapped_address, Size *mapped_size,
     424                 :               int elevel)
     425                 : {
     426                 :     key_t       key;
     427                 :     int         ident;
     428                 :     char       *address;
     429 EUB             :     char        name[64];
     430                 :     int        *ident_cache;
     431                 : 
     432                 :     /*
     433                 :      * POSIX shared memory and mmap-based shared memory identify segments with
     434                 :      * names.  To avoid needless error message variation, we use the handle as
     435                 :      * the name.
     436                 :      */
     437 UIC           0 :     snprintf(name, 64, "%u", handle);
     438                 : 
     439                 :     /*
     440                 :      * The System V shared memory namespace is very restricted; names are of
     441                 :      * type key_t, which is expected to be some sort of integer data type, but
     442                 :      * not necessarily the same one as dsm_handle.  Since we use dsm_handle to
     443                 :      * identify shared memory segments across processes, this might seem like
     444                 :      * a problem, but it's really not.  If dsm_handle is bigger than key_t,
     445                 :      * the cast below might truncate away some bits from the handle the
     446                 :      * user provided, but it'll truncate exactly the same bits away in exactly
     447 EUB             :      * the same fashion every time we use that handle, which is all that
     448                 :      * really matters.  Conversely, if dsm_handle is smaller than key_t, we
     449                 :      * won't use the full range of available key space, but that's no big deal
     450                 :      * either.
     451                 :      *
     452                 :      * We do make sure that the key isn't negative, because that might not be
     453                 :      * portable.
     454                 :      */
     455 UIC           0 :     key = (key_t) handle;
     456               0 :     if (key < 1)             /* avoid compiler warning if type is unsigned */
     457               0 :         key = -key;
     458 EUB             : 
     459                 :     /*
     460                 :      * There's one special key, IPC_PRIVATE, which can't be used.  If we end
     461                 :      * up with that value by chance during a create operation, just pretend it
     462                 :      * already exists, so that caller will retry.  If we run into it anywhere
     463                 :      * else, the caller has passed a handle that doesn't correspond to
     464                 :      * anything we ever created, which should not happen.
     465                 :      */
     466 UIC           0 :     if (key == IPC_PRIVATE)
     467                 :     {
     468               0 :         if (op != DSM_OP_CREATE)
     469               0 :             elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
     470               0 :         errno = EEXIST;
     471 UBC           0 :         return false;
     472                 :     }
     473 EUB             : 
     474                 :     /*
     475                 :      * Before we can do anything with a shared memory segment, we have to map
     476                 :      * the shared memory key to a shared memory identifier using shmget(). To
     477                 :      * avoid repeated lookups, we store the identifier using impl_private.
     478                 :      */
     479 UIC           0 :     if (*impl_private != NULL)
     480                 :     {
     481               0 :         ident_cache = *impl_private;
     482               0 :         ident = *ident_cache;
     483                 :     }
     484                 :     else
     485 EUB             :     {
     486 UIC           0 :         int         flags = IPCProtection;
     487                 :         size_t      segsize;
     488                 : 
     489                 :         /*
     490                 :          * Allocate the memory BEFORE acquiring the resource, so that we don't
     491                 :          * leak the resource if memory allocation fails.
     492 EUB             :          */
     493 UIC           0 :         ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
     494 EUB             : 
     495                 :         /*
     496                 :          * When using shmget to find an existing segment, we must pass the
     497                 :          * size as 0.  Passing a non-zero size which is greater than the
     498                 :          * actual size will result in EINVAL.
     499                 :          */
     500 UBC           0 :         segsize = 0;
     501                 : 
     502               0 :         if (op == DSM_OP_CREATE)
     503                 :         {
     504               0 :             flags |= IPC_CREAT | IPC_EXCL;
     505 UIC           0 :             segsize = request_size;
     506 EUB             :         }
     507                 : 
     508 UBC           0 :         if ((ident = shmget(key, segsize, flags)) == -1)
     509                 :         {
     510 UIC           0 :             if (op == DSM_OP_ATTACH || errno != EEXIST)
     511                 :             {
     512 UBC           0 :                 int         save_errno = errno;
     513                 : 
     514 UIC           0 :                 pfree(ident_cache);
     515 UBC           0 :                 errno = save_errno;
     516               0 :                 ereport(elevel,
     517                 :                         (errcode_for_dynamic_shared_memory(),
     518                 :                          errmsg("could not get shared memory segment: %m")));
     519                 :             }
     520               0 :             return false;
     521                 :         }
     522 EUB             : 
     523 UBC           0 :         *ident_cache = ident;
     524               0 :         *impl_private = ident_cache;
     525                 :     }
     526 EUB             : 
     527                 :     /* Handle teardown cases. */
     528 UIC           0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     529                 :     {
     530 UBC           0 :         pfree(ident_cache);
     531 UIC           0 :         *impl_private = NULL;
     532 UBC           0 :         if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
     533 EUB             :         {
     534 UBC           0 :             ereport(elevel,
     535                 :                     (errcode_for_dynamic_shared_memory(),
     536 EUB             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     537                 :                             name)));
     538 UIC           0 :             return false;
     539                 :         }
     540 UBC           0 :         *mapped_address = NULL;
     541 UIC           0 :         *mapped_size = 0;
     542 UBC           0 :         if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
     543                 :         {
     544 UIC           0 :             ereport(elevel,
     545                 :                     (errcode_for_dynamic_shared_memory(),
     546 EUB             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     547                 :                             name)));
     548 UIC           0 :             return false;
     549                 :         }
     550 UBC           0 :         return true;
     551                 :     }
     552 EUB             : 
     553                 :     /* If we're attaching it, we must use IPC_STAT to determine the size. */
     554 UIC           0 :     if (op == DSM_OP_ATTACH)
     555                 :     {
     556 EUB             :         struct shmid_ds shm;
     557                 : 
     558 UBC           0 :         if (shmctl(ident, IPC_STAT, &shm) != 0)
     559                 :         {
     560 UIC           0 :             ereport(elevel,
     561                 :                     (errcode_for_dynamic_shared_memory(),
     562 EUB             :                      errmsg("could not stat shared memory segment \"%s\": %m",
     563                 :                             name)));
     564 UIC           0 :             return false;
     565                 :         }
     566               0 :         request_size = shm.shm_segsz;
     567                 :     }
     568 EUB             : 
     569                 :     /* Map it. */
     570 UBC           0 :     address = shmat(ident, NULL, PG_SHMAT_FLAGS);
     571               0 :     if (address == (void *) -1)
     572                 :     {
     573 EUB             :         int         save_errno;
     574                 : 
     575                 :         /* Back out what's already been done. */
     576 UIC           0 :         save_errno = errno;
     577 UBC           0 :         if (op == DSM_OP_CREATE)
     578 UIC           0 :             shmctl(ident, IPC_RMID, NULL);
     579 UBC           0 :         errno = save_errno;
     580 EUB             : 
     581 UIC           0 :         ereport(elevel,
     582 EUB             :                 (errcode_for_dynamic_shared_memory(),
     583                 :                  errmsg("could not map shared memory segment \"%s\": %m",
     584                 :                         name)));
     585 UIC           0 :         return false;
     586                 :     }
     587               0 :     *mapped_address = address;
     588               0 :     *mapped_size = request_size;
     589                 : 
     590               0 :     return true;
     591                 : }
     592                 : #endif
     593                 : 
     594                 : #ifdef USE_DSM_WINDOWS
     595                 : /*
     596                 :  * Operating system primitives to support Windows shared memory.
     597                 :  *
     598                 :  * Windows shared memory implementation is done using file mapping
     599                 :  * which can be backed by either physical file or system paging file.
     600                 :  * Current implementation uses system paging file as other effects
     601                 :  * like performance are not clear for physical file and it is used in similar
     602                 :  * way for main shared memory in windows.
     603                 :  *
     604                 :  * A memory mapping object is a kernel object - they always get deleted when
     605                 :  * the last reference to them goes away, either explicitly via a CloseHandle or
     606                 :  * when the process containing the reference exits.
     607                 :  */
     608                 : static bool
     609                 : dsm_impl_windows(dsm_op op, dsm_handle handle, Size request_size,
     610                 :                  void **impl_private, void **mapped_address,
     611                 :                  Size *mapped_size, int elevel)
     612                 : {
     613                 :     char       *address;
     614                 :     HANDLE      hmap;
     615                 :     char        name[64];
     616                 :     MEMORY_BASIC_INFORMATION info;
     617                 : 
     618                 :     /*
     619                 :      * Storing the shared memory segment in the Global\ namespace, can allow
     620                 :      * any process running in any session to access that file mapping object
     621                 :      * provided that the caller has the required access rights. But to avoid
     622                 :      * issues faced in main shared memory, we are using the naming convention
     623                 :      * similar to main shared memory. We can change here once issue mentioned
     624                 :      * in GetSharedMemName is resolved.
     625                 :      */
     626                 :     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
     627                 : 
     628                 :     /*
     629                 :      * Handle teardown cases.  Since Windows automatically destroys the object
     630                 :      * when no references remain, we can treat it the same as detach.
     631                 :      */
     632                 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     633                 :     {
     634                 :         if (*mapped_address != NULL
     635                 :             && UnmapViewOfFile(*mapped_address) == 0)
     636                 :         {
     637                 :             _dosmaperr(GetLastError());
     638                 :             ereport(elevel,
     639                 :                     (errcode_for_dynamic_shared_memory(),
     640                 :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     641                 :                             name)));
     642                 :             return false;
     643                 :         }
     644                 :         if (*impl_private != NULL
     645                 :             && CloseHandle(*impl_private) == 0)
     646                 :         {
     647                 :             _dosmaperr(GetLastError());
     648                 :             ereport(elevel,
     649                 :                     (errcode_for_dynamic_shared_memory(),
     650                 :                      errmsg("could not remove shared memory segment \"%s\": %m",
     651                 :                             name)));
     652                 :             return false;
     653                 :         }
     654                 : 
     655                 :         *impl_private = NULL;
     656                 :         *mapped_address = NULL;
     657                 :         *mapped_size = 0;
     658                 :         return true;
     659                 :     }
     660                 : 
     661                 :     /* Create new segment or open an existing one for attach. */
     662                 :     if (op == DSM_OP_CREATE)
     663                 :     {
     664                 :         DWORD       size_high;
     665                 :         DWORD       size_low;
     666                 :         DWORD       errcode;
     667                 : 
     668                 :         /* Shifts >= the width of the type are undefined. */
     669                 : #ifdef _WIN64
     670                 :         size_high = request_size >> 32;
     671                 : #else
     672                 :         size_high = 0;
     673                 : #endif
     674                 :         size_low = (DWORD) request_size;
     675                 : 
     676                 :         /* CreateFileMapping might not clear the error code on success */
     677                 :         SetLastError(0);
     678                 : 
     679                 :         hmap = CreateFileMapping(INVALID_HANDLE_VALUE,  /* Use the pagefile */
     680                 :                                  NULL,  /* Default security attrs */
     681                 :                                  PAGE_READWRITE,    /* Memory is read/write */
     682                 :                                  size_high, /* Upper 32 bits of size */
     683                 :                                  size_low,  /* Lower 32 bits of size */
     684                 :                                  name);
     685                 : 
     686                 :         errcode = GetLastError();
     687                 :         if (errcode == ERROR_ALREADY_EXISTS || errcode == ERROR_ACCESS_DENIED)
     688                 :         {
     689                 :             /*
     690                 :              * On Windows, when the segment already exists, a handle for the
     691                 :              * existing segment is returned.  We must close it before
     692                 :              * returning.  However, if the existing segment is created by a
     693                 :              * service, then it returns ERROR_ACCESS_DENIED. We don't do
     694                 :              * _dosmaperr here, so errno won't be modified.
     695                 :              */
     696                 :             if (hmap)
     697                 :                 CloseHandle(hmap);
     698                 :             return false;
     699                 :         }
     700                 : 
     701                 :         if (!hmap)
     702                 :         {
     703                 :             _dosmaperr(errcode);
     704                 :             ereport(elevel,
     705                 :                     (errcode_for_dynamic_shared_memory(),
     706                 :                      errmsg("could not create shared memory segment \"%s\": %m",
     707                 :                             name)));
     708                 :             return false;
     709                 :         }
     710                 :     }
     711                 :     else
     712                 :     {
     713                 :         hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
     714                 :                                FALSE,   /* do not inherit the name */
     715                 :                                name);   /* name of mapping object */
     716                 :         if (!hmap)
     717                 :         {
     718                 :             _dosmaperr(GetLastError());
     719                 :             ereport(elevel,
     720                 :                     (errcode_for_dynamic_shared_memory(),
     721                 :                      errmsg("could not open shared memory segment \"%s\": %m",
     722                 :                             name)));
     723                 :             return false;
     724                 :         }
     725                 :     }
     726                 : 
     727                 :     /* Map it. */
     728                 :     address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
     729                 :                             0, 0, 0);
     730                 :     if (!address)
     731                 :     {
     732                 :         int         save_errno;
     733                 : 
     734                 :         _dosmaperr(GetLastError());
     735                 :         /* Back out what's already been done. */
     736                 :         save_errno = errno;
     737                 :         CloseHandle(hmap);
     738                 :         errno = save_errno;
     739                 : 
     740                 :         ereport(elevel,
     741                 :                 (errcode_for_dynamic_shared_memory(),
     742                 :                  errmsg("could not map shared memory segment \"%s\": %m",
     743                 :                         name)));
     744                 :         return false;
     745                 :     }
     746                 : 
     747                 :     /*
     748                 :      * VirtualQuery gives size in page_size units, which is 4K for Windows. We
     749                 :      * need size only when we are attaching, but it's better to get the size
     750                 :      * when creating new segment to keep size consistent both for
     751                 :      * DSM_OP_CREATE and DSM_OP_ATTACH.
     752                 :      */
     753                 :     if (VirtualQuery(address, &info, sizeof(info)) == 0)
     754                 :     {
     755                 :         int         save_errno;
     756                 : 
     757                 :         _dosmaperr(GetLastError());
     758                 :         /* Back out what's already been done. */
     759                 :         save_errno = errno;
     760                 :         UnmapViewOfFile(address);
     761                 :         CloseHandle(hmap);
     762                 :         errno = save_errno;
     763                 : 
     764                 :         ereport(elevel,
     765                 :                 (errcode_for_dynamic_shared_memory(),
     766                 :                  errmsg("could not stat shared memory segment \"%s\": %m",
     767                 :                         name)));
     768                 :         return false;
     769                 :     }
     770                 : 
     771                 :     *mapped_address = address;
     772                 :     *mapped_size = info.RegionSize;
     773                 :     *impl_private = hmap;
     774                 : 
     775                 :     return true;
     776                 : }
     777                 : #endif
     778                 : 
     779                 : #ifdef USE_DSM_MMAP
     780                 : /*
     781                 :  * Operating system primitives to support mmap-based shared memory.
     782                 :  *
     783 EUB             :  * Calling this "shared memory" is somewhat of a misnomer, because what
     784                 :  * we're really doing is creating a bunch of files and mapping them into
     785                 :  * our address space.  The operating system may feel obliged to
     786                 :  * synchronize the contents to disk even if nothing is being paged out,
     787                 :  * which will not serve us well.  The user can relocate the pg_dynshmem
     788                 :  * directory to a ramdisk to avoid this problem, if available.
     789                 :  */
     790                 : static bool
     791 UIC           0 : dsm_impl_mmap(dsm_op op, dsm_handle handle, Size request_size,
     792 EUB             :               void **impl_private, void **mapped_address, Size *mapped_size,
     793                 :               int elevel)
     794                 : {
     795                 :     char        name[64];
     796                 :     int         flags;
     797                 :     int         fd;
     798                 :     char       *address;
     799                 : 
     800 UIC           0 :     snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
     801 EUB             :              handle);
     802                 : 
     803                 :     /* Handle teardown cases. */
     804 UIC           0 :     if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
     805 EUB             :     {
     806 UIC           0 :         if (*mapped_address != NULL
     807 UBC           0 :             && munmap(*mapped_address, *mapped_size) != 0)
     808 EUB             :         {
     809 UBC           0 :             ereport(elevel,
     810                 :                     (errcode_for_dynamic_shared_memory(),
     811 EUB             :                      errmsg("could not unmap shared memory segment \"%s\": %m",
     812                 :                             name)));
     813 UIC           0 :             return false;
     814                 :         }
     815 UBC           0 :         *mapped_address = NULL;
     816 UIC           0 :         *mapped_size = 0;
     817 UBC           0 :         if (op == DSM_OP_DESTROY && unlink(name) != 0)
     818                 :         {
     819 UIC           0 :             ereport(elevel,
     820                 :                     (errcode_for_dynamic_shared_memory(),
     821 EUB             :                      errmsg("could not remove shared memory segment \"%s\": %m",
     822                 :                             name)));
     823 UIC           0 :             return false;
     824 EUB             :         }
     825 UBC           0 :         return true;
     826                 :     }
     827                 : 
     828                 :     /* Create new segment or open an existing one for attach. */
     829               0 :     flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
     830 UIC           0 :     if ((fd = OpenTransientFile(name, flags)) == -1)
     831                 :     {
     832               0 :         if (op == DSM_OP_ATTACH || errno != EEXIST)
     833               0 :             ereport(elevel,
     834                 :                     (errcode_for_dynamic_shared_memory(),
     835                 :                      errmsg("could not open shared memory segment \"%s\": %m",
     836 EUB             :                             name)));
     837 UIC           0 :         return false;
     838                 :     }
     839                 : 
     840 EUB             :     /*
     841                 :      * If we're attaching the segment, determine the current size; if we are
     842                 :      * creating the segment, set the size to the requested value.
     843                 :      */
     844 UIC           0 :     if (op == DSM_OP_ATTACH)
     845 EUB             :     {
     846                 :         struct stat st;
     847                 : 
     848 UIC           0 :         if (fstat(fd, &st) != 0)
     849 EUB             :         {
     850                 :             int         save_errno;
     851                 : 
     852                 :             /* Back out what's already been done. */
     853 UBC           0 :             save_errno = errno;
     854 UIC           0 :             CloseTransientFile(fd);
     855 UBC           0 :             errno = save_errno;
     856                 : 
     857 UIC           0 :             ereport(elevel,
     858                 :                     (errcode_for_dynamic_shared_memory(),
     859                 :                      errmsg("could not stat shared memory segment \"%s\": %m",
     860                 :                             name)));
     861               0 :             return false;
     862                 :         }
     863               0 :         request_size = st.st_size;
     864                 :     }
     865                 :     else
     866 EUB             :     {
     867                 :         /*
     868                 :          * Allocate a buffer full of zeros.
     869                 :          *
     870                 :          * Note: palloc zbuffer, instead of just using a local char array, to
     871                 :          * ensure it is reasonably well-aligned; this may save a few cycles
     872                 :          * transferring data to the kernel.
     873                 :          */
     874 UIC           0 :         char       *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
     875               0 :         uint32      remaining = request_size;
     876 UBC           0 :         bool        success = true;
     877                 : 
     878 EUB             :         /*
     879                 :          * Zero-fill the file. We have to do this the hard way to ensure that
     880                 :          * all the file space has really been allocated, so that we don't
     881                 :          * later seg fault when accessing the memory mapping.  This is pretty
     882                 :          * pessimal.
     883                 :          */
     884 UBC           0 :         while (success && remaining > 0)
     885                 :         {
     886               0 :             Size        goal = remaining;
     887 EUB             : 
     888 UIC           0 :             if (goal > ZBUFFER_SIZE)
     889               0 :                 goal = ZBUFFER_SIZE;
     890 UBC           0 :             pgstat_report_wait_start(WAIT_EVENT_DSM_FILL_ZERO_WRITE);
     891 UIC           0 :             if (write(fd, zbuffer, goal) == goal)
     892               0 :                 remaining -= goal;
     893                 :             else
     894               0 :                 success = false;
     895 UBC           0 :             pgstat_report_wait_end();
     896 EUB             :         }
     897                 : 
     898 UBC           0 :         if (!success)
     899                 :         {
     900 EUB             :             int         save_errno;
     901                 : 
     902                 :             /* Back out what's already been done. */
     903 UIC           0 :             save_errno = errno;
     904 UBC           0 :             CloseTransientFile(fd);
     905 UIC           0 :             unlink(name);
     906               0 :             errno = save_errno ? save_errno : ENOSPC;
     907                 : 
     908               0 :             ereport(elevel,
     909 EUB             :                     (errcode_for_dynamic_shared_memory(),
     910                 :                      errmsg("could not resize shared memory segment \"%s\" to %zu bytes: %m",
     911                 :                             name, request_size)));
     912 UIC           0 :             return false;
     913                 :         }
     914                 :     }
     915                 : 
     916 EUB             :     /* Map it. */
     917 UBC           0 :     address = mmap(NULL, request_size, PROT_READ | PROT_WRITE,
     918 EUB             :                    MAP_SHARED | MAP_HASSEMAPHORE | MAP_NOSYNC, fd, 0);
     919 UBC           0 :     if (address == MAP_FAILED)
     920 EUB             :     {
     921                 :         int         save_errno;
     922                 : 
     923                 :         /* Back out what's already been done. */
     924 UIC           0 :         save_errno = errno;
     925               0 :         CloseTransientFile(fd);
     926 UBC           0 :         if (op == DSM_OP_CREATE)
     927 UIC           0 :             unlink(name);
     928 UBC           0 :         errno = save_errno;
     929 EUB             : 
     930 UIC           0 :         ereport(elevel,
     931 EUB             :                 (errcode_for_dynamic_shared_memory(),
     932                 :                  errmsg("could not map shared memory segment \"%s\": %m",
     933                 :                         name)));
     934 UIC           0 :         return false;
     935                 :     }
     936               0 :     *mapped_address = address;
     937 UBC           0 :     *mapped_size = request_size;
     938                 : 
     939 UIC           0 :     if (CloseTransientFile(fd) != 0)
     940 EUB             :     {
     941 UIC           0 :         ereport(elevel,
     942                 :                 (errcode_for_file_access(),
     943                 :                  errmsg("could not close shared memory segment \"%s\": %m",
     944                 :                         name)));
     945               0 :         return false;
     946                 :     }
     947                 : 
     948               0 :     return true;
     949                 : }
     950                 : #endif
     951                 : 
     952                 : /*
     953                 :  * Implementation-specific actions that must be performed when a segment is to
     954 ECB             :  * be preserved even when no backend has it attached.
     955                 :  *
     956                 :  * Except on Windows, we don't need to do anything at all.  But since Windows
     957                 :  * cleans up segments automatically when no references remain, we duplicate
     958                 :  * the segment handle into the postmaster process.  The postmaster needn't
     959                 :  * do anything to receive the handle; Windows transfers it automatically.
     960                 :  */
     961                 : void
     962 GIC        1350 : dsm_impl_pin_segment(dsm_handle handle, void *impl_private,
     963                 :                      void **impl_private_pm_handle)
     964                 : {
     965            1350 :     switch (dynamic_shared_memory_type)
     966                 :     {
     967                 : #ifdef USE_DSM_WINDOWS
     968                 :         case DSM_IMPL_WINDOWS:
     969                 :             if (IsUnderPostmaster)
     970                 :             {
     971                 :                 HANDLE      hmap;
     972                 : 
     973                 :                 if (!DuplicateHandle(GetCurrentProcess(), impl_private,
     974                 :                                      PostmasterHandle, &hmap, 0, FALSE,
     975                 :                                      DUPLICATE_SAME_ACCESS))
     976                 :                 {
     977                 :                     char        name[64];
     978                 : 
     979                 :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
     980                 :                     _dosmaperr(GetLastError());
     981                 :                     ereport(ERROR,
     982                 :                             (errcode_for_dynamic_shared_memory(),
     983                 :                              errmsg("could not duplicate handle for \"%s\": %m",
     984                 :                                     name)));
     985                 :                 }
     986                 : 
     987                 :                 /*
     988                 :                  * Here, we remember the handle that we created in the
     989                 :                  * postmaster process.  This handle isn't actually usable in
     990                 :                  * any process other than the postmaster, but that doesn't
     991 ECB             :                  * matter.  We're just holding onto it so that, if the segment
     992                 :                  * is unpinned, dsm_impl_unpin_segment can close it.
     993                 :                  */
     994                 :                 *impl_private_pm_handle = hmap;
     995                 :             }
     996                 :             break;
     997                 : #endif
     998                 :         default:
     999 GIC        1350 :             break;
    1000                 :     }
    1001            1350 : }
    1002                 : 
    1003                 : /*
    1004                 :  * Implementation-specific actions that must be performed when a segment is no
    1005 ECB             :  * longer to be preserved, so that it will be cleaned up when all backends
    1006                 :  * have detached from it.
    1007                 :  *
    1008                 :  * Except on Windows, we don't need to do anything at all.  For Windows, we
    1009                 :  * close the extra handle that dsm_impl_pin_segment created in the
    1010                 :  * postmaster's process space.
    1011                 :  */
    1012                 : void
    1013 GIC         114 : dsm_impl_unpin_segment(dsm_handle handle, void **impl_private)
    1014                 : {
    1015             114 :     switch (dynamic_shared_memory_type)
    1016                 :     {
    1017                 : #ifdef USE_DSM_WINDOWS
    1018                 :         case DSM_IMPL_WINDOWS:
    1019                 :             if (IsUnderPostmaster)
    1020                 :             {
    1021                 :                 if (*impl_private &&
    1022                 :                     !DuplicateHandle(PostmasterHandle, *impl_private,
    1023                 :                                      NULL, NULL, 0, FALSE,
    1024                 :                                      DUPLICATE_CLOSE_SOURCE))
    1025                 :                 {
    1026                 :                     char        name[64];
    1027                 : 
    1028                 :                     snprintf(name, 64, "%s.%u", SEGMENT_NAME_PREFIX, handle);
    1029                 :                     _dosmaperr(GetLastError());
    1030                 :                     ereport(ERROR,
    1031                 :                             (errcode_for_dynamic_shared_memory(),
    1032                 :                              errmsg("could not duplicate handle for \"%s\": %m",
    1033 ECB             :                                     name)));
    1034                 :                 }
    1035                 : 
    1036                 :                 *impl_private = NULL;
    1037                 :             }
    1038 EUB             :             break;
    1039                 : #endif
    1040                 :         default:
    1041 GBC         114 :             break;
    1042                 :     }
    1043             114 : }
    1044                 : 
    1045                 : static int
    1046 UIC           0 : errcode_for_dynamic_shared_memory(void)
    1047                 : {
    1048               0 :     if (errno == EFBIG || errno == ENOMEM)
    1049               0 :         return errcode(ERRCODE_OUT_OF_MEMORY);
    1050                 :     else
    1051               0 :         return errcode_for_file_access();
    1052                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a