Allow dynamic allocation of shared memory segments.
authorRobert Haas <rhaas@postgresql.org>
Thu, 10 Oct 2013 01:05:02 +0000 (21:05 -0400)
committerRobert Haas <rhaas@postgresql.org>
Thu, 10 Oct 2013 01:05:02 +0000 (21:05 -0400)
Patch by myself and Amit Kapila.  Design help from Noah Misch.  Review
by Andres Freund.

18 files changed:
configure
configure.in
doc/src/sgml/config.sgml
src/backend/port/sysv_shmem.c
src/backend/storage/ipc/Makefile
src/backend/storage/ipc/dsm.c [new file with mode: 0644]
src/backend/storage/ipc/dsm_impl.c [new file with mode: 0644]
src/backend/storage/ipc/ipci.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/backend/utils/resowner/resowner.c
src/bin/initdb/initdb.c
src/include/pg_config.h.in
src/include/portability/mem.h [new file with mode: 0644]
src/include/storage/dsm.h [new file with mode: 0644]
src/include/storage/dsm_impl.h [new file with mode: 0644]
src/include/storage/lwlock.h
src/include/utils/resowner_private.h

index c685ca3f918c9a5bdecf1ad7e83933073015824c..97d2f68956af39edf017726be968674ad01a34e9 100755 (executable)
--- a/configure
+++ b/configure
@@ -8384,6 +8384,180 @@ if test "$ac_res" != no; then
 
 fi
 
+{ $as_echo "$as_me:$LINENO: checking for library containing shm_open" >&5
+$as_echo_n "checking for library containing shm_open... " >&6; }
+if test "${ac_cv_search_shm_open+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char shm_open ();
+int
+main ()
+{
+return shm_open ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' rt; do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+    test -z "$ac_c_werror_flag" ||
+    test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+    test "$cross_compiling" = yes ||
+    $as_test_x conftest$ac_exeext
+       }; then
+  ac_cv_search_shm_open=$ac_res
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext
+  if test "${ac_cv_search_shm_open+set}" = set; then
+  break
+fi
+done
+if test "${ac_cv_search_shm_open+set}" = set; then
+  :
+else
+  ac_cv_search_shm_open=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_search_shm_open" >&5
+$as_echo "$ac_cv_search_shm_open" >&6; }
+ac_res=$ac_cv_search_shm_open
+if test "$ac_res" != no; then
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+{ $as_echo "$as_me:$LINENO: checking for library containing shm_unlink" >&5
+$as_echo_n "checking for library containing shm_unlink... " >&6; }
+if test "${ac_cv_search_shm_unlink+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char shm_unlink ();
+int
+main ()
+{
+return shm_unlink ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' rt; do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+    test -z "$ac_c_werror_flag" ||
+    test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+    test "$cross_compiling" = yes ||
+    $as_test_x conftest$ac_exeext
+       }; then
+  ac_cv_search_shm_unlink=$ac_res
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext
+  if test "${ac_cv_search_shm_unlink+set}" = set; then
+  break
+fi
+done
+if test "${ac_cv_search_shm_unlink+set}" = set; then
+  :
+else
+  ac_cv_search_shm_unlink=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_search_shm_unlink" >&5
+$as_echo "$ac_cv_search_shm_unlink" >&6; }
+ac_res=$ac_cv_search_shm_unlink
+if test "$ac_res" != no; then
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
 # Solaris:
 { $as_echo "$as_me:$LINENO: checking for library containing fdatasync" >&5
 $as_echo_n "checking for library containing fdatasync... " >&6; }
@@ -19763,7 +19937,8 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
 
 
-for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
+
+for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
 do
 as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 { $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
index 82771bddb12ab584c9ba4f42e8f39570372fe100..ead0908fd9a1ba51ff53b81548cbef6e14ded12d 100644 (file)
@@ -883,6 +883,8 @@ case $host_os in
 esac
 AC_SEARCH_LIBS(getopt_long, [getopt gnugetopt])
 AC_SEARCH_LIBS(crypt, crypt)
+AC_SEARCH_LIBS(shm_open, rt)
+AC_SEARCH_LIBS(shm_unlink, rt)
 # Solaris:
 AC_SEARCH_LIBS(fdatasync, [rt posix4])
 # Required for thread_test.c on Solaris 2.5:
@@ -1230,7 +1232,7 @@ PGAC_FUNC_GETTIMEOFDAY_1ARG
 LIBS_including_readline="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
-AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
+AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
 
 AC_REPLACE_FUNCS(fseeko)
 case $host_os in
index e8e8e6f8fcde7987c8606dce23c201346cd9a778..77a9303933dd6cf47861ef2a538682bcc6c51216 100644 (file)
@@ -1194,6 +1194,32 @@ include 'filename'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-dynamic-shared-memory-type" xreflabel="dynamic_shared_memory_type">
+      <term><varname>dynamic_shared_memory_type</varname> (<type>enum</type>)</term>
+      <indexterm>
+       <primary><varname>dynamic_shared_memory_type</> configuration parameter</primary>
+      </indexterm>
+      <listitem>
+       <para>
+        Specifies the dynamic shared memory implementation that the server
+        should use.  Possible values are <literal>posix</> (for POSIX shared
+        memory allocated using <literal>shm_open</>), <literal>sysv</literal>
+        (for System V shared memory allocated via <literal>shmget</>),
+        <literal>windows</> (for Windows shared memory), <literal>mmap</>
+        (to simulate shared memory using memory-mapped files stored in the
+        data directory), and <literal>none</> (to disable this feature).
+        Not all values are supported on all platforms; the first supported
+        option is the default for that platform.  The use of the
+        <literal>mmap</> option, which is not the default on any platform,
+        is generally discouraged because the operating system may write
+        modified pages back to disk repeatedly, increasing system I/O load;
+        however, it may be useful for debugging, when the
+        <literal>pg_dynshmem</> directory is stored on a RAM disk, or when
+        other shared memory facilities are not available.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
      </sect2>
 
index 20e3c321abd2cd8a81dc25dff59fabc57106523f..b604407999c14538c97ddd07f39439fedac8f990 100644 (file)
@@ -29,6 +29,7 @@
 #endif
 
 #include "miscadmin.h"
+#include "portability/mem.h"
 #include "storage/ipc.h"
 #include "storage/pg_shmem.h"
 
 typedef key_t IpcMemoryKey;        /* shared memory key passed to shmget(2) */
 typedef int IpcMemoryId;       /* shared memory ID returned by shmget(2) */
 
-#define IPCProtection  (0600)  /* access/modify by user only */
-
-#ifdef SHM_SHARE_MMU           /* use intimate shared memory on Solaris */
-#define PG_SHMAT_FLAGS         SHM_SHARE_MMU
-#else
-#define PG_SHMAT_FLAGS         0
-#endif
-
-/* Linux prefers MAP_ANONYMOUS, but the flag is called MAP_ANON on other systems. */
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS          MAP_ANON
-#endif
-
-/* BSD-derived systems have MAP_HASSEMAPHORE, but it's not present (or needed) on Linux. */
-#ifndef MAP_HASSEMAPHORE
-#define MAP_HASSEMAPHORE       0
-#endif
-
-#define PG_MMAP_FLAGS          (MAP_SHARED|MAP_ANONYMOUS|MAP_HASSEMAPHORE)
-
-/* Some really old systems don't define MAP_FAILED. */
-#ifndef MAP_FAILED
-#define MAP_FAILED ((void *) -1)
-#endif
-
 
 unsigned long UsedShmemSegID = 0;
 void      *UsedShmemSegAddr = NULL;
index 743f30e1c7389a6290200ad5b85c8460e72afbb2..873dd60dbf4630387f75cbdfc3f1f00d326a438b 100644 (file)
@@ -15,7 +15,7 @@ override CFLAGS+= -fno-inline
 endif
 endif
 
-OBJS = ipc.o ipci.o pmsignal.o procarray.o procsignal.o shmem.o shmqueue.o \
-   sinval.o sinvaladt.o standby.o
+OBJS = dsm_impl.o dsm.o ipc.o ipci.o pmsignal.o procarray.o procsignal.o \
+   shmem.o shmqueue.o sinval.o sinvaladt.o standby.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/storage/ipc/dsm.c b/src/backend/storage/ipc/dsm.c
new file mode 100644 (file)
index 0000000..e516197
--- /dev/null
@@ -0,0 +1,972 @@
+/*-------------------------------------------------------------------------
+ *
+ * dsm.c
+ *   manage dynamic shared memory segments
+ *
+ * This file provides a set of services to make programming with dynamic
+ * shared memory segments more convenient.  Unlike the low-level
+ * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments
+ * created using this module will be cleaned up automatically.  Mappings
+ * will be removed when the resource owner under which they were created
+ * is cleaned up, unless dsm_keep_mapping() is used, in which case they
+ * have session lifespan.  Segments will be removed when there are no
+ * remaining mappings, or at postmaster shutdown in any case.  After a
+ * hard postmaster crash, remaining segments will be removed, if they
+ * still exist, at the next postmaster startup.
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *   src/backend/storage/ipc/dsm.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#ifndef WIN32
+#include <sys/mman.h>
+#endif
+#include <sys/stat.h>
+
+#include "lib/ilist.h"
+#include "miscadmin.h"
+#include "storage/dsm.h"
+#include "storage/ipc.h"
+#include "storage/lwlock.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/resowner_private.h"
+
+#define PG_DYNSHMEM_STATE_FILE         PG_DYNSHMEM_DIR "/state"
+#define PG_DYNSHMEM_NEW_STATE_FILE     PG_DYNSHMEM_DIR "/state.new"
+#define PG_DYNSHMEM_STATE_BUFSIZ       512
+#define PG_DYNSHMEM_CONTROL_MAGIC      0x9a503d32
+
+/*
+ * There's no point in getting too cheap here, because the minimum allocation
+ * is one OS page, which is probably at least 4KB and could easily be as high
+ * as 64KB.  Each slot consumes sizeof(dsm_control_item), currently 8 bytes.
+ */
+#define PG_DYNSHMEM_FIXED_SLOTS            64
+#define PG_DYNSHMEM_SLOTS_PER_BACKEND  2
+
+#define INVALID_CONTROL_SLOT       ((uint32) -1)
+
+/* Backend-local state for a dynamic shared memory segment. */
+struct dsm_segment
+{
+   dlist_node  node;               /* List link in dsm_segment_list. */
+   ResourceOwner resowner;         /* Resource owner. */
+   dsm_handle  handle;             /* Segment name. */
+   uint32      control_slot;       /* Slot in control segment. */
+   void       *impl_private;       /* Implementation-specific private data. */
+   void       *mapped_address;     /* Mapping address, or NULL if unmapped. */
+   uint64      mapped_size;        /* Size of our mapping. */
+};
+
+/* Shared-memory state for a dynamic shared memory segment. */
+typedef struct dsm_control_item
+{
+   dsm_handle  handle;
+   uint32      refcnt;             /* 2+ = active, 1 = moribund, 0 = gone */
+} dsm_control_item;
+
+/* Layout of the dynamic shared memory control segment. */
+typedef struct dsm_control_header
+{
+   uint32      magic;
+   uint32      nitems;
+   uint32      maxitems;
+   dsm_control_item    item[FLEXIBLE_ARRAY_MEMBER];
+} dsm_control_header;
+
+static void dsm_cleanup_using_control_segment(void);
+static void dsm_cleanup_for_mmap(void);
+static bool dsm_read_state_file(dsm_handle *h);
+static void dsm_write_state_file(dsm_handle h);
+static void dsm_postmaster_shutdown(int code, Datum arg);
+static void dsm_backend_shutdown(int code, Datum arg);
+static dsm_segment *dsm_create_descriptor(void);
+static bool dsm_control_segment_sane(dsm_control_header *control,
+                        uint64 mapped_size);
+static uint64 dsm_control_bytes_needed(uint32 nitems);
+
+/* Has this backend initialized the dynamic shared memory system yet? */
+static bool dsm_init_done = false;
+
+/*
+ * List of dynamic shared memory segments used by this backend.
+ *
+ * At process exit time, we must decrement the reference count of each
+ * segment we have attached; this list makes it possible to find all such
+ * segments.
+ *
+ * This list should always be empty in the postmaster.  We could probably
+ * allow the postmaster to map dynamic shared memory segments before it
+ * begins to start child processes, provided that each process adjusted
+ * the reference counts for those segments in the control segment at
+ * startup time, but there's no obvious need for such a facility, which
+ * would also be complex to handle in the EXEC_BACKEND case.  Once the
+ * postmaster has begun spawning children, there's an additional problem:
+ * each new mapping would require an update to the control segment,
+ * which requires locking, in which the postmaster must not be involved.
+ */
+static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list);
+
+/*
+ * Control segment information.
+ *
+ * Unlike ordinary shared memory segments, the control segment is not
+ * reference counted; instead, it lasts for the postmaster's entire
+ * life cycle.  For simplicity, it doesn't have a dsm_segment object either.
+ */
+static dsm_handle dsm_control_handle;
+static dsm_control_header *dsm_control;
+static uint64 dsm_control_mapped_size = 0;
+static void    *dsm_control_impl_private = NULL;
+
+/*
+ * Start up the dynamic shared memory system.
+ *
+ * This is called just once during each cluster lifetime, at postmaster
+ * startup time.
+ */
+void
+dsm_postmaster_startup(void)
+{
+   void       *dsm_control_address = NULL;
+   uint32      maxitems;
+   uint64      segsize;
+
+   Assert(!IsUnderPostmaster);
+
+   /* If dynamic shared memory is disabled, there's nothing to do. */
+   if (dynamic_shared_memory_type == DSM_IMPL_NONE)
+       return;
+
+   /*
+    * Check for, and remove, shared memory segments left behind by a dead
+    * postmaster.  This isn't necessary on Windows, which always removes them
+    * when the last reference is gone.
+    */
+   switch (dynamic_shared_memory_type)
+   {
+       case DSM_IMPL_POSIX:
+       case DSM_IMPL_SYSV:
+           dsm_cleanup_using_control_segment();
+           break;
+       case DSM_IMPL_MMAP:
+           dsm_cleanup_for_mmap();
+           break;
+       case DSM_IMPL_WINDOWS:
+           /* Nothing to do. */
+           break;
+       default:
+           elog(ERROR, "unknown dynamic shared memory type: %d",
+                dynamic_shared_memory_type);
+   }
+
+   /* Determine size for new control segment. */
+   maxitems = PG_DYNSHMEM_FIXED_SLOTS
+       + PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends;
+   elog(DEBUG2, "dynamic shared memory system will support %u segments",
+       maxitems);
+   segsize = dsm_control_bytes_needed(maxitems);
+
+   /* Loop until we find an unused identifier for the new control segment. */
+   for (;;)
+   {
+       Assert(dsm_control_address == NULL);
+       Assert(dsm_control_mapped_size == 0);
+       dsm_control_handle = random();
+       if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize,
+                       &dsm_control_impl_private, &dsm_control_address,
+                       &dsm_control_mapped_size, ERROR))
+           break;
+   }
+   dsm_control = dsm_control_address;
+   on_shmem_exit(dsm_postmaster_shutdown, 0);
+   elog(DEBUG2, "created dynamic shared memory control segment %u ("
+       UINT64_FORMAT " bytes)", dsm_control_handle, segsize);
+   dsm_write_state_file(dsm_control_handle);
+
+   /* Initialize control segment. */
+   dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC;
+   dsm_control->nitems = 0;
+   dsm_control->maxitems = maxitems;
+}
+
+/*
+ * Determine whether the control segment from the previous postmaster
+ * invocation still exists.  If so, remove the dynamic shared memory
+ * segments to which it refers, and then the control segment itself.
+ */
+static void
+dsm_cleanup_using_control_segment(void)
+{
+   void       *mapped_address = NULL;
+   void       *junk_mapped_address = NULL;
+   void       *impl_private = NULL;
+   void       *junk_impl_private = NULL;
+   uint64      mapped_size = 0;
+   uint64      junk_mapped_size = 0;
+   uint32      nitems;
+   uint32      i;
+   dsm_handle  old_control_handle;
+   dsm_control_header *old_control;
+
+   /*
+    * Read the state file.  If it doesn't exist or is empty, there's nothing
+    * more to do.
+    */
+   if (!dsm_read_state_file(&old_control_handle))
+       return;
+
+   /*
+    * Try to attach the segment.  If this fails, it probably just means that
+    * the operating system has been rebooted and the segment no longer exists,
+    * or an unrelated proces has used the same shm ID.  So just fall out
+    * quietly.
+    */
+   if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private,
+                    &mapped_address, &mapped_size, DEBUG1))
+       return;
+
+   /*
+    * We've managed to reattach it, but the contents might not be sane.
+    * If they aren't, we disregard the segment after all.
+    */
+   old_control = (dsm_control_header *) mapped_address;
+   if (!dsm_control_segment_sane(old_control, mapped_size))
+   {
+       dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private,
+                   &mapped_address, &mapped_size, LOG);
+       return;
+   }
+
+   /*
+    * OK, the control segment looks basically valid, so we can use it
+    * to get a list of segments that need to be removed.
+    */
+   nitems = old_control->nitems;
+   for (i = 0; i < nitems; ++i)
+   {
+       dsm_handle      handle;
+       uint32          refcnt;
+
+       /* If the reference count is 0, the slot is actually unused. */
+       refcnt = old_control->item[i].refcnt;
+       if (refcnt == 0)
+           continue;
+
+       /* Log debugging information. */
+       handle = old_control->item[i].handle;
+       elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)",
+           handle, refcnt);
+
+       /* Destroy the referenced segment. */
+       dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
+                   &junk_mapped_address, &junk_mapped_size, LOG);
+   }
+
+   /* Destroy the old control segment, too. */
+   elog(DEBUG2,
+        "cleaning up dynamic shared memory control segment with ID %u",
+        old_control_handle);
+   dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private,
+               &mapped_address, &mapped_size, LOG);
+}
+
+/*
+ * When we're using the mmap shared memory implementation, "shared memory"
+ * segments might even manage to survive an operating system reboot.
+ * But there's no guarantee as to exactly what will survive: some segments
+ * may survive, and others may not, and the contents of some may be out
+ * of date.  In particular, the control segment may be out of date, so we
+ * can't rely on it to figure out what to remove.  However, since we know
+ * what directory contains the files we used as shared memory, we can simply
+ * scan the directory and blow everything away that shouldn't be there.
+ */
+static void
+dsm_cleanup_for_mmap(void)
+{
+   DIR    *dir;
+   struct dirent *dent;
+
+   /* Open the directory; can't use AllocateDir in postmaster. */
+   if ((dir = opendir(PG_DYNSHMEM_DIR)) == NULL)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not open directory \"%s\": %m",
+                   PG_DYNSHMEM_DIR)));
+
+   /* Scan for something with a name of the correct format. */
+   while ((dent = readdir(dir)) != NULL)
+   {
+       if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX,
+               strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0)
+       {
+           char buf[MAXPGPATH];
+           snprintf(buf, MAXPGPATH, PG_DYNSHMEM_DIR "/%s", dent->d_name);
+
+           elog(DEBUG2, "removing file \"%s\"", buf);
+
+           /* We found a matching file; so remove it. */
+           if (unlink(buf) != 0)
+           {
+               int     save_errno;
+
+               save_errno = errno;
+               closedir(dir);
+               errno = save_errno;
+
+               ereport(ERROR,
+                       (errcode_for_file_access(),
+                        errmsg("could not remove file \"%s\": %m", buf)));
+           }
+       }
+   }
+
+   /* Cleanup complete. */
+   closedir(dir);
+}
+
+/*
+ * Read and parse the state file.
+ *
+ * If the state file is empty or the contents are garbled, it probably means
+ * that the operating system rebooted before the data written by the previous
+ * postmaster made it to disk.  In that case, we can just ignore it; any shared
+ * memory from before the reboot should be gone anyway.
+ */
+static bool
+dsm_read_state_file(dsm_handle *h)
+{
+   int         statefd;
+   char        statebuf[PG_DYNSHMEM_STATE_BUFSIZ];
+   int         nbytes = 0;
+   char       *endptr,
+              *s;
+   dsm_handle  handle;
+
+   /* Read the state file to get the ID of the old control segment. */
+   statefd = open(PG_DYNSHMEM_STATE_FILE, O_RDONLY | PG_BINARY, 0);
+   if (statefd < 0)
+   {
+       if (errno == ENOENT)
+           return false;
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not open file \"%s\": %m",
+                   PG_DYNSHMEM_STATE_FILE)));
+   }
+   nbytes = read(statefd, statebuf, PG_DYNSHMEM_STATE_BUFSIZ - 1);
+   if (nbytes < 0)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not read file \"%s\": %m",
+                   PG_DYNSHMEM_STATE_FILE)));
+   /* make sure buffer is NUL terminated */
+   statebuf[nbytes] = '\0';
+   close(statefd);
+
+   /*
+    * We expect to find the handle of the old control segment here,
+    * on a line by itself.
+    */
+   handle = strtoul(statebuf, &endptr, 10);
+   for (s = endptr; *s == ' ' || *s == '\t'; ++s)
+       ;
+   if (*s != '\n' && *s != '\0')
+       return false;
+
+   /* Looks good. */
+   *h = handle;
+   return true;
+}
+
+/*
+ * Write our control segment handle to the state file, so that if the
+ * postmaster is killed without running its on_shmem_exit hooks, the
+ * next postmaster can clean things up after restart.
+ */
+static void
+dsm_write_state_file(dsm_handle h)
+{
+   int         statefd;
+   char        statebuf[PG_DYNSHMEM_STATE_BUFSIZ];
+   int         nbytes;
+
+   /* Create or truncate the file. */
+   statefd = open(PG_DYNSHMEM_NEW_STATE_FILE,
+                  O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, 0600);
+   if (statefd < 0)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not create file \"%s\": %m",
+                   PG_DYNSHMEM_NEW_STATE_FILE)));
+
+   /* Write contents. */
+   snprintf(statebuf, PG_DYNSHMEM_STATE_BUFSIZ, "%u\n", dsm_control_handle);
+   nbytes = strlen(statebuf);
+   if (write(statefd, statebuf, nbytes) != nbytes)
+   {
+       if (errno == 0)
+           errno = ENOSPC;     /* if no error signalled, assume no space */
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not write file \"%s\": %m",
+                   PG_DYNSHMEM_NEW_STATE_FILE)));
+   }
+
+   /* Close file. */
+   close(statefd);
+
+   /*
+    * Atomically rename file into place, so that no one ever sees a partially
+    * written state file.
+    */
+   if (rename(PG_DYNSHMEM_NEW_STATE_FILE, PG_DYNSHMEM_STATE_FILE) < 0)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not rename file \"%s\": %m",
+                   PG_DYNSHMEM_NEW_STATE_FILE)));
+}
+
+/*
+ * At shutdown time, we iterate over the control segment and remove all
+ * remaining dynamic shared memory segments.  We avoid throwing errors here;
+ * the postmaster is shutting down either way, and this is just non-critical
+ * resource cleanup.
+ */
+static void
+dsm_postmaster_shutdown(int code, Datum arg)
+{
+   uint32      nitems;
+   uint32      i;
+   void       *dsm_control_address;
+   void       *junk_mapped_address = NULL;
+   void       *junk_impl_private = NULL;
+   uint64      junk_mapped_size = 0;
+
+   /*
+    * If some other backend exited uncleanly, it might have corrupted the
+    * control segment while it was dying.  In that case, we warn and ignore
+    * the contents of the control segment.  This may end up leaving behind
+    * stray shared memory segments, but there's not much we can do about
+    * that if the metadata is gone.
+    */
+   nitems = dsm_control->nitems;
+   if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
+   {
+       ereport(LOG,
+               (errmsg("dynamic shared memory control segment is corrupt")));
+       return;
+   }
+
+   /* Remove any remaining segments. */
+   for (i = 0; i < nitems; ++i)
+   {
+       dsm_handle  handle;
+
+       /* If the reference count is 0, the slot is actually unused. */
+       if (dsm_control->item[i].refcnt == 0)
+           continue;
+
+       /* Log debugging information. */
+       handle = dsm_control->item[i].handle;
+       elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u",
+           handle);
+
+       /* Destroy the segment. */
+       dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private,
+                   &junk_mapped_address, &junk_mapped_size, LOG);
+   }
+
+   /* Remove the control segment itself. */
+   elog(DEBUG2,
+        "cleaning up dynamic shared memory control segment with ID %u",
+        dsm_control_handle);
+   dsm_control_address = dsm_control;
+   dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0,
+               &dsm_control_impl_private, &dsm_control_address,
+               &dsm_control_mapped_size, LOG);
+   dsm_control = dsm_control_address;
+
+   /* And, finally, remove the state file. */
+   if (unlink(PG_DYNSHMEM_STATE_FILE) < 0)
+       ereport(LOG,
+               (errcode_for_file_access(),
+                errmsg("could not unlink file \"%s\": %m",
+                   PG_DYNSHMEM_STATE_FILE)));
+}
+
+/*
+ * Prepare this backend for dynamic shared memory usage.  Under EXEC_BACKEND,
+ * we must reread the state file and map the control segment; in other cases,
+ * we'll have inherited the postmaster's mapping and global variables.
+ */
+static void
+dsm_backend_startup(void)
+{
+   /* If dynamic shared memory is disabled, reject this. */
+   if (dynamic_shared_memory_type == DSM_IMPL_NONE)
+       ereport(ERROR,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("dynamic shared memory is disabled"),
+                errhint("Set dynamic_shared_memory_type to a value other than \"none\".")));
+
+#ifdef EXEC_BACKEND
+   {
+       dsm_handle  control_handle;
+       void       *control_address = NULL;
+
+       /* Read the control segment information from the state file. */
+       if (!dsm_read_state_file(&control_handle))
+           ereport(ERROR,
+                   (errcode(ERRCODE_INTERNAL_ERROR),
+                    errmsg("could not parse dynamic shared memory state file")));
+
+       /* Attach control segment. */
+       dsm_impl_op(DSM_OP_ATTACH, control_handle, 0,
+                   &dsm_control_impl_private, &control_address,
+                   &dsm_control_mapped_size, ERROR);
+       dsm_control_handle = control_handle;
+       dsm_control = control_address;
+       /* If control segment doesn't look sane, something is badly wrong. */
+       if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size))
+       {
+           dsm_impl_op(DSM_OP_DETACH, control_handle, 0,
+                       &dsm_control_impl_private, &control_address,
+                       &dsm_control_mapped_size, WARNING);
+           ereport(FATAL,
+                   (errcode(ERRCODE_INTERNAL_ERROR),
+                    errmsg("dynamic shared memory control segment is not valid")));
+       }
+   }
+#endif
+
+   /* Arrange to detach segments on exit. */
+   on_shmem_exit(dsm_backend_shutdown, 0);
+
+   dsm_init_done = true;
+}
+
+/*
+ * Create a new dynamic shared memory segment.
+ *
+ * The new segment is sized per the caller's request, mapped into the
+ * current process, and registered in the control segment so that other
+ * backends can attach to it by handle.  Handles are chosen at random;
+ * a false return from DSM_OP_CREATE indicates a name collision, in which
+ * case we simply retry with a fresh handle.
+ */
+dsm_segment *
+dsm_create(uint64 size)
+{
+   dsm_segment    *seg = dsm_create_descriptor();
+   uint32          i;
+   uint32          nitems;
+
+   /* Unsafe in postmaster (and pointless in a stand-alone backend). */
+   Assert(IsUnderPostmaster);
+
+   if (!dsm_init_done)
+       dsm_backend_startup();
+
+   /* Loop until we find an unused segment identifier. */
+   for (;;)
+   {
+       Assert(seg->mapped_address == NULL && seg->mapped_size == 0);
+       seg->handle = random();
+       if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private,
+                       &seg->mapped_address, &seg->mapped_size, ERROR))
+           break;
+   }
+
+   /* Lock the control segment so we can register the new segment. */
+   LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+
+   /* Search the control segment for an unused slot. */
+   nitems = dsm_control->nitems;
+   for (i = 0; i < nitems; ++i)
+   {
+       if (dsm_control->item[i].refcnt == 0)
+       {
+           dsm_control->item[i].handle = seg->handle;
+           /* refcnt of 1 triggers destruction, so start at 2 */
+           dsm_control->item[i].refcnt = 2;
+           seg->control_slot = i;
+           LWLockRelease(DynamicSharedMemoryControlLock);
+           return seg;
+       }
+   }
+
+   /* Verify that we can support an additional mapping. */
+   if (nitems >= dsm_control->maxitems)
+       ereport(ERROR,
+               (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
+                errmsg("too many dynamic shared memory segments")));
+
+   /* Enter the handle into a new array slot. */
+   dsm_control->item[nitems].handle = seg->handle;
+   /* refcnt of 1 triggers destruction, so start at 2 */
+   dsm_control->item[nitems].refcnt = 2;
+   seg->control_slot = nitems;
+   dsm_control->nitems++;
+   LWLockRelease(DynamicSharedMemoryControlLock);
+
+   return seg;
+}
+
+/*
+ * Attach a dynamic shared memory segment.
+ *
+ * See comments for dsm_segment_handle() for an explanation of how this
+ * is intended to be used.
+ *
+ * This function will return NULL if the segment isn't known to the system.
+ * This can happen if we're asked to attach the segment, but then everyone
+ * else detaches it (causing it to be destroyed) before we get around to
+ * attaching it.
+ */
+dsm_segment *
+dsm_attach(dsm_handle h)
+{
+   dsm_segment    *seg;
+   dlist_iter      iter;
+   uint32          i;
+   uint32          nitems;
+
+   /* Unsafe in postmaster (and pointless in a stand-alone backend). */
+   Assert(IsUnderPostmaster);
+
+   if (!dsm_init_done)
+       dsm_backend_startup();
+
+   /*
+    * Since this is just a debugging cross-check, we could leave it out
+    * altogether, or include it only in assert-enabled builds.  But since
+    * the list of attached segments should normally be very short, let's
+    * include it always for right now.
+    *
+    * If you're hitting this error, you probably want to attempt to
+    * find an existing mapping via dsm_find_mapping() before calling
+    * dsm_attach() to create a new one.
+    */
+   dlist_foreach(iter, &dsm_segment_list)
+   {
+       seg = dlist_container(dsm_segment, node, iter.cur);
+       if (seg->handle == h)
+           elog(ERROR, "can't attach the same segment more than once");
+   }
+
+   /* Create a new segment descriptor. */
+   seg = dsm_create_descriptor();
+   seg->handle = h;
+
+   /* Bump reference count for this segment in shared memory. */
+   LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+   nitems = dsm_control->nitems;
+   for (i = 0; i < nitems; ++i)
+   {
+       /* If the reference count is 0, the slot is actually unused. */
+       if (dsm_control->item[i].refcnt == 0)
+           continue;
+
+       /*
+        * If the reference count is 1, the slot is still in use, but the
+        * segment is in the process of going away.  We must keep scanning
+        * rather than stop: this slot might not even be the one we're
+        * looking for, and breaking out here would skip any later slot
+        * that actually holds the handle we want.
+        */
+       if (dsm_control->item[i].refcnt == 1)
+           continue;
+
+       /* Otherwise, if the descriptor matches, we've found a match. */
+       if (dsm_control->item[i].handle == seg->handle)
+       {
+           dsm_control->item[i].refcnt++;
+           seg->control_slot = i;
+           break;
+       }
+   }
+   LWLockRelease(DynamicSharedMemoryControlLock);
+
+   /*
+    * If we didn't find the handle we're looking for in the control
+    * segment, it probably means that everyone else who had it mapped,
+    * including the original creator, died before we got to this point.
+    * It's up to the caller to decide what to do about that.
+    */
+   if (seg->control_slot == INVALID_CONTROL_SLOT)
+   {
+       dsm_detach(seg);
+       return NULL;
+   }
+
+   /* Here's where we actually try to map the segment. */
+   dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
+               &seg->mapped_address, &seg->mapped_size, ERROR);
+
+   return seg;
+}
+
+/*
+ * At backend shutdown time, detach any segments that are still attached.
+ *
+ * Registered via on_shmem_exit().  dsm_detach() removes each segment from
+ * dsm_segment_list, so repeatedly detaching the list head drains the list.
+ */
+static void
+dsm_backend_shutdown(int code, Datum arg)
+{
+   while (!dlist_is_empty(&dsm_segment_list))
+   {
+       dsm_segment    *seg;
+
+       seg = dlist_head_element(dsm_segment, node, &dsm_segment_list);
+       dsm_detach(seg);
+   }
+}
+
+/*
+ * Resize an existing shared memory segment.
+ *
+ * This may cause the shared memory segment to be remapped at a different
+ * address.  For the caller's convenience, we return the mapped address.
+ *
+ * Not every implementation supports resizing (see dsm_impl_can_resize());
+ * an unsupported request is reported at ERROR level by the implementation.
+ */
+void *
+dsm_resize(dsm_segment *seg, uint64 size)
+{
+   /* Only segments registered in the control segment can be resized. */
+   Assert(seg->control_slot != INVALID_CONTROL_SLOT);
+   dsm_impl_op(DSM_OP_RESIZE, seg->handle, size, &seg->impl_private,
+               &seg->mapped_address, &seg->mapped_size, ERROR);
+   return seg->mapped_address;
+}
+
+/*
+ * Remap an existing shared memory segment.
+ *
+ * This is intended to be used when some other process has extended the
+ * mapping using dsm_resize(), but we've still only got the initial
+ * portion mapped.  Since this might change the address at which the
+ * segment is mapped, we return the new mapped address.
+ *
+ * Implemented as a fresh DSM_OP_ATTACH; the implementation replaces any
+ * existing mapping with one covering the segment's current size.
+ */
+void *
+dsm_remap(dsm_segment *seg)
+{
+   dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private,
+               &seg->mapped_address, &seg->mapped_size, ERROR);
+
+   return seg->mapped_address;
+}
+
+/*
+ * Detach from a shared memory segment, destroying the segment if we
+ * remove the last reference.
+ *
+ * This function should never fail.  It will often be invoked when aborting
+ * a transaction, and a further error won't serve any purpose.  It's not a
+ * complete disaster if we fail to unmap or destroy the segment; it means a
+ * resource leak, but that doesn't necessarily preclude further operations.
+ */
+void
+dsm_detach(dsm_segment *seg)
+{
+   /*
+    * Try to remove the mapping, if one exists.  Normally, there will be,
+    * but maybe not, if we failed partway through a create or attach
+    * operation.  We remove the mapping before decrementing the reference
+    * count so that the process that sees a zero reference count can be
+    * certain that no remaining mappings exist.  Even if this fails, we
+    * pretend that it works, because retrying is likely to fail in the
+    * same way.
+    */
+   if (seg->mapped_address != NULL)
+   {
+       dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private,
+                   &seg->mapped_address, &seg->mapped_size, WARNING);
+       seg->impl_private = NULL;
+       seg->mapped_address = NULL;
+       seg->mapped_size = 0;
+   }
+
+   /* Reduce reference count, if we previously increased it. */
+   if (seg->control_slot != INVALID_CONTROL_SLOT)
+   {
+       uint32  refcnt;
+       uint32  control_slot = seg->control_slot;
+
+       LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+       Assert(dsm_control->item[control_slot].handle == seg->handle);
+       Assert(dsm_control->item[control_slot].refcnt > 1);
+       refcnt = --dsm_control->item[control_slot].refcnt;
+       /* Clear our slot reference before attempting any destroy below,
+        * so a failure can't lead us to decrement the count twice. */
+       seg->control_slot = INVALID_CONTROL_SLOT;
+       LWLockRelease(DynamicSharedMemoryControlLock);
+
+       /* If new reference count is 1, try to destroy the segment. */
+       if (refcnt == 1)
+       {
+           /*
+            * If we fail to destroy the segment here, or are killed before
+            * we finish doing so, the reference count will remain at 1, which
+            * will mean that nobody else can attach to the segment.  At
+            * postmaster shutdown time, or when a new postmaster is started
+            * after a hard kill, another attempt will be made to remove the
+            * segment.
+            *
+            * The main case we're worried about here is being killed by
+            * a signal before we can finish removing the segment.  In that
+            * case, it's important to be sure that the segment still gets
+            * removed. If we actually fail to remove the segment for some
+            * other reason, the postmaster may not have any better luck than
+            * we did.  There's not much we can do about that, though.
+            */
+           if (dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private,
+                           &seg->mapped_address, &seg->mapped_size, WARNING))
+           {
+               /* Destroy succeeded; only now mark the slot free. */
+               LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE);
+               Assert(dsm_control->item[control_slot].handle == seg->handle);
+               Assert(dsm_control->item[control_slot].refcnt == 1);
+               dsm_control->item[control_slot].refcnt = 0;
+               LWLockRelease(DynamicSharedMemoryControlLock);
+           }
+       }
+   }
+
+   /* Clean up our remaining backend-private data structures. */
+   if (seg->resowner != NULL)
+       ResourceOwnerForgetDSM(seg->resowner, seg);
+   dlist_delete(&seg->node);
+   pfree(seg);
+}
+
+/*
+ * Keep a dynamic shared memory mapping until end of session.
+ *
+ * By default, mappings are owned by the current resource owner, which
+ * typically means they stick around for the duration of the current query
+ * only.  Dissociating the segment from its resource owner exempts it from
+ * resource-owner cleanup; it will then persist until explicitly detached
+ * or until backend shutdown (see dsm_backend_shutdown).
+ */
+void
+dsm_keep_mapping(dsm_segment *seg)
+{
+   if (seg->resowner != NULL)
+   {
+       ResourceOwnerForgetDSM(seg->resowner, seg);
+       seg->resowner = NULL;
+   }
+}
+
+/*
+ * Find an existing mapping for a shared memory segment, if there is one.
+ *
+ * Performs a linear scan of this backend's attached-segment list; returns
+ * NULL if the handle is not currently mapped in this process.
+ */
+dsm_segment *
+dsm_find_mapping(dsm_handle h)
+{
+   dlist_iter      iter;
+   dsm_segment    *seg;
+
+   dlist_foreach(iter, &dsm_segment_list)
+   {
+       seg = dlist_container(dsm_segment, node, iter.cur);
+       if (seg->handle == h)
+           return seg;
+   }
+
+   return NULL;
+}
+
+/*
+ * Get the address at which a dynamic shared memory segment is mapped.
+ *
+ * The segment must currently be mapped in this backend.
+ */
+void *
+dsm_segment_address(dsm_segment *seg)
+{
+   Assert(seg->mapped_address != NULL);
+   return seg->mapped_address;
+}
+
+/*
+ * Get the size of a mapping.
+ *
+ * Note this is the locally mapped size, which may lag the segment's true
+ * size if another process has since resized it (see dsm_remap).
+ */
+uint64
+dsm_segment_map_length(dsm_segment *seg)
+{
+   Assert(seg->mapped_address != NULL);
+   return seg->mapped_size;
+}
+
+/*
+ * Get a handle for a mapping.
+ *
+ * To establish communication via dynamic shared memory between two backends,
+ * one of them should first call dsm_create() to establish a new shared
+ * memory mapping.  That process should then call dsm_segment_handle() to
+ * obtain a handle for the mapping, and pass that handle to the
+ * coordinating backend via some means (e.g. bgw_main_arg, or via the
+ * main shared memory segment).  The recipient, once in possession of the
+ * handle, should call dsm_attach().
+ */
+dsm_handle
+dsm_segment_handle(dsm_segment *seg)
+{
+   return seg->handle;
+}
+
+/*
+ * Create a segment descriptor.
+ *
+ * The descriptor lives in TopMemoryContext and is linked into this
+ * backend's segment list.  We reserve resource-owner space up front so
+ * that ResourceOwnerRememberDSM() at the bottom cannot fail after the
+ * descriptor has been created.
+ */
+static dsm_segment *
+dsm_create_descriptor(void)
+{
+   dsm_segment    *seg;
+
+   ResourceOwnerEnlargeDSMs(CurrentResourceOwner);
+
+   seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment));
+   dlist_push_head(&dsm_segment_list, &seg->node);
+
+   /* seg->handle must be initialized by the caller */
+   seg->control_slot = INVALID_CONTROL_SLOT;
+   seg->impl_private = NULL;
+   seg->mapped_address = NULL;
+   seg->mapped_size = 0;
+
+   seg->resowner = CurrentResourceOwner;
+   ResourceOwnerRememberDSM(CurrentResourceOwner, seg);
+
+   return seg;
+}
+
+/*
+ * Sanity check a control segment.
+ *
+ * The goal here isn't to detect everything that could possibly be wrong with
+ * the control segment; there's not enough information for that.  Rather, the
+ * goal is to make sure that someone can iterate over the items in the segment
+ * without overrunning the end of the mapping and crashing.  We also check
+ * the magic number since, if that's messed up, this may not even be one of
+ * our segments at all.
+ */
+static bool
+dsm_control_segment_sane(dsm_control_header *control, uint64 mapped_size)
+{
+   /* Checks are ordered so we never read past what mapped_size allows. */
+   if (mapped_size < offsetof(dsm_control_header, item))
+       return false;           /* Mapped size too short to read header. */
+   if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC)
+       return false;           /* Magic number doesn't match. */
+   if (dsm_control_bytes_needed(control->maxitems) > mapped_size)
+       return false;           /* Max item count won't fit in map. */
+   if (control->nitems > control->maxitems)
+       return false;           /* Overfull. */
+   return true;
+}
+
+/*
+ * Compute the number of control-segment bytes needed to store a given
+ * number of items.
+ *
+ * The uint64 cast widens the multiplication so it cannot overflow for
+ * large nitems values on 32-bit platforms.
+ */
+static uint64
+dsm_control_bytes_needed(uint32 nitems)
+{
+   return offsetof(dsm_control_header, item)
+       + sizeof(dsm_control_item) * (uint64) nitems;
+}
diff --git a/src/backend/storage/ipc/dsm_impl.c b/src/backend/storage/ipc/dsm_impl.c
new file mode 100644 (file)
index 0000000..f929f02
--- /dev/null
@@ -0,0 +1,990 @@
+/*-------------------------------------------------------------------------
+ *
+ * dsm_impl.c
+ *   manage dynamic shared memory segments
+ *
+ * This file provides low-level APIs for creating and destroying shared
+ * memory segments using several different possible techniques.  We refer
+ * to these segments as dynamic because they can be created, altered, and
+ * destroyed at any point during the server life cycle.  This is unlike
+ * the main shared memory segment, of which there is always exactly one
+ * and which is always mapped at a fixed address in every PostgreSQL
+ * background process.
+ *
+ * Because not all systems provide the same primitives in this area, nor
+ * do all primitives behave the same way on all systems, we provide
+ * several implementations of this facility.  Many systems implement
+ * POSIX shared memory (shm_open etc.), which is well-suited to our needs
+ * in this area, with the exception that shared memory identifiers live
+ * in a flat system-wide namespace, raising the uncomfortable prospect of
+ * name collisions with other processes (including other copies of
+ * PostgreSQL) running on the same system.  Some systems only support
+ * the older System V shared memory interface (shmget etc.) which is
+ * also usable; however, the default allocation limits are often quite
+ * small, and the namespace is even more restricted.
+ *
+ * We also provide an mmap-based shared memory implementation.  This may
+ * be useful on systems that provide shared memory via a special-purpose
+ * filesystem; by opting for this implementation, the user can even
+ * control precisely where their shared memory segments are placed.  It
+ * can also be used as a fallback for systems where shm_open and shmget
+ * are not available or can't be used for some reason.  Of course,
+ * mapping a file residing on an actual spinning disk is a fairly poor
+ * approximation for shared memory because writeback may hurt performance
+ * substantially, but there should be few systems where we must make do
+ * with such poor tools.
+ *
+ * As ever, Windows requires its own implementation.
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *   src/backend/storage/ipc/dsm_impl.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#ifndef WIN32
+#include <sys/mman.h>
+#endif
+#include <sys/stat.h>
+#ifdef HAVE_SYS_IPC_H
+#include <sys/ipc.h>
+#endif
+#ifdef HAVE_SYS_SHM_H
+#include <sys/shm.h>
+#endif
+
+#include "portability/mem.h"
+#include "storage/dsm_impl.h"
+#include "storage/fd.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+
+#ifdef USE_DSM_POSIX
+static bool dsm_impl_posix(dsm_op op, dsm_handle handle, uint64 request_size,
+              void **impl_private, void **mapped_address,
+              uint64 *mapped_size, int elevel);
+#endif
+#ifdef USE_DSM_SYSV
+static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, uint64 request_size,
+              void **impl_private, void **mapped_address,
+              uint64 *mapped_size, int elevel);
+#endif
+#ifdef USE_DSM_WINDOWS
+static bool dsm_impl_windows(dsm_op op, dsm_handle handle, uint64 request_size,
+             void **impl_private, void **mapped_address,
+             uint64 *mapped_size, int elevel);
+#endif
+#ifdef USE_DSM_MMAP
+static bool dsm_impl_mmap(dsm_op op, dsm_handle handle, uint64 request_size,
+             void **impl_private, void **mapped_address,
+             uint64 *mapped_size, int elevel);
+#endif
+static int errcode_for_dynamic_shared_memory(void);
+
+/*
+ * Valid settings for the dynamic_shared_memory_type GUC.  Each entry is
+ * present only when configure detected support for that implementation.
+ */
+const struct config_enum_entry dynamic_shared_memory_options[] = {
+#ifdef USE_DSM_POSIX
+   { "posix", DSM_IMPL_POSIX, false},
+#endif
+#ifdef USE_DSM_SYSV
+   { "sysv", DSM_IMPL_SYSV, false},
+#endif
+#ifdef USE_DSM_WINDOWS
+   { "windows", DSM_IMPL_WINDOWS, false},
+#endif
+#ifdef USE_DSM_MMAP
+   { "mmap", DSM_IMPL_MMAP, false},
+#endif
+   { "none", DSM_IMPL_NONE, false},
+   {NULL, 0, false}
+};
+
+/* Implementation selector. */
+int dynamic_shared_memory_type;
+
+/* Size of buffer to be used for zero-filling. */
+#define ZBUFFER_SIZE               8192
+
+/*------
+ * Perform a low-level shared memory operation in a platform-specific way,
+ * as dictated by the selected implementation.  Each implementation is
+ * required to implement the following primitives.
+ *
+ * DSM_OP_CREATE.  Create a segment whose size is the request_size and
+ * map it.
+ *
+ * DSM_OP_ATTACH.  Map the segment, whose size must be the request_size.
+ * The segment may already be mapped; any existing mapping should be removed
+ * before creating a new one.
+ *
+ * DSM_OP_DETACH.  Unmap the segment.
+ *
+ * DSM_OP_RESIZE.  Resize the segment to the given request_size and
+ * remap the segment at that new size.
+ *
+ * DSM_OP_DESTROY.  Unmap the segment, if it is mapped.  Destroy the
+ * segment.
+ *
+ * Arguments:
+ *   op: The operation to be performed.
+ *   handle: The handle of an existing object, or for DSM_OP_CREATE, the
+ *     a new handle the caller wants created.
+ *   request_size: For DSM_OP_CREATE, the requested size.  For DSM_OP_RESIZE,
+ *     the new size.  Otherwise, 0.
+ *   impl_private: Private, implementation-specific data.  Will be a pointer
+ *     to NULL for the first operation on a shared memory segment within this
+ *     backend; thereafter, it will point to the value to which it was set
+ *     on the previous call.
+ *   mapped_address: Pointer to start of current mapping; pointer to NULL
+ *     if none.  Updated with new mapping address.
+ *   mapped_size: Pointer to size of current mapping; pointer to 0 if none.
+ *     Updated with new mapped size.
+ *   elevel: Level at which to log errors.
+ *
+ * Return value: true on success, false on failure.  When false is returned,
+ * a message should first be logged at the specified elevel, except in the
+ * case where DSM_OP_CREATE experiences a name collision, which should
+ * silently return false.
+ *-----
+ */
+bool
+dsm_impl_op(dsm_op op, dsm_handle handle, uint64 request_size,
+           void **impl_private, void **mapped_address, uint64 *mapped_size,
+           int elevel)
+{
+   Assert(op == DSM_OP_CREATE || op == DSM_OP_RESIZE || request_size == 0);
+   Assert((op != DSM_OP_CREATE && op != DSM_OP_ATTACH) ||
+           (*mapped_address == NULL && *mapped_size == 0));
+
+   if (request_size > (size_t) -1)
+       ereport(ERROR,
+               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                errmsg("requested shared memory size overflows size_t")));
+
+   switch (dynamic_shared_memory_type)
+   {
+#ifdef USE_DSM_POSIX
+       case DSM_IMPL_POSIX:
+           return dsm_impl_posix(op, handle, request_size, impl_private,
+                                 mapped_address, mapped_size, elevel);
+#endif
+#ifdef USE_DSM_SYSV
+       case DSM_IMPL_SYSV:
+           return dsm_impl_sysv(op, handle, request_size, impl_private,
+                                mapped_address, mapped_size, elevel);
+#endif
+#ifdef USE_DSM_WINDOWS
+       case DSM_IMPL_WINDOWS:
+           return dsm_impl_windows(op, handle, request_size, impl_private,
+                                   mapped_address, mapped_size, elevel);
+#endif
+#ifdef USE_DSM_MMAP
+       case DSM_IMPL_MMAP:
+           return dsm_impl_mmap(op, handle, request_size, impl_private,
+                                mapped_address, mapped_size, elevel);
+#endif
+   }
+   elog(ERROR, "unexpected dynamic shared memory type: %d",
+        dynamic_shared_memory_type);
+}
+
+/*
+ * Does the current dynamic shared memory implementation support resizing
+ * segments?  (The answer here could be platform-dependent in the future,
+ * since AIX allows shmctl(shmid, SHM_RESIZE, &buffer), though you apparently
+ * can't resize segments to anything larger than 256MB that way.  For now,
+ * we keep it simple.)
+ *
+ * Currently only the POSIX implementation can resize (via ftruncate).
+ */
+bool
+dsm_impl_can_resize(void)
+{
+   switch (dynamic_shared_memory_type)
+   {
+       case DSM_IMPL_NONE:
+           return false;
+       case DSM_IMPL_POSIX:
+           return true;
+       case DSM_IMPL_SYSV:
+           return false;
+       case DSM_IMPL_WINDOWS:
+           return false;
+       case DSM_IMPL_MMAP:
+           return false;
+       default:
+           return false;       /* should not happen */
+   }
+}
+
+#ifdef USE_DSM_POSIX
+/*
+ * Operating system primitives to support POSIX shared memory.
+ *
+ * POSIX shared memory segments are created and attached using shm_open()
+ * and shm_unlink(); other operations, such as sizing or mapping the
+ * segment, are performed as if the shared memory segments were files.
+ *
+ * Indeed, on some platforms, they may be implemented that way.  While
+ * POSIX shared memory segments seem intended to exist in a flat namespace,
+ * some operating systems may implement them as files, even going so far
+ * to treat a request for /xyz as a request to create a file by that name
+ * in the root directory.  Users of such broken platforms should select
+ * a different shared memory implementation.
+ */
+static bool
+dsm_impl_posix(dsm_op op, dsm_handle handle, uint64 request_size,
+              void **impl_private, void **mapped_address, uint64 *mapped_size,
+              int elevel)
+{
+   char    name[64];
+   int     flags;
+   int     fd;
+   char   *address;
+
+   snprintf(name, 64, "/PostgreSQL.%u", handle);
+
+   /* Handle teardown cases. */
+   if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
+   {
+       if (*mapped_address != NULL
+           && munmap(*mapped_address, *mapped_size) != 0)
+       {
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not unmap shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+       *mapped_address = NULL;
+       *mapped_size = 0;
+       if (op == DSM_OP_DESTROY && shm_unlink(name) != 0)
+       {
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not remove shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+       return true;
+   }
+
+   /*
+    * Create new segment or open an existing one for attach or resize.
+    *
+    * Even though we're not going through fd.c, we should be safe against
+    * running out of file descriptors, because of NUM_RESERVED_FDS.  We're
+    * only opening one extra descriptor here, and we'll close it before
+    * returning.
+    */
+   flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
+   if ((fd = shm_open(name, flags, 0600)) == -1)
+   {
+       /* Per API contract, a create-time name collision fails silently. */
+       if (errno != EEXIST)
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not open shared memory segment \"%s\": %m",
+                       name)));
+       return false;
+   }
+
+   /*
+    * If we're attaching the segment, determine the current size; if we are
+    * creating or resizing the segment, set the size to the requested value.
+    */
+   if (op == DSM_OP_ATTACH)
+   {
+       struct stat st;
+
+       if (fstat(fd, &st) != 0)
+       {
+           int     save_errno;
+
+           /* Back out what's already been done. */
+           save_errno = errno;
+           close(fd);
+           errno = save_errno;
+
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not stat shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+       request_size = st.st_size;
+   }
+   else if (*mapped_size != request_size && ftruncate(fd, request_size))
+   {
+       int     save_errno;
+
+       /* Back out what's already been done. */
+       save_errno = errno;
+       close(fd);
+       if (op == DSM_OP_CREATE)
+           shm_unlink(name);
+       errno = save_errno;
+
+       ereport(elevel,
+               (errcode_for_dynamic_shared_memory(),
+                errmsg("could not resize shared memory segment %s to " UINT64_FORMAT " bytes: %m",
+                   name, request_size)));
+       return false;
+   }
+
+   /*
+    * If we're reattaching or resizing, we must remove any existing mapping,
+    * unless we've already got the right thing mapped.
+    */
+   if (*mapped_address != NULL)
+   {
+       /*
+        * Already mapped at the correct size: nothing to do, but we must
+        * still close the descriptor opened above, else we'd leak one fd
+        * per no-op reattach or resize.
+        */
+       if (*mapped_size == request_size)
+       {
+           close(fd);
+           return true;
+       }
+       if (munmap(*mapped_address, *mapped_size) != 0)
+       {
+           int     save_errno;
+
+           /* Back out what's already been done. */
+           save_errno = errno;
+           close(fd);
+           if (op == DSM_OP_CREATE)
+               shm_unlink(name);
+           errno = save_errno;
+
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not unmap shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+       *mapped_address = NULL;
+       *mapped_size = 0;
+   }
+
+   /* Map it. */
+   address = mmap(NULL, request_size, PROT_READ|PROT_WRITE,
+                  MAP_SHARED|MAP_HASSEMAPHORE, fd, 0);
+   if (address == MAP_FAILED)
+   {
+       int     save_errno;
+
+       /* Back out what's already been done. */
+       save_errno = errno;
+       close(fd);
+       if (op == DSM_OP_CREATE)
+           shm_unlink(name);
+       errno = save_errno;
+
+       ereport(elevel,
+               (errcode_for_dynamic_shared_memory(),
+                errmsg("could not map shared memory segment \"%s\": %m",
+                       name)));
+       return false;
+   }
+   *mapped_address = address;
+   *mapped_size = request_size;
+   close(fd);
+
+   return true;
+}
+#endif
+
+#ifdef USE_DSM_SYSV
+/*
+ * Operating system primitives to support System V shared memory.
+ *
+ * System V shared memory segments are manipulated using shmget(), shmat(),
+ * shmdt(), and shmctl().  There's no portable way to resize such
+ * segments.  As the default allocation limits for System V shared memory
+ * are usually quite low, the POSIX facilities may be preferable; but
+ * those are not supported everywhere.
+ *
+ * The shmget() identifier for a segment is cached in *impl_private (as a
+ * palloc'd int) so that repeated operations on the same segment need not
+ * repeat the key-to-identifier lookup.
+ */
+static bool
+dsm_impl_sysv(dsm_op op, dsm_handle handle, uint64 request_size,
+             void **impl_private, void **mapped_address, uint64 *mapped_size,
+             int elevel)
+{
+   key_t   key;
+   int     ident;
+   char   *address;
+   char    name[64];
+   int    *ident_cache;
+
+   /* Resize is not supported for System V shared memory. */
+   if (op == DSM_OP_RESIZE)
+   {
+       elog(elevel, "System V shared memory segments cannot be resized");
+       return false;
+   }
+
+   /* Since resize isn't supported, reattach is a no-op. */
+   if (op == DSM_OP_ATTACH && *mapped_address != NULL)
+       return true;
+
+   /*
+    * POSIX shared memory and mmap-based shared memory identify segments
+    * with names.  To avoid needless error message variation, we use the
+    * handle as the name.
+    */
+   snprintf(name, 64, "%u", handle);
+
+   /*
+    * The System V shared memory namespace is very restricted; names are
+    * of type key_t, which is expected to be some sort of integer data type,
+    * but not necessarily the same one as dsm_handle.  Since we use
+    * dsm_handle to identify shared memory segments across processes, this
+    * might seem like a problem, but it's really not.  If dsm_handle is
+    * bigger than key_t, the cast below might truncate away some bits from
+    * the handle the user-provided, but it'll truncate exactly the same bits
+    * away in exactly the same fashion every time we use that handle, which
+    * is all that really matters.  Conversely, if dsm_handle is smaller than
+    * key_t, we won't use the full range of available key space, but that's
+    * no big deal either.
+    *
+    * We do make sure that the key isn't negative, because that might not
+    * be portable.
+    */
+   key = (key_t) handle;
+   if (key < 1)        /* avoid compiler warning if type is unsigned */
+       key = -key;
+
+   /*
+    * There's one special key, IPC_PRIVATE, which can't be used.  If we end
+    * up with that value by chance during a create operation, just pretend
+    * it already exists, so that caller will retry.  If we run into it
+    * anywhere else, the caller has passed a handle that doesn't correspond
+    * to anything we ever created, which should not happen.
+    */
+   if (key == IPC_PRIVATE)
+   {
+       if (op != DSM_OP_CREATE)
+           elog(DEBUG4, "System V shared memory key may not be IPC_PRIVATE");
+       errno = EEXIST;
+       return false;
+   }
+
+   /*
+    * Before we can do anything with a shared memory segment, we have to
+    * map the shared memory key to a shared memory identifier using shmget().
+    * To avoid repeated lookups, we store the key using impl_private.
+    */
+   if (*impl_private != NULL)
+   {
+       ident_cache = *impl_private;
+       ident = *ident_cache;
+   }
+   else
+   {
+       int     flags = IPCProtection;
+       size_t  segsize;
+
+       /*
+        * Allocate the memory BEFORE acquiring the resource, so that we don't
+        * leak the resource if memory allocation fails.
+        */
+       ident_cache = MemoryContextAlloc(TopMemoryContext, sizeof(int));
+
+       /*
+        * When using shmget to find an existing segment, we must pass the
+        * size as 0.  Passing a non-zero size which is greater than the
+        * actual size will result in EINVAL.
+        */
+       segsize = 0;
+
+       if (op == DSM_OP_CREATE)
+       {
+           flags |= IPC_CREAT | IPC_EXCL;
+           segsize = request_size;
+       }
+
+       if ((ident = shmget(key, segsize, flags)) == -1)
+       {
+           /* Per API contract, a create-time key collision fails silently. */
+           if (errno != EEXIST)
+           {
+               int     save_errno = errno;
+               pfree(ident_cache);
+               errno = save_errno;
+               ereport(elevel,
+                       (errcode_for_dynamic_shared_memory(),
+                        errmsg("could not get shared memory segment: %m")));
+           }
+           return false;
+       }
+
+       *ident_cache = ident;
+       *impl_private = ident_cache;
+   }
+
+   /* Handle teardown cases. */
+   if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
+   {
+       /* Drop the cached identifier; 'ident' above remains usable below. */
+       pfree(ident_cache);
+       *impl_private = NULL;
+       if (*mapped_address != NULL && shmdt(*mapped_address) != 0)
+       {
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not unmap shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+       *mapped_address = NULL;
+       *mapped_size = 0;
+       if (op == DSM_OP_DESTROY && shmctl(ident, IPC_RMID, NULL) < 0)
+       {
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not remove shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+       return true;
+   }
+
+   /* If we're attaching it, we must use IPC_STAT to determine the size. */
+   if (op == DSM_OP_ATTACH)
+   {
+       struct shmid_ds shm;
+
+       if (shmctl(ident, IPC_STAT, &shm) != 0)
+       {
+           int     save_errno;
+
+           /* Back out what's already been done. */
+           save_errno = errno;
+           if (op == DSM_OP_CREATE)        /* can't be true here; op is ATTACH */
+               shmctl(ident, IPC_RMID, NULL);
+           errno = save_errno;
+
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not stat shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+       request_size = shm.shm_segsz;
+   }
+
+   /* Map it. */
+   address = shmat(ident, NULL, PG_SHMAT_FLAGS);
+   if (address == (void *) -1)
+   {
+       int     save_errno;
+
+       /* Back out what's already been done. */
+       save_errno = errno;
+       if (op == DSM_OP_CREATE)
+           shmctl(ident, IPC_RMID, NULL);
+       errno = save_errno;
+
+       ereport(elevel,
+               (errcode_for_dynamic_shared_memory(),
+                errmsg("could not map shared memory segment \"%s\": %m",
+                       name)));
+       return false;
+   }
+   *mapped_address = address;
+   *mapped_size = request_size;
+
+   return true;
+}
+#endif
+
+#ifdef USE_DSM_WINDOWS
+/*
+ * Operating system primitives to support Windows shared memory.
+ *
+ * The Windows shared memory implementation uses a file mapping, which can
+ * be backed either by a physical file or by the system paging file.  The
+ * current implementation uses the system paging file, because the
+ * performance implications of using a physical file are unclear, and because
+ * the paging file is used in a similar way for the main shared memory on
+ * Windows.
+ *
+ * A memory mapping object is a kernel object; it is deleted when the last
+ * reference to it goes away, either explicitly via CloseHandle or when the
+ * process holding the reference exits.
+ */
+static bool
+dsm_impl_windows(dsm_op op, dsm_handle handle, uint64 request_size,
+                void **impl_private, void **mapped_address,
+                uint64 *mapped_size, int elevel)
+{
+   char   *address;
+   HANDLE      hmap;
+   char    name[64];
+   MEMORY_BASIC_INFORMATION info;
+
+   /* Resize is not supported for Windows shared memory. */
+   if (op == DSM_OP_RESIZE)
+   {
+       elog(elevel, "Windows shared memory segments cannot be resized");
+       return false;
+   }
+
+   /* Since resize isn't supported, reattach is a no-op. */
+   if (op == DSM_OP_ATTACH && *mapped_address != NULL)
+       return true;
+
+   /*
+    * Storing the shared memory segment in the Global\ namespace, can
+    * allow any process running in any session to access that file
+    * mapping object provided that the caller has the required access rights.
+    * But to avoid issues faced in main shared memory, we are using the naming
+    * convention similar to main shared memory. We can change here once
+    * issue mentioned in GetSharedMemName is resolved.
+    */
+   snprintf(name, 64, "Global/PostgreSQL.%u", handle);
+
+   /*
+    * Handle teardown cases.  Since Windows automatically destroys the object
+    * when no references remain, we can treat it the same as detach.
+    */
+   if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
+   {
+       if (*mapped_address != NULL
+           && UnmapViewOfFile(*mapped_address) == 0)
+       {
+           _dosmaperr(GetLastError());
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not unmap shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+       if (*impl_private != NULL
+           && CloseHandle(*impl_private) == 0)
+       {
+           _dosmaperr(GetLastError());
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not remove shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+
+       *impl_private = NULL;
+       *mapped_address = NULL;
+       *mapped_size = 0;
+       return true;
+   }
+
+   /* Create new segment or open an existing one for attach. */
+   if (op == DSM_OP_CREATE)
+   {
+       DWORD       size_high = (DWORD) (request_size >> 32);
+       DWORD       size_low = (DWORD) request_size;
+       hmap = CreateFileMapping(INVALID_HANDLE_VALUE,  /* Use the pagefile */
+                                NULL,          /* Default security attrs */
+                                PAGE_READWRITE,    /* Memory is read/write */
+                                size_high,     /* Upper 32 bits of size */
+                                size_low,      /* Lower 32 bits of size */
+                                name);
+       /*
+        * Call _dosmaperr() even when CreateFileMapping returned a valid
+        * handle: if the object already existed, GetLastError() reports
+        * ERROR_ALREADY_EXISTS, which _dosmaperr maps to EEXIST, and we
+        * must treat that as a creation failure below.
+        */
+       _dosmaperr(GetLastError());
+       if (errno == EEXIST)
+       {
+           /*
+            * On Windows, when the segment already exists, a handle for the
+            * existing segment is returned.  We must close it before
+            * returning.  We don't do _dosmaperr here, so errno won't be
+            * modified.
+            */
+           CloseHandle(hmap);
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not open shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+   }
+   else
+   {
+       hmap = OpenFileMapping(FILE_MAP_WRITE | FILE_MAP_READ,
+                              FALSE,       /* do not inherit the name */
+                              name);       /* name of mapping object */
+       _dosmaperr(GetLastError());
+   }
+
+   if (!hmap)
+   {
+       ereport(elevel,
+               (errcode_for_dynamic_shared_memory(),
+                errmsg("could not open shared memory segment \"%s\": %m",
+                   name)));
+       return false;
+   }
+
+   /* Map it. */
+   address = MapViewOfFile(hmap, FILE_MAP_WRITE | FILE_MAP_READ,
+                           0, 0, 0);
+   if (!address)
+   {
+       int     save_errno;
+
+       _dosmaperr(GetLastError());
+       /* Back out what's already been done. */
+       save_errno = errno;
+       CloseHandle(hmap);
+       errno = save_errno;
+
+       ereport(elevel,
+               (errcode_for_dynamic_shared_memory(),
+                errmsg("could not map shared memory segment \"%s\": %m",
+                       name)));
+       return false;
+   }
+
+   /*
+    * VirtualQuery gives size in page_size units, which is 4K for Windows.
+    * We need size only when we are attaching, but it's better to get the
+    * size when creating new segment to keep size consistent both for
+    * DSM_OP_CREATE and DSM_OP_ATTACH.
+    */
+   if (VirtualQuery(address, &info, sizeof(info)) == 0)
+   {
+       int     save_errno;
+
+       _dosmaperr(GetLastError());
+       /* Back out what's already been done. */
+       save_errno = errno;
+       UnmapViewOfFile(address);
+       CloseHandle(hmap);
+       errno = save_errno;
+
+       ereport(elevel,
+               (errcode_for_dynamic_shared_memory(),
+               errmsg("could not stat shared memory segment \"%s\": %m",
+                   name)));
+       return false;
+   }
+
+   *mapped_address = address;
+   *mapped_size = info.RegionSize;
+   *impl_private = hmap;
+
+   return true;
+}
+#endif
+
+#ifdef USE_DSM_MMAP
+/*
+ * Operating system primitives to support mmap-based shared memory.
+ *
+ * Calling this "shared memory" is somewhat of a misnomer, because what
+ * we're really doing is creating a bunch of files and mapping them into
+ * our address space.  The operating system may feel obliged to
+ * synchronize the contents to disk even if nothing is being paged out,
+ * which will not serve us well.  The user can relocate the pg_dynshmem
+ * directory to a ramdisk to avoid this problem, if available.
+ */
+static bool
+dsm_impl_mmap(dsm_op op, dsm_handle handle, uint64 request_size,
+             void **impl_private, void **mapped_address, uint64 *mapped_size,
+             int elevel)
+{
+   char    name[64];
+   int     flags;
+   int     fd;
+   char   *address;
+
+   snprintf(name, 64, PG_DYNSHMEM_DIR "/" PG_DYNSHMEM_MMAP_FILE_PREFIX "%u",
+       handle);
+
+   /* Handle teardown cases. */
+   if (op == DSM_OP_DETACH || op == DSM_OP_DESTROY)
+   {
+       if (*mapped_address != NULL
+           && munmap(*mapped_address, *mapped_size) != 0)
+       {
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not unmap shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+       *mapped_address = NULL;
+       *mapped_size = 0;
+       if (op == DSM_OP_DESTROY && unlink(name) != 0)
+       {
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not remove shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+       return true;
+   }
+
+   /* Create new segment or open an existing one for attach or resize. */
+   flags = O_RDWR | (op == DSM_OP_CREATE ? O_CREAT | O_EXCL : 0);
+   if ((fd = OpenTransientFile(name, flags, 0600)) == -1)
+   {
+       if (errno != EEXIST)
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not open shared memory segment \"%s\": %m",
+                       name)));
+       return false;
+   }
+
+   /*
+    * If we're attaching the segment, determine the current size; if we are
+    * creating or resizing the segment, set the size to the requested value.
+    */
+   if (op == DSM_OP_ATTACH)
+   {
+       struct stat st;
+
+       if (fstat(fd, &st) != 0)
+       {
+           int     save_errno;
+
+           /* Back out what's already been done. */
+           save_errno = errno;
+           CloseTransientFile(fd);
+           errno = save_errno;
+
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not stat shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+       request_size = st.st_size;
+   }
+   else if (*mapped_size > request_size && ftruncate(fd, request_size))
+   {
+       int     save_errno;
+
+       /*
+        * Back out what's already been done.  The fd came from
+        * OpenTransientFile, and the backing object is a plain file in
+        * pg_dynshmem (not a POSIX shm object), so use CloseTransientFile
+        * and unlink, as in every other error path in this function.
+        */
+       save_errno = errno;
+       CloseTransientFile(fd);
+       if (op == DSM_OP_CREATE)
+           unlink(name);
+       errno = save_errno;
+
+       ereport(elevel,
+               (errcode_for_dynamic_shared_memory(),
+                errmsg("could not resize shared memory segment \"%s\" to " UINT64_FORMAT " bytes: %m",
+                   name, request_size)));
+       return false;
+   }
+   else if (*mapped_size < request_size)
+   {
+       /*
+        * Allocate a buffer full of zeros.
+        *
+        * Note: palloc zbuffer, instead of just using a local char array,
+        * to ensure it is reasonably well-aligned; this may save a few
+        * cycles transferring data to the kernel.
+        */
+       char   *zbuffer = (char *) palloc0(ZBUFFER_SIZE);
+       uint64  remaining = request_size;   /* uint64: size may exceed 4GB */
+       bool    success = true;
+
+       /*
+        * Zero-fill the file. We have to do this the hard way to ensure
+        * that all the file space has really been allocated, so that we
+        * don't later seg fault when accessing the memory mapping.  This
+        * is pretty pessimal.
+        */
+       while (success && remaining > 0)
+       {
+           uint64  goal = remaining;
+
+           if (goal > ZBUFFER_SIZE)
+               goal = ZBUFFER_SIZE;
+           if (write(fd, zbuffer, goal) == goal)
+               remaining -= goal;
+           else
+               success = false;
+       }
+
+       if (!success)
+       {
+           int     save_errno;
+
+           /* Back out what's already been done. */
+           save_errno = errno;
+           CloseTransientFile(fd);
+           if (op == DSM_OP_CREATE)
+               unlink(name);
+           /* if write() succeeded partially, errno may be 0: report ENOSPC */
+           errno = save_errno ? save_errno : ENOSPC;
+
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not resize shared memory segment \"%s\" to " UINT64_FORMAT " bytes: %m",
+                       name, request_size)));
+           return false;
+       }
+   }
+
+   /*
+    * If we're reattaching or resizing, we must remove any existing mapping,
+    * unless we've already got the right thing mapped.
+    */
+   if (*mapped_address != NULL)
+   {
+       if (*mapped_size == request_size)
+           return true;
+       if (munmap(*mapped_address, *mapped_size) != 0)
+       {
+           int     save_errno;
+
+           /* Back out what's already been done. */
+           save_errno = errno;
+           CloseTransientFile(fd);
+           if (op == DSM_OP_CREATE)
+               unlink(name);
+           errno = save_errno;
+
+           ereport(elevel,
+                   (errcode_for_dynamic_shared_memory(),
+                    errmsg("could not unmap shared memory segment \"%s\": %m",
+                       name)));
+           return false;
+       }
+       *mapped_address = NULL;
+       *mapped_size = 0;
+   }
+
+   /* Map it. */
+   address = mmap(NULL, request_size, PROT_READ|PROT_WRITE,
+                  MAP_SHARED|MAP_HASSEMAPHORE, fd, 0);
+   if (address == MAP_FAILED)
+   {
+       int     save_errno;
+
+       /* Back out what's already been done. */
+       save_errno = errno;
+       CloseTransientFile(fd);
+       if (op == DSM_OP_CREATE)
+           unlink(name);
+       errno = save_errno;
+
+       ereport(elevel,
+               (errcode_for_dynamic_shared_memory(),
+                errmsg("could not map shared memory segment \"%s\": %m",
+                       name)));
+       return false;
+   }
+   *mapped_address = address;
+   *mapped_size = request_size;
+   CloseTransientFile(fd);
+
+   return true;
+}
+#endif
+
+/*
+ * Choose an SQLSTATE error code for a failed dynamic-shared-memory
+ * operation, based on errno: treat out-of-space/out-of-memory conditions
+ * as ERRCODE_OUT_OF_MEMORY, and anything else as a file-access error.
+ */
+static int
+errcode_for_dynamic_shared_memory(void)
+{
+   if (errno == EFBIG || errno == ENOMEM)
+       return errcode(ERRCODE_OUT_OF_MEMORY);
+   else
+       return errcode_for_file_access();
+}
index a0b741b444a2935361dbe9b68dd4bec58c2bd469..040c7aa1044dabb6d4fe9a3e835772932b661b7d 100644 (file)
@@ -30,6 +30,7 @@
 #include "replication/walreceiver.h"
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
+#include "storage/dsm.h"
 #include "storage/ipc.h"
 #include "storage/pg_shmem.h"
 #include "storage/pmsignal.h"
@@ -249,6 +250,10 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
        ShmemBackendArrayAllocation();
 #endif
 
+   /* Initialize dynamic shared memory facilities. */
+   if (!IsUnderPostmaster)
+       dsm_postmaster_startup();
+
    /*
     * Now give loadable modules a chance to set up their shmem allocations
     */
index ddbeb34ce723fdb551736e3b2b09edc22f169424..1756b48c4fe3566970cdc670a362e5ff726abb5b 100644 (file)
@@ -61,6 +61,7 @@
 #include "replication/walreceiver.h"
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
+#include "storage/dsm_impl.h"
 #include "storage/standby.h"
 #include "storage/fd.h"
 #include "storage/proc.h"
@@ -385,6 +386,7 @@ static const struct config_enum_entry synchronous_commit_options[] = {
  */
 extern const struct config_enum_entry wal_level_options[];
 extern const struct config_enum_entry sync_method_options[];
+extern const struct config_enum_entry dynamic_shared_memory_options[];
 
 /*
  * GUC option variables that are exported from this module
@@ -3335,6 +3337,16 @@ static struct config_enum ConfigureNamesEnum[] =
        NULL, NULL, NULL
    },
 
+   {
+       {"dynamic_shared_memory_type", PGC_POSTMASTER, RESOURCES_MEM,
+           gettext_noop("Selects the dynamic shared memory implementation used."),
+           NULL
+       },
+       &dynamic_shared_memory_type,
+       DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE, dynamic_shared_memory_options,
+       NULL, NULL, NULL
+   },
+
    {
        {"wal_sync_method", PGC_SIGHUP, WAL_SETTINGS,
            gettext_noop("Selects the method used for forcing WAL updates to disk."),
index 70221f42918acc77051d38b128d4d217d21cbdb0..707edf1d91d4bd651a10933fc3ad5685ca8dba42 100644 (file)
 #work_mem = 1MB                # min 64kB
 #maintenance_work_mem = 16MB       # min 1MB
 #max_stack_depth = 2MB         # min 100kB
+#dynamic_shared_memory_type = posix # the default is the first option
+                   # supported by the operating system:
+                   #   posix
+                   #   sysv
+                   #   windows
+                   #   mmap
+                   # use none to disable dynamic shared memory
 
 # - Disk -
 
index e7ec3931f127910ffca0c5889deebf10a7427aa1..ba1770157996fb8b302652f4af2b0fc981996208 100644 (file)
@@ -98,6 +98,11 @@ typedef struct ResourceOwnerData
    int         nfiles;         /* number of owned temporary files */
    File       *files;          /* dynamically allocated array */
    int         maxfiles;       /* currently allocated array size */
+
+   /* We have built-in support for remembering dynamic shmem segments */
+   int         ndsms;          /* number of owned shmem segments */
+   dsm_segment **dsms;         /* dynamically allocated array */
+   int         maxdsms;        /* currently allocated array size */
 }  ResourceOwnerData;
 
 
@@ -132,6 +137,7 @@ static void PrintPlanCacheLeakWarning(CachedPlan *plan);
 static void PrintTupleDescLeakWarning(TupleDesc tupdesc);
 static void PrintSnapshotLeakWarning(Snapshot snapshot);
 static void PrintFileLeakWarning(File file);
+static void PrintDSMLeakWarning(dsm_segment *seg);
 
 
 /*****************************************************************************
@@ -271,6 +277,21 @@ ResourceOwnerReleaseInternal(ResourceOwner owner,
                PrintRelCacheLeakWarning(owner->relrefs[owner->nrelrefs - 1]);
            RelationClose(owner->relrefs[owner->nrelrefs - 1]);
        }
+
+       /*
+        * Release dynamic shared memory segments.  Note that dsm_detach()
+        * will remove the segment from my list, so I just have to iterate
+        * until there are none.
+        *
+        * As in the preceding cases, warn if there are leftover at commit
+        * time.
+        */
+       while (owner->ndsms > 0)
+       {
+           if (isCommit)
+               PrintDSMLeakWarning(owner->dsms[owner->ndsms - 1]);
+           dsm_detach(owner->dsms[owner->ndsms - 1]);
+       }
    }
    else if (phase == RESOURCE_RELEASE_LOCKS)
    {
@@ -402,6 +423,7 @@ ResourceOwnerDelete(ResourceOwner owner)
    Assert(owner->ncatrefs == 0);
    Assert(owner->ncatlistrefs == 0);
    Assert(owner->nrelrefs == 0);
+   Assert(owner->ndsms == 0);
    Assert(owner->nplanrefs == 0);
    Assert(owner->ntupdescs == 0);
    Assert(owner->nsnapshots == 0);
@@ -438,6 +460,8 @@ ResourceOwnerDelete(ResourceOwner owner)
        pfree(owner->snapshots);
    if (owner->files)
        pfree(owner->files);
+   if (owner->dsms)
+       pfree(owner->dsms);
 
    pfree(owner);
 }
@@ -1230,3 +1254,88 @@ PrintFileLeakWarning(File file)
         "temporary file leak: File %d still referenced",
         file);
 }
+
+/*
+ * Make sure there is room for at least one more entry in a ResourceOwner's
+ * dynamic shmem segment reference array.
+ *
+ * This is separate from actually inserting an entry because if we run out
+ * of memory, it's critical to do so *before* acquiring the resource.
+ */
+void
+ResourceOwnerEnlargeDSMs(ResourceOwner owner)
+{
+   int         newmax;
+
+   if (owner->ndsms < owner->maxdsms)
+       return;                 /* nothing to do */
+
+   if (owner->dsms == NULL)
+   {
+       /* First use: allocate a small initial array in TopMemoryContext */
+       newmax = 16;
+       owner->dsms = (dsm_segment **)
+           MemoryContextAlloc(TopMemoryContext,
+                              newmax * sizeof(dsm_segment *));
+       owner->maxdsms = newmax;
+   }
+   else
+   {
+       /* Double the array size each time we run out of room */
+       newmax = owner->maxdsms * 2;
+       owner->dsms = (dsm_segment **)
+           repalloc(owner->dsms, newmax * sizeof(dsm_segment *));
+       owner->maxdsms = newmax;
+   }
+}
+
+/*
+ * Remember that a dynamic shmem segment is owned by a ResourceOwner
+ *
+ * Caller must have previously done ResourceOwnerEnlargeDSMs()
+ */
+void
+ResourceOwnerRememberDSM(ResourceOwner owner, dsm_segment *seg)
+{
+   /* Caller must have ensured space via ResourceOwnerEnlargeDSMs */
+   Assert(owner->ndsms < owner->maxdsms);
+   owner->dsms[owner->ndsms] = seg;
+   owner->ndsms++;
+}
+
+/*
+ * Forget that a dynamic shmem segment is owned by a ResourceOwner
+ */
+void
+ResourceOwnerForgetDSM(ResourceOwner owner, dsm_segment *seg)
+{
+   dsm_segment **dsms = owner->dsms;
+   int         ns1 = owner->ndsms - 1;
+   int         i;
+
+   /* Scan back-to-front; the most recently remembered entry is checked first */
+   for (i = ns1; i >= 0; i--)
+   {
+       if (dsms[i] == seg)
+       {
+           /* Shift the remaining entries down to close the gap */
+           while (i < ns1)
+           {
+               dsms[i] = dsms[i + 1];
+               i++;
+           }
+           owner->ndsms = ns1;
+           return;
+       }
+   }
+   /* Not found: caller's bookkeeping is broken */
+   elog(ERROR,
+        "dynamic shared memory segment %u is not owned by resource owner %s",
+        dsm_segment_handle(seg), owner->name);
+}
+
+
+/*
+ * Debugging subroutine: warn about a dynamic shared memory segment that is
+ * still referenced when its resource owner is released at commit time.
+ */
+static void
+PrintDSMLeakWarning(dsm_segment *seg)
+{
+   elog(WARNING,
+        "dynamic shared memory leak: segment %u still referenced",
+        dsm_segment_handle(seg));
+}
index f66f5302883a7d58e8ff313aee19f74e22d60ddf..a6eb0d806162b0665acd1b58ca440bfa6d35c0c0 100644 (file)
@@ -182,6 +182,7 @@ const char *subdirs[] = {
    "pg_xlog",
    "pg_xlog/archive_status",
    "pg_clog",
+   "pg_dynshmem",
    "pg_notify",
    "pg_serial",
    "pg_snapshots",
index 8aabf3c87a4706a2181f5d59eb93d8587ce65044..5eac52d93a9c24f78f7324c45d87ffee0da9cb7c 100644 (file)
 /* Define to 1 if you have the `setsid' function. */
 #undef HAVE_SETSID
 
+/* Define to 1 if you have the `shm_open' function. */
+#undef HAVE_SHM_OPEN
+
 /* Define to 1 if you have the `sigprocmask' function. */
 #undef HAVE_SIGPROCMASK
 
diff --git a/src/include/portability/mem.h b/src/include/portability/mem.h
new file mode 100644 (file)
index 0000000..2a07c10
--- /dev/null
@@ -0,0 +1,40 @@
+/*-------------------------------------------------------------------------
+ *
+ * mem.h
+ *   portability definitions for various memory operations
+ *
+ * Copyright (c) 2001-2013, PostgreSQL Global Development Group
+ *
+ * src/include/portability/mem.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef MEM_H
+#define MEM_H
+
+#define IPCProtection  (0600)  /* access/modify by user only */
+
+#ifdef SHM_SHARE_MMU           /* use intimate shared memory on Solaris */
+#define PG_SHMAT_FLAGS         SHM_SHARE_MMU
+#else
+#define PG_SHMAT_FLAGS         0
+#endif
+
+/* Linux prefers MAP_ANONYMOUS, but the flag is called MAP_ANON on other systems. */
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS          MAP_ANON
+#endif
+
+/* BSD-derived systems have MAP_HASSEMAPHORE, but it's not present (or needed) on Linux. */
+#ifndef MAP_HASSEMAPHORE
+#define MAP_HASSEMAPHORE       0
+#endif
+
+#define PG_MMAP_FLAGS          (MAP_SHARED|MAP_ANONYMOUS|MAP_HASSEMAPHORE)
+
+/* Some really old systems don't define MAP_FAILED. */
+#ifndef MAP_FAILED
+#define MAP_FAILED ((void *) -1)
+#endif
+
+#endif   /* MEM_H */
diff --git a/src/include/storage/dsm.h b/src/include/storage/dsm.h
new file mode 100644 (file)
index 0000000..2b5e722
--- /dev/null
@@ -0,0 +1,39 @@
+/*-------------------------------------------------------------------------
+ *
+ * dsm.h
+ *   manage dynamic shared memory segments
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/storage/dsm.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef DSM_H
+#define DSM_H
+
+#include "storage/dsm_impl.h"
+
+typedef struct dsm_segment dsm_segment;
+
+/* Initialization function. */
+extern void dsm_postmaster_startup(void);
+
+/* Functions that create, update, or remove mappings. */
+extern dsm_segment *dsm_create(uint64 size);
+extern dsm_segment *dsm_attach(dsm_handle h);
+extern void *dsm_resize(dsm_segment *seg, uint64 size);
+extern void *dsm_remap(dsm_segment *seg);
+extern void dsm_detach(dsm_segment *seg);
+
+/* Resource management functions. */
+extern void dsm_keep_mapping(dsm_segment *seg);
+extern dsm_segment *dsm_find_mapping(dsm_handle h);
+
+/* Informational functions. */
+extern void *dsm_segment_address(dsm_segment *seg);
+extern uint64 dsm_segment_map_length(dsm_segment *seg);
+extern dsm_handle dsm_segment_handle(dsm_segment *seg);
+
+#endif   /* DSM_H */
diff --git a/src/include/storage/dsm_impl.h b/src/include/storage/dsm_impl.h
new file mode 100644 (file)
index 0000000..13f1f48
--- /dev/null
@@ -0,0 +1,75 @@
+/*-------------------------------------------------------------------------
+ *
+ * dsm_impl.h
+ *   low-level dynamic shared memory primitives
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/storage/dsm_impl.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef DSM_IMPL_H
+#define DSM_IMPL_H
+
+/* Dynamic shared memory implementations. */
+#define DSM_IMPL_NONE          0
+#define DSM_IMPL_POSIX         1
+#define DSM_IMPL_SYSV          2
+#define DSM_IMPL_WINDOWS       3
+#define DSM_IMPL_MMAP          4
+
+/*
+ * Determine which dynamic shared memory implementations will be supported
+ * on this platform, and which one will be the default.
+ */
+#ifdef WIN32
+#define USE_DSM_WINDOWS
+#define DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE     DSM_IMPL_WINDOWS
+#else
+#ifdef HAVE_SHM_OPEN
+#define USE_DSM_POSIX
+#define DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE     DSM_IMPL_POSIX
+#endif
+#define USE_DSM_SYSV
+#ifndef DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE
+#define DEFAULT_DYNAMIC_SHARED_MEMORY_TYPE     DSM_IMPL_SYSV
+#endif
+#define USE_DSM_MMAP
+#endif
+
+/* GUC. */
+extern int dynamic_shared_memory_type;
+
+/*
+ * Directory for on-disk state.
+ *
+ * This is used by all implementations for crash recovery and by the mmap
+ * implementation for storage.
+ */
+#define PG_DYNSHMEM_DIR                    "pg_dynshmem"
+#define PG_DYNSHMEM_MMAP_FILE_PREFIX   "mmap."
+
+/* A "name" for a dynamic shared memory segment. */
+typedef uint32 dsm_handle;
+
+/* All the shared-memory operations we know about. */
+typedef enum
+{
+   DSM_OP_CREATE,
+   DSM_OP_ATTACH,
+   DSM_OP_DETACH,
+   DSM_OP_RESIZE,
+   DSM_OP_DESTROY
+} dsm_op;
+
+/* Create, attach to, detach from, resize, or destroy a segment. */
+extern bool dsm_impl_op(dsm_op op, dsm_handle handle, uint64 request_size,
+           void **impl_private, void **mapped_address, uint64 *mapped_size,
+           int elevel);
+
+/* Some implementations cannot resize segments.  Can this one? */
+extern bool dsm_impl_can_resize(void);
+
+#endif   /* DSM_IMPL_H */
index 39415a398a643a84edd7a4a1fbc3c2e7a9803075..730c47ba68691f9162da094ba0cad13ebb08bd2e 100644 (file)
@@ -80,6 +80,7 @@ typedef enum LWLockId
    OldSerXidLock,
    SyncRepLock,
    BackgroundWorkerLock,
+   DynamicSharedMemoryControlLock,
    /* Individual lock IDs end here */
    FirstBufMappingLock,
    FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
index a5d8707be2f5cc89734e7d2b7a21b2ebffc0ae0d..6693483368b608f6778c3d5ad8487412273b6561 100644 (file)
@@ -16,6 +16,7 @@
 #ifndef RESOWNER_PRIVATE_H
 #define RESOWNER_PRIVATE_H
 
+#include "storage/dsm.h"
 #include "storage/fd.h"
 #include "storage/lock.h"
 #include "utils/catcache.h"
@@ -80,4 +81,11 @@ extern void ResourceOwnerRememberFile(ResourceOwner owner,
 extern void ResourceOwnerForgetFile(ResourceOwner owner,
                        File file);
 
+/* support for dynamic shared memory management */
+extern void ResourceOwnerEnlargeDSMs(ResourceOwner owner);
+extern void ResourceOwnerRememberDSM(ResourceOwner owner,
+                         dsm_segment *);
+extern void ResourceOwnerForgetDSM(ResourceOwner owner,
+                       dsm_segment *);
+
 #endif   /* RESOWNER_PRIVATE_H */