</listitem>
</varlistentry>
+ <varlistentry id="guc-huge-tlb-pages" xreflabel="huge_tlb_pages">
+ <term><varname>huge_tlb_pages</varname> (<type>enum</type>)</term>
+ <indexterm>
+ <primary><varname>huge_tlb_pages</> configuration parameter</primary>
+ </indexterm>
+ <listitem>
+ <para>
+ Enables/disables the use of huge TLB pages. Valid values are
+ <literal>try</literal> (the default), <literal>on</literal>,
+ and <literal>off</literal>.
+ </para>
+
+ <para>
+ At present, this feature is supported only on Linux. The setting
+ is ignored on other systems.
+ </para>
+
+ <para>
+ The use of huge TLB pages results in smaller page tables and
+ less CPU time spent on memory management, increasing performance. For
+ more details, see
+ <ulink url="https://wiki.debian.org/Hugepages">the Debian wiki</ulink>.
+ Remember that you will need at least shared_buffers / huge page size +
+ 1 huge TLB pages. So for example, for a system with 6GB shared buffers
+ and a huge page size of 2MB, you will need at least 3073 huge pages.
+ </para>
+
+ <para>
+ With <varname>huge_tlb_pages</varname> set to <literal>try</literal>,
+ the server will try to use huge pages, but fall back to using
+ normal allocation if that fails. With <literal>on</literal>, failure
+ to use huge pages will prevent the server from starting up. With
+ <literal>off</literal>, huge pages will not be used.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-temp-buffers" xreflabel="temp_buffers">
<term><varname>temp_buffers</varname> (<type>integer</type>)</term>
<indexterm>
#include "portability/mem.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
+#include "utils/guc.h"
typedef key_t IpcMemoryKey; /* shared memory key passed to shmget(2) */
unsigned long UsedShmemSegID = 0;
void *UsedShmemSegAddr = NULL;
static Size AnonymousShmemSize;
-static void *AnonymousShmem;
+static void *AnonymousShmem = NULL;
static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
static void IpcMemoryDetach(int status, Datum shmaddr);
return true;
}
+/*
+ * Creates an anonymous mmap()ed shared memory segment.
+ *
+ * Pass the requested size in *size. This function will modify *size to the
+ * actual size of the allocation, if it ends up allocating a segment that is
+ * larger than requested.
+ *
+ * Depending on huge_tlb_pages, the mapping is first attempted with
+ * MAP_HUGETLB (on platforms that define it). With HUGE_TLB_TRY, a failed
+ * huge page mapping falls back to a normal mapping of the originally
+ * requested size. Returns the address of the mapping; if no mapping could
+ * be created, a FATAL error is reported.
+ */
+#ifndef EXEC_BACKEND
+static void *
+CreateAnonymousSegment(Size *size)
+{
+	Size		allocsize = *size;
+	void	   *ptr = MAP_FAILED;
+
+#ifndef MAP_HUGETLB
+	if (huge_tlb_pages == HUGE_TLB_ON)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("huge TLB pages not supported on this platform")));
+#else
+	if (huge_tlb_pages == HUGE_TLB_ON || huge_tlb_pages == HUGE_TLB_TRY)
+	{
+		/*
+		 * Round up the request size to a suitable large value.
+		 *
+		 * Some Linux kernel versions are known to have a bug, which causes
+		 * mmap() with MAP_HUGETLB to fail if the request size is not a
+		 * multiple of any supported huge page size. To work around that, we
+		 * round up the request size to nearest 2MB. 2MB is the most common
+		 * huge page size on affected systems.
+		 *
+		 * Aside from that bug, even with a kernel that does the allocation
+		 * correctly, rounding it up ourselves avoids wasting memory. Without
+		 * it, if we for example make an allocation of 2MB + 1 bytes, the
+		 * kernel might decide to use two 2MB huge pages for that, and waste 2
+		 * MB - 1 of memory. When we do the rounding ourselves, we can use
+		 * that space for allocations.
+		 */
+		int			hugepagesize = 2 * 1024 * 1024;
+
+		if (allocsize % hugepagesize != 0)
+			allocsize += hugepagesize - (allocsize % hugepagesize);
+
+		/*
+		 * Map the rounded-up size, not the original request: that is the
+		 * size we report back to the caller through *size below.
+		 */
+		ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
+				   PG_MMAP_FLAGS | MAP_HUGETLB, -1, 0);
+		if (huge_tlb_pages == HUGE_TLB_TRY && ptr == MAP_FAILED)
+			elog(DEBUG1, "mmap with MAP_HUGETLB failed, huge pages disabled: %m");
+	}
+#endif
+
+	if (huge_tlb_pages == HUGE_TLB_OFF ||
+		(huge_tlb_pages == HUGE_TLB_TRY && ptr == MAP_FAILED))
+	{
+		/*
+		 * Use the original size, not the rounded-up value, when not using
+		 * (or falling back from) huge pages.
+		 */
+		allocsize = *size;
+		ptr = mmap(NULL, allocsize, PROT_READ | PROT_WRITE,
+				   PG_MMAP_FLAGS, -1, 0);
+	}
+
+	if (ptr == MAP_FAILED)
+		ereport(FATAL,
+				(errmsg("could not map anonymous shared memory: %m"),
+				 (errno == ENOMEM) ?
+				 errhint("This error usually means that PostgreSQL's request "
+					"for a shared memory segment exceeded available memory, "
+					  "swap space or huge pages. To reduce the request size "
+						 "(currently %zu bytes), reduce PostgreSQL's shared "
+					   "memory usage, perhaps by reducing shared_buffers or "
+						 "max_connections.",
+						 *size) : 0));
+
+	*size = allocsize;
+	return ptr;
+}
+#endif
/*
* PGSharedMemoryCreate
PGShmemHeader *hdr;
IpcMemoryId shmid;
struct stat statbuf;
- Size sysvsize = size;
+ Size sysvsize;
+
+#if defined(EXEC_BACKEND) || !defined(MAP_HUGETLB)
+ if (huge_tlb_pages == HUGE_TLB_ON)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("huge TLB pages not supported on this platform")));
+#endif
/* Room for a header? */
Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
* to run many copies of PostgreSQL without needing to adjust system
* settings.
*
+ * We assume that no one will attempt to run PostgreSQL 9.3 or later on
+ * systems that are ancient enough that anonymous shared memory is not
+ * supported, such as pre-2.4 versions of Linux. If that turns out to be
+ * false, we might need to add a run-time test here and do this only if
+ * the running kernel supports it.
+ *
* However, we disable this logic in the EXEC_BACKEND case, and fall back
* to the old method of allocating the entire segment using System V
* shared memory, because there's no way to attach an mmap'd segment to a
* developer use, this shouldn't be a big problem.
*/
#ifndef EXEC_BACKEND
- {
- long pagesize = sysconf(_SC_PAGE_SIZE);
-
- /*
- * Ensure request size is a multiple of pagesize.
- *
- * pagesize will, for practical purposes, always be a power of two.
- * But just in case it isn't, we do it this way instead of using
- * TYPEALIGN().
- */
- if (pagesize > 0 && size % pagesize != 0)
- size += pagesize - (size % pagesize);
+ AnonymousShmem = CreateAnonymousSegment(&size);
+ AnonymousShmemSize = size;
- /*
- * We assume that no one will attempt to run PostgreSQL 9.3 or later
- * on systems that are ancient enough that anonymous shared memory is
- * not supported, such as pre-2.4 versions of Linux. If that turns
- * out to be false, we might need to add a run-time test here and do
- * this only if the running kernel supports it.
- */
- AnonymousShmem = mmap(NULL, size, PROT_READ | PROT_WRITE, PG_MMAP_FLAGS,
- -1, 0);
- if (AnonymousShmem == MAP_FAILED)
- ereport(FATAL,
- (errmsg("could not map anonymous shared memory: %m"),
- (errno == ENOMEM) ?
- errhint("This error usually means that PostgreSQL's request "
- "for a shared memory segment exceeded available memory "
- "or swap space. To reduce the request size (currently "
- "%zu bytes), reduce PostgreSQL's shared memory usage, "
- "perhaps by reducing shared_buffers or "
- "max_connections.",
- size) : 0));
- AnonymousShmemSize = size;
-
- /* Now we need only allocate a minimal-sized SysV shmem block. */
- sysvsize = sizeof(PGShmemHeader);
- }
+ /* Now we need only allocate a minimal-sized SysV shmem block. */
+ sysvsize = sizeof(PGShmemHeader);
+#else
+ sysvsize = size;
#endif
/* Make sure PGSharedMemoryAttach doesn't fail without need */
#include "storage/dsm_impl.h"
#include "storage/standby.h"
#include "storage/fd.h"
+#include "storage/pg_shmem.h"
#include "storage/proc.h"
#include "storage/predicate.h"
#include "tcop/tcopprot.h"
{NULL, 0, false}
};
+/*
+ * Accepted spellings for the huge_tlb_pages setting, each mapped to one of
+ * HUGE_TLB_OFF, HUGE_TLB_ON or HUGE_TLB_TRY. The list is terminated by an
+ * entry with a NULL name.
+ *
+ * Although only "on", "off", "try" are documented, we accept all the likely
+ * variants of "on" and "off". Those extra variants are the entries whose
+ * third field is true (presumably the "hidden" flag of config_enum_entry;
+ * confirm against its declaration).
+ */
+static const struct config_enum_entry huge_tlb_options[] = {
+ {"off", HUGE_TLB_OFF, false},
+ {"on", HUGE_TLB_ON, false},
+ {"try", HUGE_TLB_TRY, false},
+ {"true", HUGE_TLB_ON, true},
+ {"false", HUGE_TLB_OFF, true},
+ {"yes", HUGE_TLB_ON, true},
+ {"no", HUGE_TLB_OFF, true},
+ {"1", HUGE_TLB_ON, true},
+ {"0", HUGE_TLB_OFF, true},
+ {NULL, 0, false}
+};
+
/*
* Options for enum values stored in other modules
*/
int tcp_keepalives_interval;
int tcp_keepalives_count;
+/*
+ * Current value of the huge_tlb_pages setting: one of HUGE_TLB_OFF,
+ * HUGE_TLB_ON or HUGE_TLB_TRY. The shared memory creation code compares
+ * against these values to decide whether to request huge pages.
+ *
+ * This really belongs in pg_shmem.c, but is defined here so that it doesn't
+ * need to be duplicated in all the different implementations of pg_shmem.c.
+ */
+int huge_tlb_pages;
+
/*
* These variables are all dummies that don't do anything, except in some
* cases provide the value for SHOW to display. The real state is elsewhere
NULL, NULL, NULL
},
+ {
+ {"huge_tlb_pages", PGC_POSTMASTER, RESOURCES_MEM,
+ gettext_noop("Use of huge TLB pages on Linux"),
+ NULL
+ },
+ &huge_tlb_pages,
+ HUGE_TLB_TRY, huge_tlb_options,
+ NULL, NULL, NULL
+ },
/* End-of-list marker */
{