Introduce a maintenance_io_concurrency setting.
Author: Thomas Munro <tmunro@postgresql.org>
Sun, 15 Mar 2020 23:31:34 +0000 (12:31 +1300)
Committer: Thomas Munro <tmunro@postgresql.org>
Mon, 16 Mar 2020 04:14:26 +0000 (17:14 +1300)
Introduce a GUC and a tablespace option to control I/O prefetching, much
like effective_io_concurrency, but for work that is done on behalf of
many client sessions.

Use the new setting in heapam.c instead of the hard-coded formula
effective_io_concurrency + 10 introduced by commit 558a9165e08.  Go with
a default value of 10 for now, because it's a round number pretty close
to the value used for that existing case.

Discussion: https://postgr.es/m/CA%2BhUKGJUw08dPs_3EUcdO6M90GnjofPYrWp4YSLaBkgYwS-AqA%40mail.gmail.com

12 files changed:
doc/src/sgml/config.sgml
doc/src/sgml/ref/alter_tablespace.sgml
doc/src/sgml/ref/create_tablespace.sgml
src/backend/access/common/reloptions.c
src/backend/access/heap/heapam.c
src/backend/storage/buffer/bufmgr.c
src/backend/utils/cache/spccache.c
src/backend/utils/misc/guc.c
src/bin/psql/tab-complete.c
src/include/commands/tablespace.h
src/include/storage/bufmgr.h
src/include/utils/spccache.h

index 3cac340f3237f2358c6ca503945a56ad8ecc792d..672bf6f1ee7e2086987d167cd3cb06071c5b1c0d 100644 (file)
@@ -2229,6 +2229,26 @@ include_dir 'conf.d'
        </listitem>
       </varlistentry>
 
+      <varlistentry id="guc-maintenance-io-concurrency" xreflabel="maintenance_io_concurrency">
+       <term><varname>maintenance_io_concurrency</varname> (<type>integer</type>)
+       <indexterm>
+        <primary><varname>maintenance_io_concurrency</varname> configuration parameter</primary>
+       </indexterm>
+       </term>
+       <listitem>
+        <para>
+         Similar to <varname>effective_io_concurrency</varname>, but used
+         for maintenance work that is done on behalf of many client sessions.
+        </para>
+        <para>
+         The default is 10 on supported systems, otherwise 0.  This value can
+         be overridden for tables in a particular tablespace by setting the
+         tablespace parameter of the same name (see
+         <xref linkend="sql-altertablespace"/>).
+        </para>
+       </listitem>
+      </varlistentry>
       <varlistentry id="guc-max-worker-processes" xreflabel="max_worker_processes">
        <term><varname>max_worker_processes</varname> (<type>integer</type>)
        <indexterm>
index acec33469f12ca626e858c2f0f7cda624f089698..356fb9f93f32df8f49a2d1c8985f158274571055 100644 (file)
@@ -84,13 +84,16 @@ ALTER TABLESPACE <replaceable>name</replaceable> RESET ( <replaceable class="par
      <para>
       A tablespace parameter to be set or reset.  Currently, the only
       available parameters are <varname>seq_page_cost</varname>,
-      <varname>random_page_cost</varname> and <varname>effective_io_concurrency</varname>.
-      Setting either value for a particular tablespace will override the
+      <varname>random_page_cost</varname>, <varname>effective_io_concurrency</varname>
+      and <varname>maintenance_io_concurrency</varname>.
+      Setting these values for a particular tablespace will override the
       planner's usual estimate of the cost of reading pages from tables in
-      that tablespace, as established by the configuration parameters of the
+      that tablespace, and the executor's prefetching behavior, as established
+      by the configuration parameters of the
       same name (see <xref linkend="guc-seq-page-cost"/>,
       <xref linkend="guc-random-page-cost"/>,
-      <xref linkend="guc-effective-io-concurrency"/>).  This may be useful if
+      <xref linkend="guc-effective-io-concurrency"/>,
+      <xref linkend="guc-maintenance-io-concurrency"/>).  This may be useful if
       one tablespace is located on a disk which is faster or slower than the
       remainder of the I/O subsystem.
      </para>
index c621ec2c6bf802e90bf7c6c4df8d8facf10abe5f..462b8831c27464e3f1e3b5d20e6d7b4b11a5f394 100644 (file)
@@ -106,13 +106,16 @@ CREATE TABLESPACE <replaceable class="parameter">tablespace_name</replaceable>
        <para>
         A tablespace parameter to be set or reset.  Currently, the only
         available parameters are <varname>seq_page_cost</varname>,
-        <varname>random_page_cost</varname> and <varname>effective_io_concurrency</varname>.
-        Setting either value for a particular tablespace will override the
+        <varname>random_page_cost</varname>, <varname>effective_io_concurrency</varname>
+        and <varname>maintenance_io_concurrency</varname>.
+        Setting these values for a particular tablespace will override the
         planner's usual estimate of the cost of reading pages from tables in
-        that tablespace, as established by the configuration parameters of the
+        that tablespace, and the executor's prefetching behavior, as established
+        by the configuration parameters of the
         same name (see <xref linkend="guc-seq-page-cost"/>,
         <xref linkend="guc-random-page-cost"/>,
-        <xref linkend="guc-effective-io-concurrency"/>).  This may be useful if
+        <xref linkend="guc-effective-io-concurrency"/>,
+        <xref linkend="guc-maintenance-io-concurrency"/>).  This may be useful if
         one tablespace is located on a disk which is faster or slower than the
         remainder of the I/O subsystem.
        </para>
index c3d45c7a248ee6f995132c381b6d9c40ce3b4436..ec207d3b26c0e48a8b581f792bc022ed7249efb6 100644 (file)
@@ -349,6 +349,19 @@ static relopt_int intRelOpts[] =
                -1, 0, MAX_IO_CONCURRENCY
 #else
                0, 0, 0
+#endif
+       },
+       {
+               {
+                       "maintenance_io_concurrency",
+                       "Number of simultaneous requests that can be handled efficiently by the disk subsystem for maintenance work.",
+                       RELOPT_KIND_TABLESPACE,
+                       ShareUpdateExclusiveLock
+               },
+#ifdef USE_PREFETCH
+               -1, 0, MAX_IO_CONCURRENCY
+#else
+               0, 0, 0
 #endif
        },
        {
@@ -1700,7 +1713,8 @@ tablespace_reloptions(Datum reloptions, bool validate)
        static const relopt_parse_elt tab[] = {
                {"random_page_cost", RELOPT_TYPE_REAL, offsetof(TableSpaceOpts, random_page_cost)},
                {"seq_page_cost", RELOPT_TYPE_REAL, offsetof(TableSpaceOpts, seq_page_cost)},
-               {"effective_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, effective_io_concurrency)}
+               {"effective_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, effective_io_concurrency)},
+               {"maintenance_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, maintenance_io_concurrency)}
        };
 
        return (bytea *) build_reloptions(reloptions, validate,
index 5a32e62ed0e761aab4fc5710e2ff2fff80d75990..29694b8aa4a3b6b242ab0b197f02f9a7221499f3 100644 (file)
@@ -7003,7 +7003,6 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
        Page            hpage;
 #ifdef USE_PREFETCH
        XidHorizonPrefetchState prefetch_state;
-       int                     io_concurrency;
        int                     prefetch_distance;
 #endif
 
@@ -7026,24 +7025,15 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
        /*
         * Compute the prefetch distance that we will attempt to maintain.
         *
-        * We don't use the regular formula to determine how much to prefetch
-        * here, but instead just add a constant to effective_io_concurrency.
-        * That's because it seems best to do some prefetching here even when
-        * effective_io_concurrency is set to 0, but if the DBA thinks it's OK to
-        * do more prefetching for other operations, then it's probably OK to do
-        * more prefetching in this case, too. It may be that this formula is too
-        * simplistic, but at the moment there is no evidence of that or any idea
-        * about what would work better.
-        *
         * Since the caller holds a buffer lock somewhere in rel, we'd better make
         * sure that isn't a catalog relation before we call code that does
         * syscache lookups, to avoid risk of deadlock.
         */
        if (IsCatalogRelation(rel))
-               io_concurrency = effective_io_concurrency;
+               prefetch_distance = maintenance_io_concurrency;
        else
-               io_concurrency = get_tablespace_io_concurrency(rel->rd_rel->reltablespace);
-       prefetch_distance = Min((io_concurrency) + 10, MAX_IO_CONCURRENCY);
+               prefetch_distance =
+                       get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
 
        /* Start prefetching. */
        xid_horizon_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
index 7a7748b6955f957ba62bd9c50a86b1e6c436589b..e05e2b34565d077552afa7079943fc5278906fa0 100644 (file)
@@ -119,6 +119,13 @@ bool               track_io_timing = false;
  */
 int                    effective_io_concurrency = 0;
 
+/*
+ * Like effective_io_concurrency, but used by maintenance code paths that might
+ * benefit from a higher setting because they work on behalf of many sessions.
+ * Overridden by the tablespace setting of the same name.
+ */
+int                    maintenance_io_concurrency = 0;
+
 /*
  * GUC variables about triggering kernel writeback for buffers written; OS
  * dependent defaults are set via the GUC mechanism.
index c4a0f719fb5b01f1982f182116dabb678f37220f..e0c3c1b1c1173eb33659827df816880dcc496d10 100644 (file)
@@ -221,3 +221,17 @@ get_tablespace_io_concurrency(Oid spcid)
        else
                return spc->opts->effective_io_concurrency;
 }
+
+/*
+ * get_tablespace_maintenance_io_concurrency
+ */
+int
+get_tablespace_maintenance_io_concurrency(Oid spcid)
+{
+       TableSpaceCacheEntry *spc = get_tablespace(spcid);
+
+       if (!spc->opts || spc->opts->maintenance_io_concurrency < 0)
+               return maintenance_io_concurrency;
+       else
+               return spc->opts->maintenance_io_concurrency;
+}
index 326e773b25fc6361e91d7cbcf8ad71be3dd574a9..68082315acda0cfcba168a45fed735a27b72b454 100644 (file)
@@ -196,6 +196,7 @@ static bool check_autovacuum_max_workers(int *newval, void **extra, GucSource so
 static bool check_max_wal_senders(int *newval, void **extra, GucSource source);
 static bool check_autovacuum_work_mem(int *newval, void **extra, GucSource source);
 static bool check_effective_io_concurrency(int *newval, void **extra, GucSource source);
+static bool check_maintenance_io_concurrency(int *newval, void **extra, GucSource source);
 static void assign_pgstat_temp_directory(const char *newval, void *extra);
 static bool check_application_name(char **newval, void **extra, GucSource source);
 static void assign_application_name(const char *newval, void *extra);
@@ -2884,6 +2885,24 @@ static struct config_int ConfigureNamesInt[] =
                check_effective_io_concurrency, NULL, NULL
        },
 
+       {
+               {"maintenance_io_concurrency",
+                       PGC_USERSET,
+                       RESOURCES_ASYNCHRONOUS,
+                       gettext_noop("A variant of effective_io_concurrency that is used for maintenance work."),
+                       NULL,
+                       GUC_EXPLAIN
+               },
+               &maintenance_io_concurrency,
+#ifdef USE_PREFETCH
+               10,
+#else
+               0,
+#endif
+               0, MAX_IO_CONCURRENCY,
+               check_maintenance_io_concurrency, NULL, NULL
+       },
+
        {
                {"backend_flush_after", PGC_USERSET, RESOURCES_ASYNCHRONOUS,
                        gettext_noop("Number of pages after which previously performed writes are flushed to disk."),
@@ -11466,6 +11485,19 @@ check_effective_io_concurrency(int *newval, void **extra, GucSource source)
        return true;
 }
 
+static bool
+check_maintenance_io_concurrency(int *newval, void **extra, GucSource source)
+{
+#ifndef USE_PREFETCH
+       if (*newval != 0)
+       {
+               GUC_check_errdetail("maintenance_io_concurrency must be set to 0 on platforms that lack posix_fadvise().");
+               return false;
+       }
+#endif                                                 /* USE_PREFETCH */
+       return true;
+}
+
 static void
 assign_pgstat_temp_directory(const char *newval, void *extra)
 {
index 174c3db62309fc50361489aeaddc0566131f77b1..ae35fa4aa98d9c54d06cdcf83d642b695131fcdb 100644 (file)
@@ -2140,7 +2140,7 @@ psql_completion(const char *text, int start, int end)
        /* ALTER TABLESPACE <foo> SET|RESET ( */
        else if (Matches("ALTER", "TABLESPACE", MatchAny, "SET|RESET", "("))
                COMPLETE_WITH("seq_page_cost", "random_page_cost",
-                                         "effective_io_concurrency");
+                                         "effective_io_concurrency", "maintenance_io_concurrency");
 
        /* ALTER TEXT SEARCH */
        else if (Matches("ALTER", "TEXT", "SEARCH"))
index 41c457052d97d98cc8c91716ce978abe9f63d210..fd1b28fca2268edad243e23b38142c3664aed4e5 100644 (file)
@@ -40,6 +40,7 @@ typedef struct TableSpaceOpts
        float8          random_page_cost;
        float8          seq_page_cost;
        int                     effective_io_concurrency;
+       int                     maintenance_io_concurrency;
 } TableSpaceOpts;
 
 extern Oid     CreateTableSpace(CreateTableSpaceStmt *stmt);
index 2bf5afdade6229987e840bbed6f33834c08702c5..d2a5b52f6e20adf783919c33137979a50f75b6a8 100644 (file)
@@ -58,6 +58,7 @@ extern int    bgwriter_lru_maxpages;
 extern double bgwriter_lru_multiplier;
 extern bool track_io_timing;
 extern int     effective_io_concurrency;
+extern int     maintenance_io_concurrency;
 
 extern int     checkpoint_flush_after;
 extern int     backend_flush_after;
index 5112ba3c376cc4b3bf098f36847cf97442200476..7e4ec69aa2ef83bf4cdfa1d8dc71b77dc0197801 100644 (file)
@@ -16,5 +16,6 @@
 void           get_tablespace_page_costs(Oid spcid, float8 *spc_random_page_cost,
                                                                          float8 *spc_seq_page_cost);
 int                    get_tablespace_io_concurrency(Oid spcid);
+int                    get_tablespace_maintenance_io_concurrency(Oid spcid);
 
 #endif                                                 /* SPCCACHE_H */