Invent recursive_worktable_factor GUC to replace hard-wired constant.
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 24 Mar 2022 15:47:41 +0000 (11:47 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 24 Mar 2022 15:47:41 +0000 (11:47 -0400)
Up to now, the planner estimated the size of a recursive query's
worktable as 10 times the size of the non-recursive term.  It's hard
to see how to do significantly better than that automatically, but
we can give users control over the multiplier to allow tuning for
specific use-cases.  The default behavior remains the same.

Simon Riggs

Discussion: https://postgr.es/m/CANbhV-EuaLm4H3g0+BSTYHEGxJj3Kht0R+rJ8vT57Dejnh=_nA@mail.gmail.com

doc/src/sgml/config.sgml
src/backend/optimizer/path/costsize.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/include/optimizer/cost.h
src/include/optimizer/optimizer.h

index 7a48973b3c86a186a5f66086ff755200017ac58c..05df48131d7e66f4039edd9d90a930b59b73a294 100644 (file)
@@ -5919,6 +5919,29 @@ SELECT * FROM parent WHERE key = 2400;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-recursive-worktable-factor" xreflabel="recursive_worktable_factor">
+      <term><varname>recursive_worktable_factor</varname> (<type>floating point</type>)
+      <indexterm>
+       <primary><varname>recursive_worktable_factor</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Sets the planner's estimate of the average size of the working
+        table of a <link linkend="queries-with-recursive">recursive
+        query</link>, as a multiple of the estimated size of the initial
+        non-recursive term of the query.  This helps the planner choose
+        the most appropriate method for joining the working table to the
+        query's other tables.
+        The default value is <literal>10.0</literal>.  A smaller value
+        such as <literal>1.0</literal> can be helpful when the recursion
+        has low <quote>fan-out</quote> from one step to the next, as for
+        example in shortest-path queries.  Graph analytics queries may
+        benefit from larger-than-default values.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
     </sect2>
    </sect1>
index 4d9f3b4bb6bb6d251a221b4e654c6921bd62dac0..1b07ea392d909e9dfed8f6ba067c695186a32a8d 100644 (file)
@@ -123,6 +123,7 @@ double              cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
 double         cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
 double         parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
 double         parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
+double         recursive_worktable_factor = DEFAULT_RECURSIVE_WORKTABLE_FACTOR;
 
 int                    effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
 
@@ -5665,10 +5666,11 @@ set_cte_size_estimates(PlannerInfo *root, RelOptInfo *rel, double cte_rows)
        if (rte->self_reference)
        {
                /*
-                * In a self-reference, arbitrarily assume the average worktable size
-                * is about 10 times the nonrecursive term's size.
+                * In a self-reference, we assume the average worktable size is a
+                * multiple of the nonrecursive term's size.  The best multiplier will
+                * vary depending on query "fan-out", so make its value adjustable.
                 */
-               rel->tuples = 10 * cte_rows;
+               rel->tuples = clamp_row_est(recursive_worktable_factor * cte_rows);
        }
        else
        {
index f70f7f5c01e0a5a96a012b80aa955a8ecd65ef46..b86137dc38536a19521b1b326c1a8543f088f23f 100644 (file)
@@ -3740,6 +3740,18 @@ static struct config_real ConfigureNamesReal[] =
                NULL, NULL, NULL
        },
 
+       {
+               {"recursive_worktable_factor", PGC_USERSET, QUERY_TUNING_OTHER,
+                       gettext_noop("Sets the planner's estimate of the average size "
+                                                "of a recursive query's working table."),
+                       NULL,
+                       GUC_EXPLAIN
+               },
+               &recursive_worktable_factor,
+               DEFAULT_RECURSIVE_WORKTABLE_FACTOR, 0.001, 1000000.0,
+               NULL, NULL, NULL
+       },
+
        {
                {"geqo_selection_bias", PGC_USERSET, QUERY_TUNING_GEQO,
                        gettext_noop("GEQO: selective pressure within the population."),
index 4cf5b26a3638b1454ea5eec65d7769ab33c2568a..b933fade8c66fe0b10d0942d5ef5acb4117a5fcb 100644 (file)
                                        # JOIN clauses
 #plan_cache_mode = auto                        # auto, force_generic_plan or
                                        # force_custom_plan
+#recursive_worktable_factor = 10.0     # range 0.001-1000000
 
 
 #------------------------------------------------------------------------------
index 356a51f370aa7b80cc330af5a947cf9358fa6c25..bc12071af6ea3154211cc1f7f52f628166094e97 100644 (file)
@@ -29,6 +29,8 @@
 #define DEFAULT_PARALLEL_TUPLE_COST 0.1
 #define DEFAULT_PARALLEL_SETUP_COST  1000.0
 
+/* defaults for non-Cost parameters */
+#define DEFAULT_RECURSIVE_WORKTABLE_FACTOR  10.0
 #define DEFAULT_EFFECTIVE_CACHE_SIZE  524288   /* measured in pages */
 
 typedef enum
index 6b8ee0c69fa72c65817e9fcfe8e7c7b05da18d37..2302ab6d546fc2cd3c769587a22a9feed8480770 100644 (file)
@@ -91,6 +91,7 @@ extern PGDLLIMPORT double cpu_index_tuple_cost;
 extern PGDLLIMPORT double cpu_operator_cost;
 extern PGDLLIMPORT double parallel_tuple_cost;
 extern PGDLLIMPORT double parallel_setup_cost;
+extern PGDLLIMPORT double recursive_worktable_factor;
 extern PGDLLIMPORT int effective_cache_size;
 
 extern double clamp_row_est(double nrows);