Invent recursive_worktable_factor GUC to replace hard-wired constant.
author: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 24 Mar 2022 15:47:41 +0000 (11:47 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Thu, 24 Mar 2022 15:47:41 +0000 (11:47 -0400)
Up to now, the planner estimated the size of a recursive query's
worktable as 10 times the size of the non-recursive term.  It's hard
to see how to do significantly better than that automatically, but
we can give users control over the multiplier to allow tuning for
specific use-cases.  The default behavior remains the same.

Simon Riggs

Discussion: https://postgr.es/m/CANbhV-EuaLm4H3g0+BSTYHEGxJj3Kht0R+rJ8vT57Dejnh=_nA@mail.gmail.com

doc/src/sgml/config.sgml
src/backend/optimizer/path/costsize.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/include/optimizer/cost.h
src/include/optimizer/optimizer.h

index 7a48973b3c86a186a5f66086ff755200017ac58c..05df48131d7e66f4039edd9d90a930b59b73a294 100644 (file)
@@ -5919,6 +5919,29 @@ SELECT * FROM parent WHERE key = 2400;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-recursive-worktable-factor" xreflabel="recursive_worktable_factor">
+      <term><varname>recursive_worktable_factor</varname> (<type>floating point</type>)
+      <indexterm>
+       <primary><varname>recursive_worktable_factor</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Sets the planner's estimate of the average size of the working
+        table of a <link linkend="queries-with-recursive">recursive
+        query</link>, as a multiple of the estimated size of the initial
+        non-recursive term of the query.  This helps the planner choose
+        the most appropriate method for joining the working table to the
+        query's other tables.
+        The default value is <literal>10.0</literal>.  A smaller value
+        such as <literal>1.0</literal> can be helpful when the recursion
+        has low <quote>fan-out</quote> from one step to the next, as for
+        example in shortest-path queries.  Graph analytics queries may
+        benefit from larger-than-default values.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
     </sect2>
    </sect1>
index 4d9f3b4bb6bb6d251a221b4e654c6921bd62dac0..1b07ea392d909e9dfed8f6ba067c695186a32a8d 100644 (file)
@@ -123,6 +123,7 @@ double      cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
 double     cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
 double     parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
 double     parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
+double     recursive_worktable_factor = DEFAULT_RECURSIVE_WORKTABLE_FACTOR;
 
 int            effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
 
@@ -5665,10 +5666,11 @@ set_cte_size_estimates(PlannerInfo *root, RelOptInfo *rel, double cte_rows)
    if (rte->self_reference)
    {
        /*
-        * In a self-reference, arbitrarily assume the average worktable size
-        * is about 10 times the nonrecursive term's size.
+        * In a self-reference, we assume the average worktable size is a
+        * multiple of the nonrecursive term's size.  The best multiplier will
+        * vary depending on query "fan-out", so make its value adjustable.
         */
-       rel->tuples = 10 * cte_rows;
+       rel->tuples = clamp_row_est(recursive_worktable_factor * cte_rows);
    }
    else
    {
index f70f7f5c01e0a5a96a012b80aa955a8ecd65ef46..b86137dc38536a19521b1b326c1a8543f088f23f 100644 (file)
@@ -3740,6 +3740,18 @@ static struct config_real ConfigureNamesReal[] =
        NULL, NULL, NULL
    },
 
+   {
+       {"recursive_worktable_factor", PGC_USERSET, QUERY_TUNING_OTHER,
+           gettext_noop("Sets the planner's estimate of the average size "
+                        "of a recursive query's working table."),
+           NULL,
+           GUC_EXPLAIN
+       },
+       &recursive_worktable_factor,
+       DEFAULT_RECURSIVE_WORKTABLE_FACTOR, 0.001, 1000000.0,
+       NULL, NULL, NULL
+   },
+
    {
        {"geqo_selection_bias", PGC_USERSET, QUERY_TUNING_GEQO,
            gettext_noop("GEQO: selective pressure within the population."),
index 4cf5b26a3638b1454ea5eec65d7769ab33c2568a..b933fade8c66fe0b10d0942d5ef5acb4117a5fcb 100644 (file)
                    # JOIN clauses
 #plan_cache_mode = auto            # auto, force_generic_plan or
                    # force_custom_plan
+#recursive_worktable_factor = 10.0 # range 0.001-1000000
 
 
 #------------------------------------------------------------------------------
index 356a51f370aa7b80cc330af5a947cf9358fa6c25..bc12071af6ea3154211cc1f7f52f628166094e97 100644 (file)
@@ -29,6 +29,8 @@
 #define DEFAULT_PARALLEL_TUPLE_COST 0.1
 #define DEFAULT_PARALLEL_SETUP_COST  1000.0
 
+/* defaults for non-Cost parameters */
+#define DEFAULT_RECURSIVE_WORKTABLE_FACTOR  10.0
 #define DEFAULT_EFFECTIVE_CACHE_SIZE  524288   /* measured in pages */
 
 typedef enum
index 6b8ee0c69fa72c65817e9fcfe8e7c7b05da18d37..2302ab6d546fc2cd3c769587a22a9feed8480770 100644 (file)
@@ -91,6 +91,7 @@ extern PGDLLIMPORT double cpu_index_tuple_cost;
 extern PGDLLIMPORT double cpu_operator_cost;
 extern PGDLLIMPORT double parallel_tuple_cost;
 extern PGDLLIMPORT double parallel_setup_cost;
+extern PGDLLIMPORT double recursive_worktable_factor;
 extern PGDLLIMPORT int effective_cache_size;
 
 extern double clamp_row_est(double nrows);