Reordering DISTINCT keys to match input path's pathkeys

author Richard Guo <rguo@postgresql.org>

Tue, 26 Nov 2024 00:25:18 +0000 (09:25 +0900)

committer Richard Guo <rguo@postgresql.org>

Tue, 26 Nov 2024 00:25:18 +0000 (09:25 +0900)
author Richard Guo <rguo@postgresql.org>
Tue, 26 Nov 2024 00:25:18 +0000 (09:25 +0900)
committer Richard Guo <rguo@postgresql.org>
Tue, 26 Nov 2024 00:25:18 +0000 (09:25 +0900)
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml

index a84e60c09b95f5018390ba6d48ae6dd2d63cc7a0..76ab72db9640cad19b8e3fa3def763933d22361b 100644 (file)
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -5360,6 +5360,20 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
        </listitem>
       </varlistentry>
  
+     <varlistentry id="guc-enable-distinct-reordering" xreflabel="enable_distinct_reordering">
+      <term><varname>enable_distinct_reordering</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>enable_distinct_reordering</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Enables or disables the query planner's ability to reorder DISTINCT
+        keys to match the input path's pathkeys. The default is <literal>on</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
       <varlistentry id="guc-enable-gathermerge" xreflabel="enable_gathermerge">
        <term><varname>enable_gathermerge</varname> (<type>boolean</type>)
        <indexterm>
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c

index 7cf15e89b634735c5e1fc8d74658ea0303fa215f..03c2ac36e05f6e0507f99e76277157df6690992b 100644 (file)
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -2207,6 +2207,41 @@ pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
         return n;
  }
  
+/*
+ * pathkeys_useful_for_distinct
+ *             Count the number of pathkeys that are useful for DISTINCT or DISTINCT
+ *             ON clause.
+ *
+ * DISTINCT keys could be reordered to benefit from the given pathkey list.  As
+ * with pathkeys_useful_for_grouping, we return the number of leading keys in
+ * the list that are shared by the distinctClause pathkeys.
+ */
+static int
+pathkeys_useful_for_distinct(PlannerInfo *root, List *pathkeys)
+{
+       int                     n_common_pathkeys;
+
+       /*
+        * distinct_pathkeys may have become empty if all of the pathkeys were
+        * determined to be redundant.  Return 0 in this case.
+        */
+       if (root->distinct_pathkeys == NIL)
+               return 0;
+
+       /* walk the pathkeys and search for matching DISTINCT key */
+       n_common_pathkeys = 0;
+       foreach_node(PathKey, pathkey, pathkeys)
+       {
+               /* no matching DISTINCT key, we're done */
+               if (!list_member_ptr(root->distinct_pathkeys, pathkey))
+                       break;
+
+               n_common_pathkeys++;
+       }
+
+       return n_common_pathkeys;
+}
+
  /*
   * pathkeys_useful_for_setop
   *             Count the number of leading common pathkeys root's 'setop_pathkeys' in
@@ -2240,6 +2275,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
         if (nuseful2 > nuseful)
                 nuseful = nuseful2;
         nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
+       if (nuseful2 > nuseful)
+               nuseful = nuseful2;
+       nuseful2 = pathkeys_useful_for_distinct(root, pathkeys);
         if (nuseful2 > nuseful)
                 nuseful = nuseful2;
         nuseful2 = pathkeys_useful_for_setop(root, pathkeys);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 1f78dc3d530fb645660cce1af9bdd07d95c653df..b665a7762ec9b7eb94c2aaa87602933dfb1f8d82 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -67,6 +67,7 @@
  double         cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
  int                    debug_parallel_query = DEBUG_PARALLEL_OFF;
  bool           parallel_leader_participation = true;
+bool           enable_distinct_reordering = true;
  
  /* Hook for plugins to get control in planner() */
  planner_hook_type planner_hook = NULL;
@@ -198,6 +199,9 @@ static void create_partial_distinct_paths(PlannerInfo *root,
  static RelOptInfo *create_final_distinct_paths(PlannerInfo *root,
                                                                                            RelOptInfo *input_rel,
                                                                                            RelOptInfo *distinct_rel);
+static List *get_useful_pathkeys_for_distinct(PlannerInfo *root,
+                                                                                         List *needed_pathkeys,
+                                                                                         List *path_pathkeys);
  static RelOptInfo *create_ordered_paths(PlannerInfo *root,
                                                                                 RelOptInfo *input_rel,
                                                                                 PathTarget *target,
@@ -235,6 +239,12 @@ static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root,
                                                                                                  grouping_sets_data *gd,
                                                                                                  GroupPathExtraData *extra,
                                                                                                  bool force_rel_creation);
+static Path *make_ordered_path(PlannerInfo *root,
+                                                          RelOptInfo *rel,
+                                                          Path *path,
+                                                          Path *cheapest_path,
+                                                          List *pathkeys,
+                                                          double limit_tuples);
  static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel);
  static bool can_partial_agg(PlannerInfo *root);
  static void apply_scanjoin_target_to_paths(PlannerInfo *root,
@@ -4938,7 +4948,10 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
  
         /*
          * Try sorting the cheapest path and incrementally sorting any paths with
-        * presorted keys and put a unique paths atop of those.
+        * presorted keys and put a unique paths atop of those.  We'll also
+        * attempt to reorder the required pathkeys to match the input path's
+        * pathkeys as much as possible, in hopes of avoiding a possible need to
+        * re-sort.
          */
         if (grouping_is_sortable(root->processed_distinctClause))
         {
@@ -4946,86 +4959,67 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
                 {
                         Path       *input_path = (Path *) lfirst(lc);
                         Path       *sorted_path;
-                       bool            is_sorted;
-                       int                     presorted_keys;
+                       List       *useful_pathkeys_list = NIL;
  
-                       is_sorted = pathkeys_count_contained_in(root->distinct_pathkeys,
-                                                                                                       input_path->pathkeys,
-                                                                                                       &presorted_keys);
+                       useful_pathkeys_list =
+                               get_useful_pathkeys_for_distinct(root,
+                                                                                                root->distinct_pathkeys,
+                                                                                                input_path->pathkeys);
+                       Assert(list_length(useful_pathkeys_list) > 0);
  
-                       if (is_sorted)
-                               sorted_path = input_path;
-                       else
+                       foreach_node(List, useful_pathkeys, useful_pathkeys_list)
                         {
-                               /*
-                                * Try at least sorting the cheapest path and also try
-                                * incrementally sorting any path which is partially sorted
-                                * already (no need to deal with paths which have presorted
-                                * keys when incremental sort is disabled unless it's the
-                                * cheapest partial path).
-                                */
-                               if (input_path != cheapest_partial_path &&
-                                       (presorted_keys == 0 || !enable_incremental_sort))
+                               sorted_path = make_ordered_path(root,
+                                                                                               partial_distinct_rel,
+                                                                                               input_path,
+                                                                                               cheapest_partial_path,
+                                                                                               useful_pathkeys,
+                                                                                               -1.0);
+
+                               if (sorted_path == NULL)
                                         continue;
  
                                 /*
-                                * We've no need to consider both a sort and incremental sort.
-                                * We'll just do a sort if there are no presorted keys and an
-                                * incremental sort when there are presorted keys.
+                                * An empty distinct_pathkeys means all tuples have the same
+                                * value for the DISTINCT clause.  See
+                                * create_final_distinct_paths()
                                  */
-                               if (presorted_keys == 0 || !enable_incremental_sort)
-                                       sorted_path = (Path *) create_sort_path(root,
-                                                                                                                       partial_distinct_rel,
-                                                                                                                       input_path,
-                                                                                                                       root->distinct_pathkeys,
-                                                                                                                       -1.0);
-                               else
-                                       sorted_path = (Path *) create_incremental_sort_path(root,
-                                                                                                                                               partial_distinct_rel,
-                                                                                                                                               input_path,
-                                                                                                                                               root->distinct_pathkeys,
-                                                                                                                                               presorted_keys,
-                                                                                                                                               -1.0);
-                       }
-
-                       /*
-                        * An empty distinct_pathkeys means all tuples have the same value
-                        * for the DISTINCT clause.  See create_final_distinct_paths()
-                        */
-                       if (root->distinct_pathkeys == NIL)
-                       {
-                               Node       *limitCount;
+                               if (root->distinct_pathkeys == NIL)
+                               {
+                                       Node       *limitCount;
  
-                               limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
-                                                                                               sizeof(int64),
-                                                                                               Int64GetDatum(1), false,
-                                                                                               FLOAT8PASSBYVAL);
+                                       limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+                                                                                                       sizeof(int64),
+                                                                                                       Int64GetDatum(1), false,
+                                                                                                       FLOAT8PASSBYVAL);
  
-                               /*
-                                * Apply a LimitPath onto the partial path to restrict the
-                                * tuples from each worker to 1.  create_final_distinct_paths
-                                * will need to apply an additional LimitPath to restrict this
-                                * to a single row after the Gather node.  If the query
-                                * already has a LIMIT clause, then we could end up with three
-                                * Limit nodes in the final plan.  Consolidating the top two
-                                * of these could be done, but does not seem worth troubling
-                                * over.
-                                */
-                               add_partial_path(partial_distinct_rel, (Path *)
-                                                                create_limit_path(root, partial_distinct_rel,
-                                                                                                  sorted_path,
-                                                                                                  NULL,
-                                                                                                  limitCount,
-                                                                                                  LIMIT_OPTION_COUNT,
-                                                                                                  0, 1));
-                       }
-                       else
-                       {
-                               add_partial_path(partial_distinct_rel, (Path *)
-                                                                create_upper_unique_path(root, partial_distinct_rel,
-                                                                                                                 sorted_path,
-                                                                                                                 list_length(root->distinct_pathkeys),
-                                                                                                                 numDistinctRows));
+                                       /*
+                                        * Apply a LimitPath onto the partial path to restrict the
+                                        * tuples from each worker to 1.
+                                        * create_final_distinct_paths will need to apply an
+                                        * additional LimitPath to restrict this to a single row
+                                        * after the Gather node.  If the query already has a
+                                        * LIMIT clause, then we could end up with three Limit
+                                        * nodes in the final plan.  Consolidating the top two of
+                                        * these could be done, but does not seem worth troubling
+                                        * over.
+                                        */
+                                       add_partial_path(partial_distinct_rel, (Path *)
+                                                                        create_limit_path(root, partial_distinct_rel,
+                                                                                                          sorted_path,
+                                                                                                          NULL,
+                                                                                                          limitCount,
+                                                                                                          LIMIT_OPTION_COUNT,
+                                                                                                          0, 1));
+                               }
+                               else
+                               {
+                                       add_partial_path(partial_distinct_rel, (Path *)
+                                                                        create_upper_unique_path(root, partial_distinct_rel,
+                                                                                                                         sorted_path,
+                                                                                                                         list_length(root->distinct_pathkeys),
+                                                                                                                         numDistinctRows));
+                               }
                         }
                 }
         }
@@ -5134,7 +5128,9 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
                  * Unique node on those.  We also, consider doing an explicit sort of
                  * the cheapest input path and Unique'ing that.  If any paths have
                  * presorted keys then we'll create an incremental sort atop of those
-                * before adding a unique node on the top.
+                * before adding a unique node on the top.  We'll also attempt to
+                * reorder the required pathkeys to match the input path's pathkeys as
+                * much as possible, in hopes of avoiding a possible need to re-sort.
                  *
                  * When we have DISTINCT ON, we must sort by the more rigorous of
                  * DISTINCT and ORDER BY, else it won't have the desired behavior.
@@ -5158,86 +5154,66 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
                 {
                         Path       *input_path = (Path *) lfirst(lc);
                         Path       *sorted_path;
-                       bool            is_sorted;
-                       int                     presorted_keys;
+                       List       *useful_pathkeys_list = NIL;
  
-                       is_sorted = pathkeys_count_contained_in(needed_pathkeys,
-                                                                                                       input_path->pathkeys,
-                                                                                                       &presorted_keys);
+                       useful_pathkeys_list =
+                               get_useful_pathkeys_for_distinct(root,
+                                                                                                needed_pathkeys,
+                                                                                                input_path->pathkeys);
+                       Assert(list_length(useful_pathkeys_list) > 0);
  
-                       if (is_sorted)
-                               sorted_path = input_path;
-                       else
+                       foreach_node(List, useful_pathkeys, useful_pathkeys_list)
                         {
-                               /*
-                                * Try at least sorting the cheapest path and also try
-                                * incrementally sorting any path which is partially sorted
-                                * already (no need to deal with paths which have presorted
-                                * keys when incremental sort is disabled unless it's the
-                                * cheapest input path).
-                                */
-                               if (input_path != cheapest_input_path &&
-                                       (presorted_keys == 0 || !enable_incremental_sort))
+                               sorted_path = make_ordered_path(root,
+                                                                                               distinct_rel,
+                                                                                               input_path,
+                                                                                               cheapest_input_path,
+                                                                                               useful_pathkeys,
+                                                                                               limittuples);
+
+                               if (sorted_path == NULL)
                                         continue;
  
                                 /*
-                                * We've no need to consider both a sort and incremental sort.
-                                * We'll just do a sort if there are no presorted keys and an
-                                * incremental sort when there are presorted keys.
+                                * distinct_pathkeys may have become empty if all of the
+                                * pathkeys were determined to be redundant.  If all of the
+                                * pathkeys are redundant then each DISTINCT target must only
+                                * allow a single value, therefore all resulting tuples must
+                                * be identical (or at least indistinguishable by an equality
+                                * check).  We can uniquify these tuples simply by just taking
+                                * the first tuple.  All we do here is add a path to do "LIMIT
+                                * 1" atop of 'sorted_path'.  When doing a DISTINCT ON we may
+                                * still have a non-NIL sort_pathkeys list, so we must still
+                                * only do this with paths which are correctly sorted by
+                                * sort_pathkeys.
                                  */
-                               if (presorted_keys == 0 || !enable_incremental_sort)
-                                       sorted_path = (Path *) create_sort_path(root,
-                                                                                                                       distinct_rel,
-                                                                                                                       input_path,
-                                                                                                                       needed_pathkeys,
-                                                                                                                       limittuples);
-                               else
-                                       sorted_path = (Path *) create_incremental_sort_path(root,
-                                                                                                                                               distinct_rel,
-                                                                                                                                               input_path,
-                                                                                                                                               needed_pathkeys,
-                                                                                                                                               presorted_keys,
-                                                                                                                                               limittuples);
-                       }
-
-                       /*
-                        * distinct_pathkeys may have become empty if all of the pathkeys
-                        * were determined to be redundant.  If all of the pathkeys are
-                        * redundant then each DISTINCT target must only allow a single
-                        * value, therefore all resulting tuples must be identical (or at
-                        * least indistinguishable by an equality check).  We can uniquify
-                        * these tuples simply by just taking the first tuple.  All we do
-                        * here is add a path to do "LIMIT 1" atop of 'sorted_path'.  When
-                        * doing a DISTINCT ON we may still have a non-NIL sort_pathkeys
-                        * list, so we must still only do this with paths which are
-                        * correctly sorted by sort_pathkeys.
-                        */
-                       if (root->distinct_pathkeys == NIL)
-                       {
-                               Node       *limitCount;
+                               if (root->distinct_pathkeys == NIL)
+                               {
+                                       Node       *limitCount;
  
-                               limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
-                                                                                               sizeof(int64),
-                                                                                               Int64GetDatum(1), false,
-                                                                                               FLOAT8PASSBYVAL);
+                                       limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
+                                                                                                       sizeof(int64),
+                                                                                                       Int64GetDatum(1), false,
+                                                                                                       FLOAT8PASSBYVAL);
  
-                               /*
-                                * If the query already has a LIMIT clause, then we could end
-                                * up with a duplicate LimitPath in the final plan. That does
-                                * not seem worth troubling over too much.
-                                */
-                               add_path(distinct_rel, (Path *)
-                                                create_limit_path(root, distinct_rel, sorted_path,
-                                                                                  NULL, limitCount,
-                                                                                  LIMIT_OPTION_COUNT, 0, 1));
-                       }
-                       else
-                       {
-                               add_path(distinct_rel, (Path *)
-                                                create_upper_unique_path(root, distinct_rel,
-                                                                                                 sorted_path,
-                                                                                                 list_length(root->distinct_pathkeys),
-                                                                                                 numDistinctRows));
+                                       /*
+                                        * If the query already has a LIMIT clause, then we could
+                                        * end up with a duplicate LimitPath in the final plan.
+                                        * That does not seem worth troubling over too much.
+                                        */
+                                       add_path(distinct_rel, (Path *)
+                                                        create_limit_path(root, distinct_rel, sorted_path,
+                                                                                          NULL, limitCount,
+                                                                                          LIMIT_OPTION_COUNT, 0, 1));
+                               }
+                               else
+                               {
+                                       add_path(distinct_rel, (Path *)
+                                                        create_upper_unique_path(root, distinct_rel,
+                                                                                                         sorted_path,
+                                                                                                         list_length(root->distinct_pathkeys),
+                                                                                                         numDistinctRows));
+                               }
                         }
                 }
         }
@@ -5280,6 +5256,82 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
         return distinct_rel;
  }
  
+/*
+ * get_useful_pathkeys_for_distinct
+ *       Get useful orderings of pathkeys for distinctClause by reordering
+ *       'needed_pathkeys' to match the given 'path_pathkeys' as much as possible.
+ *
+ * This returns a list of pathkeys that can be useful for DISTINCT or DISTINCT
+ * ON clause.  For convenience, it always includes the given 'needed_pathkeys'.
+ */
+static List *
+get_useful_pathkeys_for_distinct(PlannerInfo *root, List *needed_pathkeys,
+                                                                List *path_pathkeys)
+{
+       List       *useful_pathkeys_list = NIL;
+       List       *useful_pathkeys = NIL;
+
+       /* always include the given 'needed_pathkeys' */
+       useful_pathkeys_list = lappend(useful_pathkeys_list,
+                                                                  needed_pathkeys);
+
+       if (!enable_distinct_reordering)
+               return useful_pathkeys_list;
+
+       /*
+        * Scan the given 'path_pathkeys' and construct a list of PathKey nodes
+        * that match 'needed_pathkeys', but only up to the longest matching
+        * prefix.
+        *
+        * When we have DISTINCT ON, we must ensure that the resulting pathkey
+        * list matches initial distinctClause pathkeys; otherwise, it won't have
+        * the desired behavior.
+        */
+       foreach_node(PathKey, pathkey, path_pathkeys)
+       {
+               /*
+                * The PathKey nodes are canonical, so they can be checked for
+                * equality by simple pointer comparison.
+                */
+               if (!list_member_ptr(needed_pathkeys, pathkey))
+                       break;
+               if (root->parse->hasDistinctOn &&
+                       !list_member_ptr(root->distinct_pathkeys, pathkey))
+                       break;
+
+               useful_pathkeys = lappend(useful_pathkeys, pathkey);
+       }
+
+       /* If no match at all, no point in reordering needed_pathkeys */
+       if (useful_pathkeys == NIL)
+               return useful_pathkeys_list;
+
+       /*
+        * If not full match, the resulting pathkey list is not useful without
+        * incremental sort.
+        */
+       if (list_length(useful_pathkeys) < list_length(needed_pathkeys) &&
+               !enable_incremental_sort)
+               return useful_pathkeys_list;
+
+       /* Append the remaining PathKey nodes in needed_pathkeys */
+       useful_pathkeys = list_concat_unique_ptr(useful_pathkeys,
+                                                                                        needed_pathkeys);
+
+       /*
+        * If the resulting pathkey list is the same as the 'needed_pathkeys',
+        * just drop it.
+        */
+       if (compare_pathkeys(needed_pathkeys,
+                                                useful_pathkeys) == PATHKEYS_EQUAL)
+               return useful_pathkeys_list;
+
+       useful_pathkeys_list = lappend(useful_pathkeys_list,
+                                                                  useful_pathkeys);
+
+       return useful_pathkeys_list;
+}
+
  /*
   * create_ordered_paths
   *
@@ -7008,58 +7060,6 @@ done:
         return parallel_workers;
  }
  
-/*
- * make_ordered_path
- *             Return a path ordered by 'pathkeys' based on the given 'path'.  May
- *             return NULL if it doesn't make sense to generate an ordered path in
- *             this case.
- */
-static Path *
-make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
-                                 Path *cheapest_path, List *pathkeys)
-{
-       bool            is_sorted;
-       int                     presorted_keys;
-
-       is_sorted = pathkeys_count_contained_in(pathkeys,
-                                                                                       path->pathkeys,
-                                                                                       &presorted_keys);
-
-       if (!is_sorted)
-       {
-               /*
-                * Try at least sorting the cheapest path and also try incrementally
-                * sorting any path which is partially sorted already (no need to deal
-                * with paths which have presorted keys when incremental sort is
-                * disabled unless it's the cheapest input path).
-                */
-               if (path != cheapest_path &&
-                       (presorted_keys == 0 || !enable_incremental_sort))
-                       return NULL;
-
-               /*
-                * We've no need to consider both a sort and incremental sort. We'll
-                * just do a sort if there are no presorted keys and an incremental
-                * sort when there are presorted keys.
-                */
-               if (presorted_keys == 0 || !enable_incremental_sort)
-                       path = (Path *) create_sort_path(root,
-                                                                                        rel,
-                                                                                        path,
-                                                                                        pathkeys,
-                                                                                        -1.0);
-               else
-                       path = (Path *) create_incremental_sort_path(root,
-                                                                                                                rel,
-                                                                                                                path,
-                                                                                                                pathkeys,
-                                                                                                                presorted_keys,
-                                                                                                                -1.0);
-       }
-
-       return path;
-}
-
  /*
   * add_paths_to_grouping_rel
   *
@@ -7111,7 +7111,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                                                                                  grouped_rel,
                                                                                  path,
                                                                                  cheapest_path,
-                                                                                info->pathkeys);
+                                                                                info->pathkeys,
+                                                                                -1.0);
                                 if (path == NULL)
                                         continue;
  
@@ -7192,7 +7193,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                                                                                          grouped_rel,
                                                                                          path,
                                                                                          partially_grouped_rel->cheapest_total_path,
-                                                                                        info->pathkeys);
+                                                                                        info->pathkeys,
+                                                                                        -1.0);
  
                                         if (path == NULL)
                                                 continue;
@@ -7443,7 +7445,8 @@ create_partial_grouping_paths(PlannerInfo *root,
                                                                                  partially_grouped_rel,
                                                                                  path,
                                                                                  cheapest_total_path,
-                                                                                info->pathkeys);
+                                                                                info->pathkeys,
+                                                                                -1.0);
  
                                 if (path == NULL)
                                         continue;
@@ -7500,7 +7503,8 @@ create_partial_grouping_paths(PlannerInfo *root,
                                                                                  partially_grouped_rel,
                                                                                  path,
                                                                                  cheapest_partial_path,
-                                                                                info->pathkeys);
+                                                                                info->pathkeys,
+                                                                                -1.0);
  
                                 if (path == NULL)
                                         continue;
@@ -7586,6 +7590,58 @@ create_partial_grouping_paths(PlannerInfo *root,
         return partially_grouped_rel;
  }
  
+/*
+ * make_ordered_path
+ *             Return a path ordered by 'pathkeys' based on the given 'path'.  May
+ *             return NULL if it doesn't make sense to generate an ordered path in
+ *             this case.
+ */
+static Path *
+make_ordered_path(PlannerInfo *root, RelOptInfo *rel, Path *path,
+                                 Path *cheapest_path, List *pathkeys, double limit_tuples)
+{
+       bool            is_sorted;
+       int                     presorted_keys;
+
+       is_sorted = pathkeys_count_contained_in(pathkeys,
+                                                                                       path->pathkeys,
+                                                                                       &presorted_keys);
+
+       if (!is_sorted)
+       {
+               /*
+                * Try at least sorting the cheapest path and also try incrementally
+                * sorting any path which is partially sorted already (no need to deal
+                * with paths which have presorted keys when incremental sort is
+                * disabled unless it's the cheapest input path).
+                */
+               if (path != cheapest_path &&
+                       (presorted_keys == 0 || !enable_incremental_sort))
+                       return NULL;
+
+               /*
+                * We've no need to consider both a sort and incremental sort. We'll
+                * just do a sort if there are no presorted keys and an incremental
+                * sort when there are presorted keys.
+                */
+               if (presorted_keys == 0 || !enable_incremental_sort)
+                       path = (Path *) create_sort_path(root,
+                                                                                        rel,
+                                                                                        path,
+                                                                                        pathkeys,
+                                                                                        limit_tuples);
+               else
+                       path = (Path *) create_incremental_sort_path(root,
+                                                                                                                rel,
+                                                                                                                path,
+                                                                                                                pathkeys,
+                                                                                                                presorted_keys,
+                                                                                                                limit_tuples);
+       }
+
+       return path;
+}
+
  /*
   * Generate Gather and Gather Merge paths for a grouping relation or partial
   * grouping relation.
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c

index 8a67f01200cad1d2dadf1c34714538ffe7bc462e..9845abd6932215c13832041a34bacbedf03f0d28 100644 (file)
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -996,6 +996,16 @@ struct config_bool ConfigureNamesBool[] =
                 true,
                 NULL, NULL, NULL
         },
+       {
+               {"enable_distinct_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
+                       gettext_noop("Enables reordering of DISTINCT pathkeys."),
+                       NULL,
+                       GUC_EXPLAIN
+               },
+               &enable_distinct_reordering,
+               true,
+               NULL, NULL, NULL
+       },
         {
                 {"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
                         gettext_noop("Enables genetic query optimization."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample

index 39a3ac231274bbe24e3fa063a25707903fe51827..407cd1e08ca3e705541f77bf1cb0556042fc4c90 100644 (file)
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -414,6 +414,7 @@
  #enable_sort = on
  #enable_tidscan = on
  #enable_group_by_reordering = on
+#enable_distinct_reordering = on
  
  # - Planner Cost Constants -
  
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h

index 93e3dc719dab23ace751a7a2ce5b3d72d69ad2dc..2e123e08b791360ebd6f4792e619aeaf6cf69a8a 100644 (file)
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -111,6 +111,7 @@ typedef enum
  /* GUC parameters */
  extern PGDLLIMPORT int debug_parallel_query;
  extern PGDLLIMPORT bool parallel_leader_participation;
+extern PGDLLIMPORT bool enable_distinct_reordering;
  
  extern struct PlannedStmt *planner(Query *parse, const char *query_string,
                                                                    int cursorOptions,
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out

index 82b8e54f5f17015d2cb9051c4ff25372426a1982..379ba0bc9fa5af34d8c8f92d6350a1daad576a4e 100644 (file)
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -464,3 +464,135 @@ SELECT null IS NOT DISTINCT FROM null as "yes";
   t
  (1 row)
  
+--
+-- Test the planner's ability to reorder the distinctClause Pathkeys to match
+-- the input path's ordering
+--
+CREATE TABLE distinct_tbl (x int, y int);
+INSERT INTO distinct_tbl SELECT i%10, i%10 FROM generate_series(1, 1000) AS i;
+CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y);
+ANALYZE distinct_tbl;
+-- Produce results with sorting.
+SET enable_hashagg TO OFF;
+-- Ensure we avoid the need to re-sort by reordering the distinctClause
+-- Pathkeys to match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM distinct_tbl;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Unique
+   ->  Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(2 rows)
+
+SELECT DISTINCT y, x FROM distinct_tbl;
+ y | x 
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+-- Ensure we leverage incremental-sort by reordering the distinctClause
+-- Pathkeys to partially match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
+ Unique
+   ->  Incremental Sort
+         Sort Key: s.x, s.y
+         Presorted Key: s.x
+         ->  Subquery Scan on s
+               ->  Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(6 rows)
+
+SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+ y | x 
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+-- Ensure we avoid the need to re-sort in partial distinct by reordering the
+-- distinctClause Pathkeys to match the ordering of the input path
+SET parallel_tuple_cost=0;
+SET parallel_setup_cost=0;
+SET min_parallel_table_scan_size=0;
+SET min_parallel_index_scan_size=0;
+SET max_parallel_workers_per_gather=2;
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM distinct_tbl limit 10;
+                                         QUERY PLAN                                          
+---------------------------------------------------------------------------------------------
+ Limit
+   ->  Unique
+         ->  Gather Merge
+               Workers Planned: 1
+               ->  Unique
+                     ->  Parallel Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(6 rows)
+
+SELECT DISTINCT y, x FROM distinct_tbl limit 10;
+ y | x 
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+RESET max_parallel_workers_per_gather;
+RESET min_parallel_index_scan_size;
+RESET min_parallel_table_scan_size;
+RESET parallel_setup_cost;
+RESET parallel_tuple_cost;
+-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the
+-- input path even if there is ORDER BY clause
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Sort
+   Sort Key: y
+   ->  Unique
+         ->  Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(4 rows)
+
+SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y;
+ y | x 
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+RESET enable_hashagg;
+DROP TABLE distinct_tbl;
diff --git a/src/test/regress/expected/select_distinct_on.out b/src/test/regress/expected/select_distinct_on.out

index 958381afe5ebaabb4e039b3a79f678b686969419..69e0cf3959c0e02b3d1f457f418ef3d43477e794 100644 (file)
--- a/src/test/regress/expected/select_distinct_on.out
+++ b/src/test/regress/expected/select_distinct_on.out
@@ -121,3 +121,128 @@ SELECT DISTINCT ON (four) four,hundred
           Filter: (four = 0)
  (3 rows)
  
+--
+-- Test the planner's ability to reorder the distinctClause Pathkeys to match
+-- the input path's ordering
+--
+CREATE TABLE distinct_tbl (x int, y int, z int);
+INSERT INTO distinct_tbl SELECT i%10, i%10, i%10 FROM generate_series(1, 1000) AS i;
+CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y);
+ANALYZE distinct_tbl;
+-- Produce results with sorting.
+SET enable_hashagg TO OFF;
+-- Ensure we avoid the need to re-sort by reordering the distinctClause
+-- Pathkeys to match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Unique
+   ->  Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(2 rows)
+
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl;
+ x | y 
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+-- Ensure we leverage incremental-sort by reordering the distinctClause
+-- Pathkeys to partially match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
+ Unique
+   ->  Incremental Sort
+         Sort Key: s.x, s.y
+         Presorted Key: s.x
+         ->  Subquery Scan on s
+               ->  Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(6 rows)
+
+SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+ x | y 
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the
+-- input path even if there is ORDER BY clause
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl ORDER BY y;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Sort
+   Sort Key: y
+   ->  Unique
+         ->  Index Only Scan using distinct_tbl_x_y_idx on distinct_tbl
+(4 rows)
+
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl ORDER BY y;
+ x | y 
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+-- Ensure the resulting pathkey list matches the initial distinctClause Pathkeys
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_tbl order by x, z, y) s ORDER BY y, x, z;
+                                     QUERY PLAN                                     
+------------------------------------------------------------------------------------
+ Sort
+   Sort Key: s.y, s.x, s.z
+   ->  Unique
+         ->  Incremental Sort
+               Sort Key: s.x, s.y, s.z
+               Presorted Key: s.x
+               ->  Subquery Scan on s
+                     ->  Sort
+                           Sort Key: distinct_tbl.x, distinct_tbl.z, distinct_tbl.y
+                           ->  Seq Scan on distinct_tbl
+(10 rows)
+
+SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_tbl order by x, z, y) s ORDER BY y, x, z;
+ x | y 
+---+---
+ 0 | 0
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+(10 rows)
+
+RESET enable_hashagg;
+DROP TABLE distinct_tbl;
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out

index fad7fc3a7e0a64b4807369715c0cfde845f38a24..91089ac215f34f599e4906e8fce36aa1043bbb9a 100644 (file)
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -150,6 +150,7 @@ select name, setting from pg_settings where name like 'enable%';
  --------------------------------+---------
   enable_async_append            | on
   enable_bitmapscan              | on
+ enable_distinct_reordering     | on
   enable_gathermerge             | on
   enable_group_by_reordering     | on
   enable_hashagg                 | on
@@ -170,7 +171,7 @@ select name, setting from pg_settings where name like 'enable%';
   enable_seqscan                 | on
   enable_sort                    | on
   enable_tidscan                 | on
-(22 rows)
+(23 rows)
  
  -- There are always wait event descriptions for various types.  InjectionPoint
  -- may be present or absent, depending on history since last postmaster start.
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql

index da92c197aba5c9102ef57ec3f47045593526820c..50ac7dde396c6599d48b6e303a701d7fdf210a0b 100644 (file)
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -221,3 +221,56 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no";
  SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
  SELECT 2 IS NOT DISTINCT FROM null as "no";
  SELECT null IS NOT DISTINCT FROM null as "yes";
+
+--
+-- Test the planner's ability to reorder the distinctClause Pathkeys to match
+-- the input path's ordering
+--
+
+CREATE TABLE distinct_tbl (x int, y int);
+INSERT INTO distinct_tbl SELECT i%10, i%10 FROM generate_series(1, 1000) AS i;
+CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y);
+ANALYZE distinct_tbl;
+
+-- Produce results with sorting.
+SET enable_hashagg TO OFF;
+
+-- Ensure we avoid the need to re-sort by reordering the distinctClause
+-- Pathkeys to match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM distinct_tbl;
+SELECT DISTINCT y, x FROM distinct_tbl;
+
+-- Ensure we leverage incremental-sort by reordering the distinctClause
+-- Pathkeys to partially match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+SELECT DISTINCT y, x FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+
+-- Ensure we avoid the need to re-sort in partial distinct by reordering the
+-- distinctClause Pathkeys to match the ordering of the input path
+SET parallel_tuple_cost=0;
+SET parallel_setup_cost=0;
+SET min_parallel_table_scan_size=0;
+SET min_parallel_index_scan_size=0;
+SET max_parallel_workers_per_gather=2;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM distinct_tbl limit 10;
+SELECT DISTINCT y, x FROM distinct_tbl limit 10;
+
+RESET max_parallel_workers_per_gather;
+RESET min_parallel_index_scan_size;
+RESET min_parallel_table_scan_size;
+RESET parallel_setup_cost;
+RESET parallel_tuple_cost;
+
+-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the
+-- input path even if there is ORDER BY clause
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y;
+SELECT DISTINCT y, x FROM distinct_tbl ORDER BY y;
+
+RESET enable_hashagg;
+
+DROP TABLE distinct_tbl;
diff --git a/src/test/regress/sql/select_distinct_on.sql b/src/test/regress/sql/select_distinct_on.sql

index 261e6140feb151484687cfeaf5e3270682095f71..d79dace94c41be1094cb1c75ad64f8e884aabf95 100644 (file)
--- a/src/test/regress/sql/select_distinct_on.sql
+++ b/src/test/regress/sql/select_distinct_on.sql
@@ -42,3 +42,43 @@ SELECT DISTINCT ON (four) four,two
  EXPLAIN (COSTS OFF)
  SELECT DISTINCT ON (four) four,hundred
     FROM tenk1 WHERE four = 0 ORDER BY 1,2;
+
+--
+-- Test the planner's ability to reorder the distinctClause Pathkeys to match
+-- the input path's ordering
+--
+
+CREATE TABLE distinct_tbl (x int, y int, z int);
+INSERT INTO distinct_tbl SELECT i%10, i%10, i%10 FROM generate_series(1, 1000) AS i;
+CREATE INDEX distinct_tbl_x_y_idx ON distinct_tbl (x, y);
+ANALYZE distinct_tbl;
+
+-- Produce results with sorting.
+SET enable_hashagg TO OFF;
+
+-- Ensure we avoid the need to re-sort by reordering the distinctClause
+-- Pathkeys to match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl;
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl;
+
+-- Ensure we leverage incremental-sort by reordering the distinctClause
+-- Pathkeys to partially match the ordering of the input path
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+SELECT DISTINCT ON (y, x) x, y FROM (SELECT * FROM distinct_tbl ORDER BY x) s;
+
+-- Ensure we reorder the distinctClause Pathkeys to match the ordering of the
+-- input path even if there is ORDER BY clause
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl ORDER BY y;
+SELECT DISTINCT ON (y, x) x, y FROM distinct_tbl ORDER BY y;
+
+-- Ensure the resulting pathkey list matches the initial distinctClause Pathkeys
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_tbl order by x, z, y) s ORDER BY y, x, z;
+SELECT DISTINCT ON (y, x) x, y FROM (select * from distinct_tbl order by x, z, y) s ORDER BY y, x, z;
+
+RESET enable_hashagg;
+
+DROP TABLE distinct_tbl;
author	Richard Guo <rguo@postgresql.org>
	Tue, 26 Nov 2024 00:25:18 +0000 (09:25 +0900)
committer	Richard Guo <rguo@postgresql.org>
	Tue, 26 Nov 2024 00:25:18 +0000 (09:25 +0900)
doc/src/sgml/config.sgml		patch \| blob \| blame \| history
src/backend/optimizer/path/pathkeys.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| blame \| history
src/backend/utils/misc/guc_tables.c		patch \| blob \| blame \| history
src/backend/utils/misc/postgresql.conf.sample		patch \| blob \| blame \| history
src/include/optimizer/optimizer.h		patch \| blob \| blame \| history
src/test/regress/expected/select_distinct.out		patch \| blob \| blame \| history
src/test/regress/expected/select_distinct_on.out		patch \| blob \| blame \| history
src/test/regress/expected/sysviews.out		patch \| blob \| blame \| history
src/test/regress/sql/select_distinct.sql		patch \| blob \| blame \| history
src/test/regress/sql/select_distinct_on.sql		patch \| blob \| blame \| history