Fix usage of aggregate pathkeys in group_keys_reorder_by_pathkeys()

author Alexander Korotkov <akorotkov@postgresql.org>

Fri, 9 Feb 2024 10:56:26 +0000 (12:56 +0200)

committer Alexander Korotkov <akorotkov@postgresql.org>

Fri, 9 Feb 2024 10:56:54 +0000 (12:56 +0200)
author Alexander Korotkov <akorotkov@postgresql.org>
Fri, 9 Feb 2024 10:56:26 +0000 (12:56 +0200)
committer Alexander Korotkov <akorotkov@postgresql.org>
Fri, 9 Feb 2024 10:56:54 +0000 (12:56 +0200)
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c

index 82ff31273b9fe55def88498d148c6e4ccf49dadc..bcc1b7a9ebe213549a6f9f2cff9cb218c584cf0d 100644 (file)
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -355,10 +355,14 @@ pathkeys_contained_in(List *keys1, List *keys2)
  
  /*
   * group_keys_reorder_by_pathkeys
- *             Reorder GROUP BY keys to match the input pathkeys.
+ *             Reorder GROUP BY pathkeys and clauses to match the input pathkeys.
   *
- * Function returns new lists (pathkeys and clauses), original GROUP BY lists
- * stay untouched.
+ * 'pathkeys' is an input list of pathkeys
+ * '*group_pathkeys' and '*group_clauses' are pathkeys and clauses lists to
+ *             reorder.  The pointers are redirected to new lists, original lists
+ *             stay untouched.
+ * 'num_groupby_pathkeys' is the number of first '*group_pathkeys' items to
+ *             search matching pathkeys.
   *
   * Returns the number of GROUP BY keys with a matching pathkey.
   */
@@ -369,12 +373,24 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
  {
         List       *new_group_pathkeys = NIL,
                            *new_group_clauses = NIL;
+       List       *grouping_pathkeys;
         ListCell   *lc;
         int                     n;
  
         if (pathkeys == NIL || *group_pathkeys == NIL)
                 return 0;
  
+       /*
+        * We're going to search within just the first num_groupby_pathkeys of
+        * *group_pathkeys.  The thing is that root->group_pathkeys is passed as
+        * *group_pathkeys containing grouping pathkeys altogether with aggregate
+        * pathkeys.  If we process aggregate pathkeys we could get an invalid
+        * result of get_sortgroupref_clause_noerr(), because their
+        * pathkey->pk_eclass->ec_sortref doesn't referece query targetlist.  So,
+        * we allocate a separate list of pathkeys for lookups.
+        */
+       grouping_pathkeys = list_copy_head(*group_pathkeys, num_groupby_pathkeys);
+
         /*
          * Walk the pathkeys (determining ordering of the input path) and see if
          * there's a matching GROUP BY key. If we find one, we append it to the
@@ -395,7 +411,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
                  * there is no sortclause reference for some reason.
                  */
                 if (foreach_current_index(lc) >= num_groupby_pathkeys ||
-                       !list_member_ptr(*group_pathkeys, pathkey) ||
+                       !list_member_ptr(grouping_pathkeys, pathkey) ||
                         pathkey->pk_eclass->ec_sortref == 0)
                         break;
  
@@ -429,6 +445,7 @@ group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
         *group_clauses = list_concat_unique_ptr(new_group_clauses,
                                                                                         *group_clauses);
  
+       list_free(grouping_pathkeys);
         return n;
  }
  
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out

index 7a73c19314b071f1a2db05ac73d098e1955b1d65..f736bab67ef73ce9d9e60223e87835a4655c5f9a 100644 (file)
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2874,6 +2874,30 @@ SELECT y,x,array_agg(distinct w) FROM btg WHERE y < 0 GROUP BY x,y;
  
  RESET enable_incremental_sort;
  DROP TABLE btg;
+-- Check we don't pick aggregate path key instead of grouping path key
+CREATE TABLE group_agg_pk AS SELECT
+  i % 10 AS x,
+  i % 2 AS y,
+  i % 2 AS z,
+  2 AS w,
+  i % 10 AS f
+FROM generate_series(1,100) AS i;
+ANALYZE group_agg_pk;
+SET enable_nestloop = off;
+SET enable_hashjoin = off;
+SELECT
+  c1.z, c1.w, string_agg(''::text, repeat(''::text, c1.f) ORDER BY c1.x,c1.y)
+FROM group_agg_pk c1 JOIN group_agg_pk c2 ON (c1.x = c2.f)
+GROUP BY c1.w, c1.z;
+ z | w | string_agg 
+---+---+------------
+ 0 | 2 | 
+ 1 | 2 | 
+(2 rows)
+
+RESET enable_nestloop;
+RESET enable_hashjoin;
+DROP TABLE group_agg_pk;
  -- The case, when scanning sort order correspond to aggregate sort order but
  -- can not be found in the group-by list
  CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql

index 916dbf908f70c9e166a6136c020d092a55f276a2..a19ef713d303a2836defcd26b1a134ed4a88e8fe 100644 (file)
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -1231,6 +1231,25 @@ RESET enable_incremental_sort;
  
  DROP TABLE btg;
  
+-- Check we don't pick aggregate path key instead of grouping path key
+CREATE TABLE group_agg_pk AS SELECT
+  i % 10 AS x,
+  i % 2 AS y,
+  i % 2 AS z,
+  2 AS w,
+  i % 10 AS f
+FROM generate_series(1,100) AS i;
+ANALYZE group_agg_pk;
+SET enable_nestloop = off;
+SET enable_hashjoin = off;
+SELECT
+  c1.z, c1.w, string_agg(''::text, repeat(''::text, c1.f) ORDER BY c1.x,c1.y)
+FROM group_agg_pk c1 JOIN group_agg_pk c2 ON (c1.x = c2.f)
+GROUP BY c1.w, c1.z;
+RESET enable_nestloop;
+RESET enable_hashjoin;
+DROP TABLE group_agg_pk;
+
  -- The case, when scanning sort order correspond to aggregate sort order but
  -- can not be found in the group-by list
  CREATE TABLE agg_sort_order (c1 int PRIMARY KEY, c2 int);
author	Alexander Korotkov <akorotkov@postgresql.org>
	Fri, 9 Feb 2024 10:56:26 +0000 (12:56 +0200)
committer	Alexander Korotkov <akorotkov@postgresql.org>
	Fri, 9 Feb 2024 10:56:54 +0000 (12:56 +0200)
src/backend/optimizer/path/pathkeys.c		patch \| blob \| blame \| history
src/test/regress/expected/aggregates.out		patch \| blob \| blame \| history
src/test/regress/sql/aggregates.sql		patch \| blob \| blame \| history