Fix asymmetry in setting EquivalenceClass.ec_sortref
authorAlexander Korotkov <akorotkov@postgresql.org>
Thu, 6 Jun 2024 10:41:34 +0000 (13:41 +0300)
committerAlexander Korotkov <akorotkov@postgresql.org>
Thu, 6 Jun 2024 10:41:34 +0000 (13:41 +0300)
0452b461bc made get_eclass_for_sort_expr() always set
EquivalenceClass.ec_sortref if it's not done yet.  This leads to an asymmetric
situation when whoever first looks for the EquivalenceClass sets the
ec_sortref.  It is also counterintuitive that get_eclass_for_sort_expr()
performs modification of data structures.

This commit makes make_pathkeys_for_sortclauses_extended() responsible for
setting EquivalenceClass.ec_sortref.  Now we set the
EquivalenceClass.ec_sortref's needed to explore alternative GROUP BY ordering
specifically during building pathkeys by the list of grouping clauses.

Discussion: https://postgr.es/m/17037754-f187-4138-8285-0e2bfebd0dea%40postgrespro.ru
Reported-by: Tom Lane
Author: Andrei Lepikhov
Reviewed-by: Alexander Korotkov, Pavel Borisov
src/backend/optimizer/path/equivclass.c
src/backend/optimizer/path/pathkeys.c
src/backend/optimizer/plan/planner.c
src/include/optimizer/paths.h
src/test/regress/expected/aggregates.out
src/test/regress/sql/aggregates.sql

index 21ce1ae2e1367f61417533d9283db9cdd41df746..51d806326eba32826051757ab500a8a9d307ec65 100644 (file)
@@ -652,18 +652,7 @@ get_eclass_for_sort_expr(PlannerInfo *root,
 
                        if (opcintype == cur_em->em_datatype &&
                                equal(expr, cur_em->em_expr))
-                       {
-                               /*
-                                * Match!
-                                *
-                                * Copy the sortref if it wasn't set yet.  That may happen if
-                                * the ec was constructed from a WHERE clause, i.e. it doesn't
-                                * have a target reference at all.
-                                */
-                               if (cur_ec->ec_sortref == 0 && sortref > 0)
-                                       cur_ec->ec_sortref = sortref;
-                               return cur_ec;
-                       }
+                               return cur_ec;  /* Match! */
                }
        }
 
index 8b258cbef92d2ab2d551e89e73c4f04eacaf4196..df966b18f3452784672fc9c3eea80b0d2b6e941a 100644 (file)
@@ -1355,7 +1355,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
                                                                                                        &sortclauses,
                                                                                                        tlist,
                                                                                                        false,
-                                                                                                       &sortable);
+                                                                                                       &sortable,
+                                                                                                       false);
        /* It's caller error if not all clauses were sortable */
        Assert(sortable);
        return result;
@@ -1379,13 +1380,17 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
  * to remove any clauses that can be proven redundant via the eclass logic.
  * Even though we'll have to hash in that case, we might as well not hash
  * redundant columns.)
+ *
+ * If set_ec_sortref is true then sets the value of the pathkey's
+ * EquivalenceClass unless it's already initialized.
  */
 List *
 make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
                                                                           List **sortclauses,
                                                                           List *tlist,
                                                                           bool remove_redundant,
-                                                                          bool *sortable)
+                                                                          bool *sortable,
+                                                                          bool set_ec_sortref)
 {
        List       *pathkeys = NIL;
        ListCell   *l;
@@ -1409,6 +1414,15 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
                                                                                   sortcl->nulls_first,
                                                                                   sortcl->tleSortGroupRef,
                                                                                   true);
+               if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref)
+               {
+                       /*
+                        * Copy the sortref if it hasn't been set yet.  That may happen if
+                        * the EquivalenceClass was constructed from a WHERE clause, i.e.
+                        * it doesn't have a target reference at all.
+                        */
+                       pathkey->pk_eclass->ec_sortref = sortcl->tleSortGroupRef;
+               }
 
                /* Canonical form eliminates redundant ordering keys */
                if (!pathkey_is_redundant(pathkey, pathkeys))
index 032818423f6f48011f5ebafcf2a717be28e20139..ea107c70cb896f91a92a4a9acb81e38ce5267cbb 100644 (file)
@@ -3395,12 +3395,17 @@ standard_qp_callback(PlannerInfo *root, void *extra)
                 */
                bool            sortable;
 
+               /*
+                * Convert group clauses into pathkeys.  Set the ec_sortref field of
+                * EquivalenceClass'es if it's not set yet.
+                */
                root->group_pathkeys =
                        make_pathkeys_for_sortclauses_extended(root,
                                                                                                   &root->processed_groupClause,
                                                                                                   tlist,
                                                                                                   true,
-                                                                                                  &sortable);
+                                                                                                  &sortable,
+                                                                                                  true);
                if (!sortable)
                {
                        /* Can't sort; no point in considering aggregate ordering either */
@@ -3450,7 +3455,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
                                                                                                   &root->processed_distinctClause,
                                                                                                   tlist,
                                                                                                   true,
-                                                                                                  &sortable);
+                                                                                                  &sortable,
+                                                                                                  false);
                if (!sortable)
                        root->distinct_pathkeys = NIL;
        }
@@ -3476,7 +3482,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
                                                                                                   &groupClauses,
                                                                                                   tlist,
                                                                                                   false,
-                                                                                                  &sortable);
+                                                                                                  &sortable,
+                                                                                                  false);
                if (!sortable)
                        root->setop_pathkeys = NIL;
        }
@@ -6061,7 +6068,8 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
                                                                                                                                 &wc->partitionClause,
                                                                                                                                 tlist,
                                                                                                                                 true,
-                                                                                                                                &sortable);
+                                                                                                                                &sortable,
+                                                                                                                                false);
 
                Assert(sortable);
        }
index 914d9bdef5878fe9527e9c15c6f37ea5d21c5df4..5e88c0224a4175535c99e82fc6e1a2b08adc790c 100644 (file)
@@ -239,7 +239,8 @@ extern List *make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
                                                                                                        List **sortclauses,
                                                                                                        List *tlist,
                                                                                                        bool remove_redundant,
-                                                                                                       bool *sortable);
+                                                                                                       bool *sortable,
+                                                                                                       bool set_ec_sortref);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
                                                                                        RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,
index 2442342e9d1937aea481c6dad7f9b48461e6dcf9..1c1ca7573ad3a4b534263a57fe58d1c01c42bcd0 100644 (file)
@@ -2917,6 +2917,53 @@ GROUP BY c1.w, c1.z;
  5.0000000000000000
 (2 rows)
 
+-- Pathkeys, built in a subtree, can be used to optimize GROUP-BY clause
+-- ordering.  Also, here we check that it doesn't depend on the initial clause
+-- order in the GROUP-BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c1.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c1.x, c1.y
+         Presorted Key: c1.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Group
+   Group Key: c2.x, c1.y
+   ->  Incremental Sort
+         Sort Key: c2.x, c1.y
+         Presorted Key: c2.x
+         ->  Merge Join
+               Merge Cond: (c1.x = c2.x)
+               ->  Sort
+                     Sort Key: c1.x
+                     ->  Seq Scan on group_agg_pk c1
+               ->  Sort
+                     Sort Key: c2.x
+                     ->  Seq Scan on group_agg_pk c2
+(13 rows)
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;
index 61a3424c845b90e4ea6353657523893d26bc90fd..1a18ca3d8fe207ab48faec2840c97be61906edbf 100644 (file)
@@ -1263,6 +1263,20 @@ SELECT avg(c1.f ORDER BY c1.x, c1.y)
 FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
 GROUP BY c1.w, c1.z;
 
+-- Pathkeys, built in a subtree, can be used to optimize GROUP-BY clause
+-- ordering.  Also, here we check that it doesn't depend on the initial clause
+-- order in the GROUP-BY list.
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c1.x,c2.x;
+EXPLAIN (COSTS OFF)
+SELECT c1.y,c1.x FROM group_agg_pk c1
+  JOIN group_agg_pk c2
+  ON c1.x = c2.x
+GROUP BY c1.y,c2.x,c1.x;
+
 RESET enable_nestloop;
 RESET enable_hashjoin;
 DROP TABLE group_agg_pk;