summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/optimizer/plan/createplan.c243
-rw-r--r--src/backend/optimizer/plan/planner.c18
-rw-r--r--src/include/optimizer/planmain.h3
-rw-r--r--src/test/regress/expected/xc_groupby.out188
-rw-r--r--src/test/regress/sql/xc_groupby.sql24
5 files changed, 464 insertions, 12 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 585ee0b15f..db91770da6 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -5054,7 +5054,7 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan)
}
/*
- * create_remotegrouping_plan
+ * create_remoteagg_plan
* tries to see if the grouping and aggregates can be pushed down to the
* datanodes.
* Right now we can push with following restrictions
@@ -5073,7 +5073,7 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan)
* node in case there are no local clauses.
*/
Plan *
-create_remotegrouping_plan(PlannerInfo *root, Plan *local_plan)
+create_remoteagg_plan(PlannerInfo *root, Plan *local_plan)
{
Query *query = root->parse;
RemoteQuery *agg_left;
@@ -5339,6 +5339,245 @@ create_remotegrouping_plan(PlannerInfo *root, Plan *local_plan)
}
/*
+ * create_remotegroup_plan
+ * given a Group plan, try to push as much of the query to the datanodes and
+ * build a Group plan to combine the results across the datanodes. The Sort
+ * node under the Group plan is pushed down to RemoteQuery plan, since the
+ * combiner knows how to merge the results across datanodes in sorted manner.
+ * Hence there is no separate Sort node.
+ *
+ * This optimization is applied under following conditions
+ * 1. The scan plan under the Group->Sort node is a RemoteQuery
+ * 2. There is no separate Sort, DISTINCT, or HAVING clause in the query.
+ *
+ * PGXCTODO: we should lift up as many of these restrictions as possible or give
+ * reasons why those restrictions are needed.
+ */
+Plan *
+create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
+{
+ Group *group_plan;
+ Query *query = root->parse;
+	Sort	   *sort_plan = NULL;	/* must be NULL-initialized: only set if a Sort is found, tested below */
+ RemoteQuery *remote_scan; /* remote query in the passed in plan */
+ RemoteQuery *remote_group; /* remote query after optimization */
+ Plan *remote_group_plan; /* plan portion of remote_group */
+ Plan *temp_plan;
+ List *local_tlist; /* target list of the local plan */
+ List *temp_vars; /* temporarily hold the VARs */
+	List	   *temp_vartlist;	/* temporarily hold tlist of VARs */
+ ListCell *temp;
+ StringInfo remote_targetlist = makeStringInfo();/* SELECT clause of remote query */
+ StringInfo remote_sql_stmt = makeStringInfo();
+ StringInfo groupby_clause = makeStringInfo(); /* remote query GROUP BY */
+ StringInfo orderby_clause = makeStringInfo(); /* remote query ORDER BY */
+ StringInfo remote_fromlist = makeStringInfo(); /* remote query FROM */
+ StringInfo in_alias = makeStringInfo();
+ Relids in_relids;
+ Index dummy_rtindex;
+ List *base_tlist;
+ RangeTblEntry *dummy_rte;
+ int cntCols;
+
+ if (query->havingQual ||
+ query->distinctClause ||
+ query->sortClause ||
+ query->hasWindowFuncs)
+ return local_plan;
+
+ /* for now only for Group plans */
+ Assert(IsA(local_plan, Group));
+ group_plan = (Group *)local_plan;
+ remote_scan = NULL;
+ temp_plan = local_plan->lefttree;
+ /*
+ * we expect plan tree as Group->Sort->{Result}?->{Material}?->RemoteQuery,
+ * anything else is not handled right now.
+ */
+ if (IsA(temp_plan, Sort))
+ {
+ sort_plan = (Sort *)temp_plan;
+ temp_plan = temp_plan->lefttree;
+ }
+ if (IsA(temp_plan, Result))
+ temp_plan = temp_plan->lefttree;
+ if (IsA(temp_plan, Material))
+ temp_plan = temp_plan->lefttree;
+ if (IsA(temp_plan, RemoteQuery))
+ remote_scan = (RemoteQuery *)temp_plan;
+
+ if (!remote_scan || !sort_plan)
+ return local_plan;
+
+ Assert(IsA(remote_scan, RemoteQuery));
+ Assert(IsA(sort_plan, Sort));
+ /*
+ * grouping_planner will add Sort node before Group node to sort the rows
+ * based on the columns in GROUP BY clause. Hence the columns in Sort and
+ * those in the Group node should be the same. The columns are usually in the
+ * same order in both nodes, hence check the equality in order. If this
+ * condition fails, we can not handle this GROUP plan for now.
+ */
+ if (sort_plan->numCols != group_plan->numCols)
+ return local_plan;
+ for (cntCols = 0; cntCols < group_plan->numCols; cntCols++)
+ {
+ if (sort_plan->sortColIdx[cntCols] != group_plan->grpColIdx[cntCols])
+ return local_plan;
+ }
+ /*
+ * We are now ready to create the RemoteQuery node to push the query to
+ * datanode.
+ * 1. create a remote query node reflecting the query to be pushed to the
+ * datanode
+ * 2. Modify the Group node passed in, to accept the results sent by the
+ * datanodes and group them
+ */
+
+ local_tlist = local_plan->targetlist;
+ appendStringInfo(in_alias, "%s_%d", "group", root->rs_alias_index);
+
+ /* find all the relations referenced by targetlist of Group node */
+ temp_vars = pull_var_clause((Node *)local_tlist, PVC_REJECT_PLACEHOLDERS);
+ findReferencedVars(temp_vars, (Plan *)remote_scan, &temp_vartlist, &in_relids);
+ /*
+ * build partial RemoteQuery node to be used for creating the Select clause
+ * to be sent to the remote node. Rest of the node will be built later
+ */
+ remote_group = makeNode(RemoteQuery);
+ /*
+ * Save information about the plan we are reducing.
+ * We may need this information later if more entries are added to it
+ * as part of the remote expression optimization.
+ */
+ remote_group->remotejoin = false;
+ remote_group->inner_alias = pstrdup(in_alias->data);
+ remote_group->inner_reduce_level = remote_scan->reduce_level;
+ remote_group->inner_relids = in_relids;
+ remote_group->inner_statement = pstrdup(remote_scan->sql_statement);
+ remote_group->exec_nodes = remote_scan->exec_nodes;
+ /* don't forget to increment the index for the next time around! */
+ remote_group->reduce_level = root->rs_alias_index++;
+
+ /* generate the select clause of the remote query */
+ appendStringInfoString(remote_targetlist, "SELECT");
+ foreach (temp, local_tlist)
+ {
+ TargetEntry *tle = lfirst(temp);
+ Node *expr = (Node *)tle->expr;
+
+ create_remote_expr(root, local_plan, remote_targetlist, expr, remote_group);
+ /* if this is not last target entry, add a comma with space */
+ if (lnext(temp))
+ appendStringInfoString(remote_targetlist, ",");
+ }
+
+ /* generate the from clause of the remote query */
+ appendStringInfo(remote_fromlist, "FROM (%s) %s",
+ remote_group->inner_statement, remote_group->inner_alias);
+
+ /*
+ * generate group by clause for the remote query and recompute the group by
+ * column locations. We want the tuples from remote node to be ordered by
+ * the grouping columns so that ExecGroup can work without any modification,
+ * hence create a SimpleSort structure to be added to RemoteQuery (which
+ * will merge the sorted results and present to Group node in sorted
+ * manner).
+ */
+ if (query->groupClause)
+ {
+ SimpleSort *remote_sort = makeNode(SimpleSort);
+ char *sep = "";
+
+ /*
+ * reuse the arrays allocated in sort_plan to create SimpleSort
+ * structure. sort_plan is useless henceforth.
+ */
+ remote_sort->numCols = group_plan->numCols;
+ remote_sort->sortColIdx = sort_plan->sortColIdx;
+ remote_sort->sortOperators = sort_plan->sortOperators;
+ remote_sort->nullsFirst = sort_plan->nullsFirst;
+
+ pgxc_locate_grouping_columns(root, local_tlist, group_plan->grpColIdx);
+
+ appendStringInfoString(groupby_clause, "GROUP BY ");
+ appendStringInfoString(orderby_clause, "ORDER BY ");
+ for (cntCols = 0; cntCols < group_plan->numCols; cntCols++)
+ {
+ appendStringInfo(groupby_clause, "%s%d", sep,
+ group_plan->grpColIdx[cntCols]);
+ remote_sort->sortColIdx[cntCols] = group_plan->grpColIdx[cntCols];
+ appendStringInfo(orderby_clause, "%s%d", sep,
+ remote_sort->sortColIdx[cntCols]);
+ sep = ", ";
+ }
+ remote_group->sort = remote_sort;
+ }
+
+ /* generate the remote sql statement from the pieces */
+ appendStringInfo(remote_sql_stmt, "%s %s %s %s", remote_targetlist->data,
+ remote_fromlist->data, groupby_clause->data,
+ orderby_clause->data);
+ /*
+ * set the base_tlist for the RemoteQuery node being created, it's used to
+ * create the tuple descriptor for the result from RemoteQuery and rewrite
+ * the Group plan's targetlist to accept the results of the RemoteQuery.
+ */
+ base_tlist = add_to_flat_tlist(NIL, get_tlist_exprs(local_tlist, true));
+ /*
+ * create a dummy RTE for the remote query being created. Append the dummy
+ * range table entry to the range table. Note that this modifies the master
+ * copy the caller passed us; otherwise e.g. EXPLAIN VERBOSE will fail to
+ * find the rte the Vars built below refer to.
+ */
+ /* cook up the reltupdesc using this base_tlist */
+ dummy_rte = makeNode(RangeTblEntry);
+ dummy_rte->reltupdesc = ExecTypeFromTL(base_tlist, false);
+ dummy_rte->rtekind = RTE_RELATION;
+ /* use a dummy relname... */
+ dummy_rte->relname = "__FOREIGN_QUERY__";
+ dummy_rte->eref = makeAlias("__FOREIGN_QUERY__", NIL);
+ /* rest will be zeroed out in makeNode() */
+ root->parse->rtable = lappend(root->parse->rtable, dummy_rte);
+ dummy_rtindex = list_length(root->parse->rtable);
+
+ /* build rest of the RemoteQuery node and the plan there */
+ remote_group_plan = &remote_group->scan.plan;
+	/* the flattened base_tlist computed above becomes this node's tlist */
+ remote_group_plan->targetlist = base_tlist;
+ remote_group_plan->lefttree = NULL;
+ remote_group_plan->righttree = NULL;
+ remote_group->scan.scanrelid = dummy_rtindex;
+ remote_group->sql_statement = remote_sql_stmt->data;
+ /* set_plan_refs needs this later */
+ remote_group->base_tlist = base_tlist;
+ remote_group->relname = "__FOREIGN_QUERY__";
+ remote_group->partitioned_replicated = remote_scan->partitioned_replicated;
+ /*
+ * Only quals that can be pushed to the remote side are the ones in the having
+ * clause. Till we work out how to handle having quals in XC, we don't have
+ * any quals here.
+ * PGXCTODO: the RemoteQuery node that was earlier the lefttree of Agg
+ * node, may have local quals. In such case, we have to aggregate and group
+ * at coordinator and can not push the grouping clause to the datanodes. Is
+ * there a case in XC, where we can have local quals?
+ */
+ /* we actually need not worry about costs since this is the final plan */
+ remote_group_plan->startup_cost = remote_scan->scan.plan.startup_cost;
+ remote_group_plan->total_cost = remote_scan->scan.plan.total_cost;
+ remote_group_plan->plan_rows = remote_scan->scan.plan.plan_rows;
+ remote_group_plan->plan_width = remote_scan->scan.plan.plan_width;
+
+ /* modify the passed in Group plan according to the remote query we built */
+ /*
+ * Materialization is always needed for RemoteQuery in case we need to restart
+ * the scan
+ */
+ group_plan->plan.lefttree = (Plan *) make_material(remote_group_plan);
+ return (Plan *)group_plan;
+}
+
+/*
* locates the grouping clauses in the given target list. This is very similar
* to locate_grouping_columns except that there is only one target list to
* search into
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 213da56115..e64e938648 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1336,10 +1336,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
result_plan);
#ifdef PGXC
/*
- * can we push any clauses to the remote node? try doing that
+ * Grouping will certainly not increase the number of rows
+ * coordinator fetches from datanode, in fact it's expected to
+ * reduce the number drastically. Hence, try pushing GROUP BY
+ * clauses and aggregates to the datanode, thus saving bandwidth.
*/
if (IS_PGXC_COORDINATOR && !IsConnFromCoord())
- result_plan = create_remotegrouping_plan(root, result_plan);
+ result_plan = create_remoteagg_plan(root, result_plan);
#endif /* PGXC */
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
@@ -1412,7 +1415,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
extract_grouping_ops(parse->groupClause),
dNumGroups,
result_plan);
- /* The Group node won't change sort ordering */
+#ifdef PGXC
+ /*
+ * Grouping will certainly not increase the number of rows
+ * coordinator fetches from datanode, in fact it's expected to
+ * reduce the number drastically. Hence, try pushing GROUP BY
+ * clauses and aggregates to the datanode, thus saving bandwidth.
+ */
+ if (IS_PGXC_COORDINATOR && !IsConnFromCoord())
+ result_plan = create_remotegroup_plan(root, result_plan);
+#endif /* PGXC */
}
else if (root->hasHavingQual)
{
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index e595f1e003..28b4ad7569 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -82,7 +82,8 @@ extern ModifyTable *make_modifytable(CmdType operation, List *resultRelations,
List *subplans, List *returningLists,
List *rowMarks, int epqParam);
extern bool is_projection_capable_plan(Plan *plan);
-extern Plan *create_remotegrouping_plan(PlannerInfo *root, Plan *agg_plan);
+extern Plan *create_remotegroup_plan(PlannerInfo *root, Plan *local_plan);
+extern Plan *create_remoteagg_plan(PlannerInfo *root, Plan *agg_plan);
/*
* prototypes for plan/initsplan.c
diff --git a/src/test/regress/expected/xc_groupby.out b/src/test/regress/expected/xc_groupby.out
index 672084ac78..e403e37d47 100644
--- a/src/test/regress/expected/xc_groupby.out
+++ b/src/test/regress/expected/xc_groupby.out
@@ -66,7 +66,8 @@ explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group
Output: tab1.val, tab1.val2
(8 rows)
--- group by without aggregate, just like distinct?
+-- group by without aggregate
+set enable_hashagg to off;
select val2 from tab1 group by val2;
val2
------
@@ -78,7 +79,7 @@ select val2 from tab1 group by val2;
explain verbose select val2 from tab1 group by val2;
QUERY PLAN
----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=4)
+ Group (cost=1.02..1.03 rows=1 width=4)
Output: tab1.val2
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: tab1.val2
@@ -86,6 +87,95 @@ explain verbose select val2 from tab1 group by val2;
Output: tab1.val2
(6 rows)
+select val + val2 from tab1 group by val + val2;
+ ?column?
+----------
+ 2
+ 3
+ 4
+ 7
+ 8
+ 9
+(6 rows)
+
+explain verbose select val + val2 from tab1 group by val + val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.03..1.04 rows=1 width=8)
+ Output: ((tab1.val + tab1.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab1.val2))
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: (tab1.val + tab1.val2)
+(6 rows)
+
+select val + val2, val, val2 from tab1 group by val, val2;
+ ?column? | val | val2
+----------+-----+------
+ 2 | 1 | 1
+ 4 | 1 | 3
+ 3 | 2 | 1
+ 4 | 2 | 2
+ 4 | 3 | 1
+ 7 | 4 | 3
+ 8 | 6 | 2
+ 9 | 6 | 3
+(8 rows)
+
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.02..1.04 rows=1 width=8)
+ Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: (tab1.val + tab1.val2), tab1.val, tab1.val2
+(6 rows)
+
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ ?column? | val | val2
+----------+-----+------
+ 2 | 1 | 1
+ 6 | 2 | 4
+ 5 | 3 | 2
+ 7 | 3 | 4
+ 5 | 4 | 1
+ 6 | 4 | 2
+(6 rows)
+
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Group (cost=0.01..0.02 rows=1 width=0)
+ Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1 width=4)
+ Output: (tab1.val + tab2.val2), tab1.val, tab2.val2
+(6 rows)
+
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ ?column?
+----------
+ 2
+ 5
+ 6
+ 7
+(4 rows)
+
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Group (cost=0.01..0.02 rows=1 width=0)
+ Output: ((tab1.val + tab2.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab2.val2))
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1 width=4)
+ Output: (tab1.val + tab2.val2)
+(6 rows)
+
+reset enable_hashagg;
-- group by with aggregates in expression
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
?column? | val2
@@ -210,7 +300,8 @@ explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group
Output: tab1.val, tab1.val2
(8 rows)
--- group by without aggregate, just like distinct?
+-- group by without aggregate
+set enable_hashagg to off;
select val2 from tab1 group by val2;
val2
------
@@ -222,7 +313,7 @@ select val2 from tab1 group by val2;
explain verbose select val2 from tab1 group by val2;
QUERY PLAN
----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=4)
+ Group (cost=1.02..1.03 rows=1 width=4)
Output: tab1.val2
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: tab1.val2
@@ -230,6 +321,95 @@ explain verbose select val2 from tab1 group by val2;
Output: tab1.val2
(6 rows)
+select val + val2 from tab1 group by val + val2;
+ ?column?
+----------
+ 2
+ 3
+ 4
+ 7
+ 8
+ 9
+(6 rows)
+
+explain verbose select val + val2 from tab1 group by val + val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.03..1.04 rows=1 width=8)
+ Output: ((tab1.val + tab1.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab1.val2))
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: (tab1.val + tab1.val2)
+(6 rows)
+
+select val + val2, val, val2 from tab1 group by val, val2;
+ ?column? | val | val2
+----------+-----+------
+ 2 | 1 | 1
+ 4 | 1 | 3
+ 3 | 2 | 1
+ 4 | 2 | 2
+ 4 | 3 | 1
+ 7 | 4 | 3
+ 8 | 6 | 2
+ 9 | 6 | 3
+(8 rows)
+
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.02..1.04 rows=1 width=8)
+ Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: (tab1.val + tab1.val2), tab1.val, tab1.val2
+(6 rows)
+
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ ?column? | val | val2
+----------+-----+------
+ 2 | 1 | 1
+ 6 | 2 | 4
+ 5 | 3 | 2
+ 7 | 3 | 4
+ 5 | 4 | 1
+ 6 | 4 | 2
+(6 rows)
+
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Group (cost=0.01..0.02 rows=1 width=0)
+ Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1 width=4)
+ Output: (tab1.val + tab2.val2), tab1.val, tab2.val2
+(6 rows)
+
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ ?column?
+----------
+ 2
+ 5
+ 6
+ 7
+(4 rows)
+
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Group (cost=0.01..0.02 rows=1 width=0)
+ Output: ((tab1.val + tab2.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab2.val2))
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1 width=4)
+ Output: (tab1.val + tab2.val2)
+(6 rows)
+
+reset enable_hashagg;
-- group by with aggregates in expression
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
?column? | val2
diff --git a/src/test/regress/sql/xc_groupby.sql b/src/test/regress/sql/xc_groupby.sql
index 66dfabc35f..1fd0f50e11 100644
--- a/src/test/regress/sql/xc_groupby.sql
+++ b/src/test/regress/sql/xc_groupby.sql
@@ -10,9 +10,19 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val
-- aggregates over aggregates
select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
--- group by without aggregate, just like distinct?
+-- group by without aggregate
+set enable_hashagg to off;
select val2 from tab1 group by val2;
explain verbose select val2 from tab1 group by val2;
+select val + val2 from tab1 group by val + val2;
+explain verbose select val + val2 from tab1 group by val + val2;
+select val + val2, val, val2 from tab1 group by val, val2;
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+reset enable_hashagg;
-- group by with aggregates in expression
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
@@ -36,9 +46,19 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val
-- aggregates over aggregates
select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
--- group by without aggregate, just like distinct?
+-- group by without aggregate
+set enable_hashagg to off;
select val2 from tab1 group by val2;
explain verbose select val2 from tab1 group by val2;
+select val + val2 from tab1 group by val + val2;
+explain verbose select val + val2 from tab1 group by val + val2;
+select val + val2, val, val2 from tab1 group by val, val2;
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+reset enable_hashagg;
-- group by with aggregates in expression
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;