summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAshutosh Bapat2011-06-01 08:54:51 +0000
committerMichael P2011-06-06 03:02:41 +0000
commit801af8baca96f403e982e0bf36698dc26be26754 (patch)
treeefaf92df2288ed09a54e6b7f4bf95562ee688688 /src
parenta186a47d75e278909ec1346a2919b9e2cdbe469d (diff)
Push GROUP BY clause to the datanode for grouping without aggregation (i.e. when
we choose Group plan for grouping. Group plan requires that the input to this node be sorted on columns/expression involved in GROUP BY clause. Hence construct a ORDER BY clause corresponding to GROUP BY clause. Also add the same information to the RemoteQuery node, so that the sorted data from different datanodes can be merged at the coordinator so as to feed the sorted output to the Group node. The number of rows resulting after grouping is lesser or equal to the qualifying rows from the relations/joins. Hence by pushing GROUP BY to the datanodes, we reduce (in rare cases keep same) the number of rows fetched from datanodes, thus saving bandwidth. The optimization works under following restrictions 1. Group plan has Sort->Result->Material->RemoteQuery nodes under it. Result and Material nodes are optional. 2. There is no having clause, ORDER BY clause, windowing clause, Distinct clause in the query. 3. Such grouping uses Sorting for Grouping. (direct implication of first restriction) The patch also adds more test to file xc_groupby.
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/plan/createplan.c243
-rw-r--r--src/backend/optimizer/plan/planner.c18
-rw-r--r--src/include/optimizer/planmain.h3
-rw-r--r--src/test/regress/expected/xc_groupby.out188
-rw-r--r--src/test/regress/sql/xc_groupby.sql24
5 files changed, 464 insertions, 12 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 585ee0b15f..db91770da6 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -5054,7 +5054,7 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan)
}
/*
- * create_remotegrouping_plan
+ * create_remoteagg_plan
* tries to see if the grouping and aggregates can be pushed down to the
* datanodes.
* Right now we can push with the following restrictions
@@ -5073,7 +5073,7 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan)
* node in case there are no local clauses.
*/
Plan *
-create_remotegrouping_plan(PlannerInfo *root, Plan *local_plan)
+create_remoteagg_plan(PlannerInfo *root, Plan *local_plan)
{
Query *query = root->parse;
RemoteQuery *agg_left;
@@ -5339,6 +5339,245 @@ create_remotegrouping_plan(PlannerInfo *root, Plan *local_plan)
}
/*
+ * create_remotegroup_plan
+ * given a Group plan, try to push as much of the query to the datanodes and
+ * build a Group plan to combine the results across the datanodes. The Sort
+ * node under the Group plan is pushed down to RemoteQuery plan, since the
+ * combiner knows how to merge the results across datanodes in sorted manner.
+ * Hence there is no separate Sort node.
+ *
+ * This optimization is applied under following conditions
+ * 1. The scan plans under the Group->Sort node is RemoteQuery
+ * 2. There is no separate Sort, distinct, or having clause in the query.
+ *
+ * PGXCTODO: we should lift up as many of these restrictions as possible or give
+ * reasons why those restrictions are needed.
+ */
+Plan *
+create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
+{
+ Group *group_plan;
+ Query *query = root->parse;
+ Sort *sort_plan;
+ RemoteQuery *remote_scan; /* remote query in the passed in plan */
+ RemoteQuery *remote_group; /* remote query after optimization */
+ Plan *remote_group_plan; /* plan portion of remote_group */
+ Plan *temp_plan;
+ List *local_tlist; /* target list of the local plan */
+ List *temp_vars; /* temporarily hold the VARs */
+ List *temp_vartlist; /* temporarily hold tlist of VARs */
+ ListCell *temp;
+ StringInfo remote_targetlist = makeStringInfo();/* SELECT clause of remote query */
+ StringInfo remote_sql_stmt = makeStringInfo();
+ StringInfo groupby_clause = makeStringInfo(); /* remote query GROUP BY */
+ StringInfo orderby_clause = makeStringInfo(); /* remote query ORDER BY */
+ StringInfo remote_fromlist = makeStringInfo(); /* remote query FROM */
+ StringInfo in_alias = makeStringInfo();
+ Relids in_relids;
+ Index dummy_rtindex;
+ List *base_tlist;
+ RangeTblEntry *dummy_rte;
+ int cntCols;
+
+ if (query->havingQual ||
+ query->distinctClause ||
+ query->sortClause ||
+ query->hasWindowFuncs)
+ return local_plan;
+
+ /* for now only for Group plans */
+ Assert(IsA(local_plan, Group));
+ group_plan = (Group *)local_plan;
+ remote_scan = NULL;
+ temp_plan = local_plan->lefttree;
+ /*
+ * we expect plan tree as Group->Sort->{Result}?->{Material}?->RemoteQuery,
+ * anything else is not handled right now.
+ */
+ if (IsA(temp_plan, Sort))
+ {
+ sort_plan = (Sort *)temp_plan;
+ temp_plan = temp_plan->lefttree;
+ }
+ if (IsA(temp_plan, Result))
+ temp_plan = temp_plan->lefttree;
+ if (IsA(temp_plan, Material))
+ temp_plan = temp_plan->lefttree;
+ if (IsA(temp_plan, RemoteQuery))
+ remote_scan = (RemoteQuery *)temp_plan;
+
+ if (!remote_scan || !sort_plan)
+ return local_plan;
+
+ Assert(IsA(remote_scan, RemoteQuery));
+ Assert(IsA(sort_plan, Sort));
+ /*
+ * grouping_planner will add Sort node before Group node to sort the rows
+ * based on the columns in GROUP BY clause. Hence the columns in Sort and
+ * those in the Group node should be the same. The columns are usually in the
+ * same order in both nodes, hence check the equality in order. If this
+ * condition fails, we can not handle this GROUP plan for now.
+ */
+ if (sort_plan->numCols != group_plan->numCols)
+ return local_plan;
+ for (cntCols = 0; cntCols < group_plan->numCols; cntCols++)
+ {
+ if (sort_plan->sortColIdx[cntCols] != group_plan->grpColIdx[cntCols])
+ return local_plan;
+ }
+ /*
+ * We are now ready to create the RemoteQuery node to push the query to
+ * datanode.
+ * 1. create a remote query node reflecting the query to be pushed to the
+ * datanode
+ * 2. Modify the Group node passed in, to accept the results sent by the
+ * datanodes and group them
+ */
+
+ local_tlist = local_plan->targetlist;
+ appendStringInfo(in_alias, "%s_%d", "group", root->rs_alias_index);
+
+ /* find all the relations referenced by targetlist of Group node */
+ temp_vars = pull_var_clause((Node *)local_tlist, PVC_REJECT_PLACEHOLDERS);
+ findReferencedVars(temp_vars, (Plan *)remote_scan, &temp_vartlist, &in_relids);
+ /*
+ * build partial RemoteQuery node to be used for creating the Select clause
+ * to be sent to the remote node. Rest of the node will be built later
+ */
+ remote_group = makeNode(RemoteQuery);
+ /*
+ * Save information about the plan we are reducing.
+ * We may need this information later if more entries are added to it
+ * as part of the remote expression optimization.
+ */
+ remote_group->remotejoin = false;
+ remote_group->inner_alias = pstrdup(in_alias->data);
+ remote_group->inner_reduce_level = remote_scan->reduce_level;
+ remote_group->inner_relids = in_relids;
+ remote_group->inner_statement = pstrdup(remote_scan->sql_statement);
+ remote_group->exec_nodes = remote_scan->exec_nodes;
+ /* don't forget to increment the index for the next time around! */
+ remote_group->reduce_level = root->rs_alias_index++;
+
+ /* generate the select clause of the remote query */
+ appendStringInfoString(remote_targetlist, "SELECT");
+ foreach (temp, local_tlist)
+ {
+ TargetEntry *tle = lfirst(temp);
+ Node *expr = (Node *)tle->expr;
+
+ create_remote_expr(root, local_plan, remote_targetlist, expr, remote_group);
+ /* if this is not last target entry, add a comma with space */
+ if (lnext(temp))
+ appendStringInfoString(remote_targetlist, ",");
+ }
+
+ /* generate the from clause of the remote query */
+ appendStringInfo(remote_fromlist, "FROM (%s) %s",
+ remote_group->inner_statement, remote_group->inner_alias);
+
+ /*
+ * generate group by clause for the remote query and recompute the group by
+ * column locations. We want the tuples from remote node to be ordered by
+ * the grouping columns so that ExecGroup can work without any modification,
+ * hence create a SimpleSort structure to be added to RemoteQuery (which
+ * will merge the sorted results and present to Group node in sorted
+ * manner).
+ */
+ if (query->groupClause)
+ {
+ SimpleSort *remote_sort = makeNode(SimpleSort);
+ char *sep = "";
+
+ /*
+ * reuse the arrays allocated in sort_plan to create SimpleSort
+ * structure. sort_plan is useless henceforth.
+ */
+ remote_sort->numCols = group_plan->numCols;
+ remote_sort->sortColIdx = sort_plan->sortColIdx;
+ remote_sort->sortOperators = sort_plan->sortOperators;
+ remote_sort->nullsFirst = sort_plan->nullsFirst;
+
+ pgxc_locate_grouping_columns(root, local_tlist, group_plan->grpColIdx);
+
+ appendStringInfoString(groupby_clause, "GROUP BY ");
+ appendStringInfoString(orderby_clause, "ORDER BY ");
+ for (cntCols = 0; cntCols < group_plan->numCols; cntCols++)
+ {
+ appendStringInfo(groupby_clause, "%s%d", sep,
+ group_plan->grpColIdx[cntCols]);
+ remote_sort->sortColIdx[cntCols] = group_plan->grpColIdx[cntCols];
+ appendStringInfo(orderby_clause, "%s%d", sep,
+ remote_sort->sortColIdx[cntCols]);
+ sep = ", ";
+ }
+ remote_group->sort = remote_sort;
+ }
+
+ /* generate the remote sql statement from the pieces */
+ appendStringInfo(remote_sql_stmt, "%s %s %s %s", remote_targetlist->data,
+ remote_fromlist->data, groupby_clause->data,
+ orderby_clause->data);
+ /*
+ * set the base_tlist for the RemoteQuery node being created, it's used to
+ * create the tuple descriptor for the result from RemoteQuery and rewrite
+ * the Aggregates targetlist to accept the results of the RemoteQuery.
+ */
+ base_tlist = add_to_flat_tlist(NIL, get_tlist_exprs(local_tlist, true));
+ /*
+ * create a dummy RTE for the remote query being created. Append the dummy
+ * range table entry to the range table. Note that this modifies the master
+ * copy that the caller passed us, otherwise e.g. EXPLAIN VERBOSE will fail to
+ * find the rte the Vars built below refer to.
+ */
+ /* cook up the reltupdesc using this base_tlist */
+ dummy_rte = makeNode(RangeTblEntry);
+ dummy_rte->reltupdesc = ExecTypeFromTL(base_tlist, false);
+ dummy_rte->rtekind = RTE_RELATION;
+ /* use a dummy relname... */
+ dummy_rte->relname = "__FOREIGN_QUERY__";
+ dummy_rte->eref = makeAlias("__FOREIGN_QUERY__", NIL);
+ /* rest will be zeroed out in makeNode() */
+ root->parse->rtable = lappend(root->parse->rtable, dummy_rte);
+ dummy_rtindex = list_length(root->parse->rtable);
+
+ /* build rest of the RemoteQuery node and the plan there */
+ remote_group_plan = &remote_group->scan.plan;
+ /* the join targetlist becomes this node's tlist */
+ remote_group_plan->targetlist = base_tlist;
+ remote_group_plan->lefttree = NULL;
+ remote_group_plan->righttree = NULL;
+ remote_group->scan.scanrelid = dummy_rtindex;
+ remote_group->sql_statement = remote_sql_stmt->data;
+ /* set_plan_refs needs this later */
+ remote_group->base_tlist = base_tlist;
+ remote_group->relname = "__FOREIGN_QUERY__";
+ remote_group->partitioned_replicated = remote_scan->partitioned_replicated;
+ /*
+ * Only quals that can be pushed to the remote side are the ones in the having
+ * clause. Till we work out how to handle having quals in XC, we don't have
+ * any quals here.
+ * PGXCTODO: the RemoteQuery node that was earlier the lefttree of Agg
+ * node, may have local quals. In such case, we have to aggregate and group
+ * at coordinator and can not push the grouping clause to the datanodes. Is
+ * there a case in XC, where we can have local quals?
+ */
+ /* we actually need not worry about costs since this is the final plan */
+ remote_group_plan->startup_cost = remote_scan->scan.plan.startup_cost;
+ remote_group_plan->total_cost = remote_scan->scan.plan.total_cost;
+ remote_group_plan->plan_rows = remote_scan->scan.plan.plan_rows;
+ remote_group_plan->plan_width = remote_scan->scan.plan.plan_width;
+
+ /* modify the passed in Group plan according to the remote query we built */
+ /*
+ * Materialization is always needed for RemoteQuery in case we need to restart
+ * the scan
+ */
+ group_plan->plan.lefttree = (Plan *) make_material(remote_group_plan);
+ return (Plan *)group_plan;
+}
+
+/*
* locates the grouping clauses in the given target list. This is very similar
* to locate_grouping_columns except that there is only one target list to
* search into
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 213da56115..e64e938648 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1336,10 +1336,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
result_plan);
#ifdef PGXC
/*
- * can we push any clauses to the remote node? try doing that
+ * Grouping will certainly not increase the number of rows
+ * coordinator fetches from datanode, in fact it's expected to
+ * reduce the number drastically. Hence, try pushing GROUP BY
+ * clauses and aggregates to the datanode, thus saving bandwidth.
*/
if (IS_PGXC_COORDINATOR && !IsConnFromCoord())
- result_plan = create_remotegrouping_plan(root, result_plan);
+ result_plan = create_remoteagg_plan(root, result_plan);
#endif /* PGXC */
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
@@ -1412,7 +1415,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
extract_grouping_ops(parse->groupClause),
dNumGroups,
result_plan);
- /* The Group node won't change sort ordering */
+#ifdef PGXC
+ /*
+ * Grouping will certainly not increase the number of rows
+ * coordinator fetches from datanode, in fact it's expected to
+ * reduce the number drastically. Hence, try pushing GROUP BY
+ * clauses and aggregates to the datanode, thus saving bandwidth.
+ */
+ if (IS_PGXC_COORDINATOR && !IsConnFromCoord())
+ result_plan = create_remotegroup_plan(root, result_plan);
+#endif /* PGXC */
}
else if (root->hasHavingQual)
{
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index e595f1e003..28b4ad7569 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -82,7 +82,8 @@ extern ModifyTable *make_modifytable(CmdType operation, List *resultRelations,
List *subplans, List *returningLists,
List *rowMarks, int epqParam);
extern bool is_projection_capable_plan(Plan *plan);
-extern Plan *create_remotegrouping_plan(PlannerInfo *root, Plan *agg_plan);
+extern Plan *create_remotegroup_plan(PlannerInfo *root, Plan *local_plan);
+extern Plan *create_remoteagg_plan(PlannerInfo *root, Plan *agg_plan);
/*
* prototypes for plan/initsplan.c
diff --git a/src/test/regress/expected/xc_groupby.out b/src/test/regress/expected/xc_groupby.out
index 672084ac78..e403e37d47 100644
--- a/src/test/regress/expected/xc_groupby.out
+++ b/src/test/regress/expected/xc_groupby.out
@@ -66,7 +66,8 @@ explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group
Output: tab1.val, tab1.val2
(8 rows)
--- group by without aggregate, just like distinct?
+-- group by without aggregate
+set enable_hashagg to off;
select val2 from tab1 group by val2;
val2
------
@@ -78,7 +79,7 @@ select val2 from tab1 group by val2;
explain verbose select val2 from tab1 group by val2;
QUERY PLAN
----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=4)
+ Group (cost=1.02..1.03 rows=1 width=4)
Output: tab1.val2
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: tab1.val2
@@ -86,6 +87,95 @@ explain verbose select val2 from tab1 group by val2;
Output: tab1.val2
(6 rows)
+select val + val2 from tab1 group by val + val2;
+ ?column?
+----------
+ 2
+ 3
+ 4
+ 7
+ 8
+ 9
+(6 rows)
+
+explain verbose select val + val2 from tab1 group by val + val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.03..1.04 rows=1 width=8)
+ Output: ((tab1.val + tab1.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab1.val2))
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: (tab1.val + tab1.val2)
+(6 rows)
+
+select val + val2, val, val2 from tab1 group by val, val2;
+ ?column? | val | val2
+----------+-----+------
+ 2 | 1 | 1
+ 4 | 1 | 3
+ 3 | 2 | 1
+ 4 | 2 | 2
+ 4 | 3 | 1
+ 7 | 4 | 3
+ 8 | 6 | 2
+ 9 | 6 | 3
+(8 rows)
+
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.02..1.04 rows=1 width=8)
+ Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: (tab1.val + tab1.val2), tab1.val, tab1.val2
+(6 rows)
+
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ ?column? | val | val2
+----------+-----+------
+ 2 | 1 | 1
+ 6 | 2 | 4
+ 5 | 3 | 2
+ 7 | 3 | 4
+ 5 | 4 | 1
+ 6 | 4 | 2
+(6 rows)
+
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Group (cost=0.01..0.02 rows=1 width=0)
+ Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1 width=4)
+ Output: (tab1.val + tab2.val2), tab1.val, tab2.val2
+(6 rows)
+
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ ?column?
+----------
+ 2
+ 5
+ 6
+ 7
+(4 rows)
+
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Group (cost=0.01..0.02 rows=1 width=0)
+ Output: ((tab1.val + tab2.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab2.val2))
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1 width=4)
+ Output: (tab1.val + tab2.val2)
+(6 rows)
+
+reset enable_hashagg;
-- group by with aggregates in expression
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
?column? | val2
@@ -210,7 +300,8 @@ explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group
Output: tab1.val, tab1.val2
(8 rows)
--- group by without aggregate, just like distinct?
+-- group by without aggregate
+set enable_hashagg to off;
select val2 from tab1 group by val2;
val2
------
@@ -222,7 +313,7 @@ select val2 from tab1 group by val2;
explain verbose select val2 from tab1 group by val2;
QUERY PLAN
----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=4)
+ Group (cost=1.02..1.03 rows=1 width=4)
Output: tab1.val2
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: tab1.val2
@@ -230,6 +321,95 @@ explain verbose select val2 from tab1 group by val2;
Output: tab1.val2
(6 rows)
+select val + val2 from tab1 group by val + val2;
+ ?column?
+----------
+ 2
+ 3
+ 4
+ 7
+ 8
+ 9
+(6 rows)
+
+explain verbose select val + val2 from tab1 group by val + val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.03..1.04 rows=1 width=8)
+ Output: ((tab1.val + tab1.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab1.val2))
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: (tab1.val + tab1.val2)
+(6 rows)
+
+select val + val2, val, val2 from tab1 group by val, val2;
+ ?column? | val | val2
+----------+-----+------
+ 2 | 1 | 1
+ 4 | 1 | 3
+ 3 | 2 | 1
+ 4 | 2 | 2
+ 4 | 3 | 1
+ 7 | 4 | 3
+ 8 | 6 | 2
+ 9 | 6 | 3
+(8 rows)
+
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.02..1.04 rows=1 width=8)
+ Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: (tab1.val + tab1.val2), tab1.val, tab1.val2
+(6 rows)
+
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ ?column? | val | val2
+----------+-----+------
+ 2 | 1 | 1
+ 6 | 2 | 4
+ 5 | 3 | 2
+ 7 | 3 | 4
+ 5 | 4 | 1
+ 6 | 4 | 2
+(6 rows)
+
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Group (cost=0.01..0.02 rows=1 width=0)
+ Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1 width=4)
+ Output: (tab1.val + tab2.val2), tab1.val, tab2.val2
+(6 rows)
+
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ ?column?
+----------
+ 2
+ 5
+ 6
+ 7
+(4 rows)
+
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Group (cost=0.01..0.02 rows=1 width=0)
+ Output: ((tab1.val + tab2.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab2.val2))
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1 width=4)
+ Output: (tab1.val + tab2.val2)
+(6 rows)
+
+reset enable_hashagg;
-- group by with aggregates in expression
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
?column? | val2
diff --git a/src/test/regress/sql/xc_groupby.sql b/src/test/regress/sql/xc_groupby.sql
index 66dfabc35f..1fd0f50e11 100644
--- a/src/test/regress/sql/xc_groupby.sql
+++ b/src/test/regress/sql/xc_groupby.sql
@@ -10,9 +10,19 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val
-- aggregates over aggregates
select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
--- group by without aggregate, just like distinct?
+-- group by without aggregate
+set enable_hashagg to off;
select val2 from tab1 group by val2;
explain verbose select val2 from tab1 group by val2;
+select val + val2 from tab1 group by val + val2;
+explain verbose select val + val2 from tab1 group by val + val2;
+select val + val2, val, val2 from tab1 group by val, val2;
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+reset enable_hashagg;
-- group by with aggregates in expression
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
@@ -36,9 +46,19 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val
-- aggregates over aggregates
select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
--- group by without aggregate, just like distinct?
+-- group by without aggregate
+set enable_hashagg to off;
select val2 from tab1 group by val2;
explain verbose select val2 from tab1 group by val2;
+select val + val2 from tab1 group by val + val2;
+explain verbose select val + val2 from tab1 group by val + val2;
+select val + val2, val, val2 from tab1 group by val, val2;
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+reset enable_hashagg;
-- group by with aggregates in expression
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;