5 files changed, 1815 insertions, 520 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 97016f9723..a666914d7a 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -41,6 +41,7 @@
 #include "catalog/pg_proc.h"
 #include "catalog/pg_type.h"
 #include "executor/executor.h"
+#include "rewrite/rewriteManip.h"
 #endif
 #include "utils/lsyscache.h"
 
@@ -89,6 +90,8 @@ static Alias *generate_remote_rte_alias(RangeTblEntry *rte, int varno,
 					char *aliasname, int reduce_level);
 static void pgxc_locate_grouping_columns(PlannerInfo *root, List *tlist,
 											AttrNumber *grpColIdx);
+static List *pgxc_process_grouping_targetlist(PlannerInfo *root,
+												List **local_tlist);
 #endif
 static NestLoop *create_nestloop_plan(PlannerInfo *root, NestPath *best_path,
 					 Plan *outer_plan, Plan *inner_plan);
@@ -5056,7 +5059,7 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan)
 }
 
 /*
- * create_remoteagg_plan
+ * create_remotegrouping_plan
  * Check if the grouping and aggregates can be pushed down to the
  * datanodes.
  * Right now we can push with following restrictions
@@ -5064,6 +5067,7 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan)
  *    expressions in group by clauses
  * 2. No distinct or order by clauses
  * 3. No windowing clause
+ * 4. No having clause
  *
  * Inputs
  * root - planerInfo root for this query
@@ -5075,306 +5079,14 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan)
  * node in case there are no local clauses.
  */
 Plan *
-create_remoteagg_plan(PlannerInfo *root, Plan *local_plan)
+create_remotegrouping_plan(PlannerInfo *root, Plan *local_plan)
 {
 	Query		*query = root->parse;
-	RemoteQuery	*agg_left;
-	Plan		*temp_plan = local_plan->lefttree;
-	List		*agg_tlist = local_plan->targetlist;
-	StringInfo	remote_sql_stmt = makeStringInfo();
-	StringInfo	remote_targetlist = makeStringInfo();
-	StringInfo	remote_fromlist = makeStringInfo();
-	StringInfo	groupby_clause = makeStringInfo();
-	StringInfo	in_alias = makeStringInfo();
-	ListCell	*temp;
-	ListCell	*temp_remote;
-	RemoteQuery *agg_remote;
-	Plan		*agg_remote_plan;
-	RangeTblEntry	*dummy_rte;
-	Index			dummy_rtindex;
-	List		*base_tlist;
-	Agg			*agg_plan = NULL;
-	List		*temp_vars;			/* temporarily hold the VARs */
-	List		*temp_vartlist;		/* temporarity hold tlist of VARs */
-	Relids		in_relids;			/* the list of Relids referenced by
-									 * the Agg plan
-									 */
-
-	/* For now only Agg plans */
-	Assert(IsA(local_plan, Agg));
-	agg_plan = (Agg *)local_plan;
-
-	/*
-	 * We don't push aggregation and grouping to datanodes, in case there are
-	 * windowing aggregates, distinct, having clause or sort clauses.
-	 */
-	if (query->hasWindowFuncs ||
-		query->distinctClause ||
-		query->sortClause ||
-		query->havingQual)
-		return local_plan;
-
-	/*
-	 * Optimize if only the tree underneath is reduced to RemoteQuery, any other
-	 * node there indicates that the scans can not be completely pushed to the
-	 * remote data nodes.
-	 * RemoteQuery is hidden underneath Material plan, take it out.
-	 */
-	if (IsA(temp_plan, Material))
-		temp_plan = temp_plan->lefttree;
-	if (!IsA(temp_plan, RemoteQuery))
-		return local_plan;
-	else
-		agg_left = (RemoteQuery *)temp_plan;
-
-	/*
-	 * Walk through the target list and find out whether we can push the
-	 * aggregates and grouping to datanodes. We can do so if the target list
-	 * contains plain aggregates (without any expression involving those) and
-	 * expressions in group by clauses only (last one to make the query legit.
-	 */
-	foreach(temp, agg_tlist)
-	{
-		TargetEntry	*tle = lfirst(temp);
-		Node		*expr = (Node *)tle->expr;
-
-		/*
-		 * PGXCTODO: once we allow sort clauses to be pushed to data nodes,
-		 * along with group by clause, this condition will need to be changed.
-		 */
-		if (!(IsA(expr, Aggref) || tle->ressortgroupref > 0))
-			return local_plan;
-	}
-
-	/*
-	 * Cleared of all the charges, now take following steps
-	 * 1. Create a remote query node reflecting the query to be pushed to the
-	 *    datanode
-	 * 2. Modify the Agg node passed in so that it reflects the aggregation
-	 *    (collection) to be done at the coordinator based on the results sent by
-	 *    the datanodes.
-	 */
-	appendStringInfo(in_alias, "%s_%d", "group", root->rs_alias_index);
-
-	/* Find all the relations referenced by targetlist of Agg node */
-	temp_vars = pull_var_clause((Node *)agg_tlist, PVC_REJECT_PLACEHOLDERS);
-	findReferencedVars(temp_vars, (Plan *)agg_left, &temp_vartlist, &in_relids);
-
-	/*
-	 * Build partial RemoteQuery node to be used for creating the Select clause
-	 * to be sent to the remote node. Rest of the node will be built later
-	 */
-	agg_remote = makeNode(RemoteQuery);
-
-	/*
-	 * Save information about the plan we are reducing.
-	 * We may need this information later if more entries are added to it
-	 * as part of the remote expression optimization.
-	 */
-	agg_remote->remotejoin		   = false;
-	agg_remote->inner_alias		   = pstrdup(in_alias->data);
-	agg_remote->inner_reduce_level = agg_left->reduce_level;
-	agg_remote->inner_relids       = in_relids;
-	agg_remote->inner_statement	   = pstrdup(agg_left->sql_statement);
-	agg_remote->exec_nodes         = agg_left->exec_nodes;
-
-	/* Don't forget to increment the index for the next time around! */
-	agg_remote->reduce_level	   = root->rs_alias_index++;
-
-	/* Generate the select clause of the remote query */
-	appendStringInfoString(remote_targetlist, "SELECT");
-	foreach (temp, agg_tlist)
-	{
-		TargetEntry *tle = lfirst(temp);
-		Node		*expr = (Node *)tle->expr;
-
-		create_remote_expr(root, local_plan, remote_targetlist, expr, agg_remote);
-
-		/* If this is not last target entry, add a comma with space */
-		if (lnext(temp))
-			appendStringInfoString(remote_targetlist, ",");
-	}
-
-	/* Generate the from clause of the remote query */
-	appendStringInfo(remote_fromlist, "FROM (%s) %s",
-							agg_remote->inner_statement, agg_remote->inner_alias);
-
-	/*
-	 * Generate group by clause for the remote query and recompute the group by
-	 * columE.n locations
-	 */
-	if (query->groupClause)
-	{
-		int cntCols;
-		Assert(IsA(local_plan, Agg));
-
-		/*
-		 * Recompute the column ids of the grouping columns,
-		 * the group column indexes computed earlier point in the
-		 * targetlists of the scan plans under this node. But now the grouping
-		 * column indexes will be pointing in the targetlist of the new
-		 * RemoteQuery, hence those need to be recomputed.
-		 */
-		pgxc_locate_grouping_columns(root, agg_tlist, agg_plan->grpColIdx);
-
-		appendStringInfoString(groupby_clause, "GROUP BY ");
-		for (cntCols = 0; cntCols < agg_plan->numCols; cntCols++)
-		{
-			appendStringInfo(groupby_clause, "%d",
-								agg_plan->grpColIdx[cntCols]);
-			if (cntCols < agg_plan->numCols - 1)
-				appendStringInfoString(groupby_clause, ", ");
-		}
-	}
-
-	/* Generate the remote sql statement from the pieces */
-	appendStringInfo(remote_sql_stmt, "%s %s %s", remote_targetlist->data,
-						remote_fromlist->data, groupby_clause->data);
-
-	/*
-	 * Set the base_tlist for the RemoteQuery node being created, it's used to
-	 * create the tuple descriptor for the result from RemoteQuery and rewrite
-	 * the Aggregates targetlist accept the results of the RemoteQuery.
-	 */
-	base_tlist = add_to_flat_tlist(NIL, get_tlist_exprs(agg_tlist, true));
-
-	/*
-	 * We need to change the return types of the aggregates. Datanodes send the
-	 * aggregation results in the form of transition results.
-	 */
-	foreach (temp, base_tlist)
-	{
-		TargetEntry *tle = lfirst(temp);
-		Node		*expr = (Node *)tle->expr;
-		Aggref		*agg;
-
-		if (IsA(expr, Aggref))
-		{
-			agg = (Aggref *)expr;
-			agg->aggtype = agg->aggtrantype;
-		}
-	}
-
-	/*
-	 * Create a dummy RTE for the remote query being created. Append the dummy
-	 * range table entry to the range table. Note that this modifies the master
-	 * copy the caller passed us, otherwise e.g EXPLAIN VERBOSE will fail to
-	 * find the rte the Vars built below refer to. Also create the tuple
-	 * descriptor for the result of this query from the base_tlist (targetlist
-	 * we used to generate the remote node query).
-	 */
-	dummy_rte = makeNode(RangeTblEntry);
-	dummy_rte->reltupdesc = ExecTypeFromTL(base_tlist, false);
-	dummy_rte->rtekind = RTE_RELATION;
-
-	/* Use a dummy relname... */
-	dummy_rte->relname	   = "__FOREIGN_QUERY__";
-	dummy_rte->eref		   = makeAlias("__FOREIGN_QUERY__", NIL);
-
-	/* Rest will be zeroed out in makeNode() */
-	root->parse->rtable = lappend(root->parse->rtable, dummy_rte);
-	dummy_rtindex = list_length(root->parse->rtable);
-
-	/*
-	 * Change the aggref nodes in the local Agg plan to accept the transition
-	 * results from the remote query output. Do this after we have created base
-	 * list, otherwise we might introduce these changes in the base list.
-	 * Do this after the RTE for the remote query is added to the root.
-	 */
-	forboth (temp, agg_tlist, temp_remote, base_tlist)
-	{
-		TargetEntry *tle = lfirst(temp);
-		Node		*expr = (Node *)tle->expr;
-		Aggref		*agg;
-		TargetEntry	*tle_remote = lfirst(temp_remote);
-		Node		*expr_remote = (Node *)tle_remote->expr;
-
-		if (IsA(expr, Aggref))
-		{
-			Assert(IsA(expr_remote, Aggref));
-
-			/*
-			 * Replace the args of the local Aggref with Aggref node to be
-			 * included in RemoteQuery node, so that set_plan_refs can convert
-			 * the args into VAR pointing to the appropriate result in the tuple
-			 * coming from RemoteQuery node.
-			 * PGXCTODO: should we push this change in targetlists of plans
-			 * above?
-			 */
-			agg = (Aggref *)expr;
-			agg->args = list_make1(makeTargetEntry(copyObject(expr_remote), 1, NULL, false));
-		}
-	}
-
-	/* Build rest of the RemoteQuery node and the plan there */
-	agg_remote_plan = &agg_remote->scan.plan;
-
-	/* The join targetlist becomes this node's tlist */
-	agg_remote_plan->targetlist = base_tlist;
-	agg_remote_plan->lefttree 	= NULL;
-	agg_remote_plan->righttree 	= NULL;
-	agg_remote->scan.scanrelid 	= dummy_rtindex;
-	agg_remote->sql_statement = remote_sql_stmt->data;
-
-	/* set_plan_refs needs this later */
-	agg_remote->base_tlist		= base_tlist;
-	agg_remote->relname			= "__FOREIGN_QUERY__";
-	agg_remote->partitioned_replicated = agg_left->partitioned_replicated;
-
-	/*
-	 * Only quals that can be pushed to the remote side the ones in the having
-	 * clause. Till we work out how to handle having quals in XC, we don't have
-	 * any quals here.
-	 * PGXCTODO: the RemoteQuery node that was earlier the lefttree of Agg
-	 * node, may have local quals. In such case, we have to aggregate and group
-	 * at coordinator and can not push the grouping clause to the datanodes. Is
-	 * there a case in XC, where we can have local quals?
-	 * We actually need not worry about costs since this is the final plan.
-	 */
-	agg_remote_plan->startup_cost = agg_left->scan.plan.startup_cost;
-	agg_remote_plan->total_cost   = agg_left->scan.plan.total_cost;
-	agg_remote_plan->plan_rows 	  = agg_left->scan.plan.plan_rows;
-	agg_remote_plan->plan_width   = agg_left->scan.plan.plan_width;
-
-	/*
-	 * Modify the passed in Agg plan according to the remote query we built.
-	 * Materialization is always needed for RemoteQuery in case we need to restart
-	 * the scan.
-	 */
-	agg_plan->plan.lefttree = (Plan *) make_material(agg_remote_plan);
-
-	/* Indicate that we should apply collection function directly */
-	agg_plan->skip_trans = true;
-
-	return (Plan *)agg_plan;
-}
-
-/*
- * create_remotegroup_plan
- * Given a Group plan, try to push as much of the query to the datanodes and
- * build a Group plan to combiner the results across the datanodes. The Sort
- * node under the Group plan is pushed down to RemoteQuery plan, since the
- * combiner knows how to merge the results across datanodes in sorted manner.
- * Hence there is no separate Sort node.
- *
- * This optimization is applied under following conditions
- * 1. The scan plans under the Group->Sort node is RemoteQuery
- * 2. There is not separate Sort, distinct, having clause in the query.
- *
- * PGXCTODO: we should lift up as many of these restrictions as possible or give
- * reasons why those restrictions are needed.
- */
-Plan *
-create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
-{
-	Group		*group_plan;
-	Query		*query = root->parse;
 	Sort		*sort_plan;
 	RemoteQuery	*remote_scan;	/* remote query in the passed in plan */
 	RemoteQuery	*remote_group;	/* remote query after optimization */
 	Plan		*remote_group_plan;	/* plan portion of remote_group */
 	Plan		*temp_plan;
-	List		*local_tlist;	/* target list of the local plan */
 	List		*temp_vars;			/* temporarily hold the VARs */
 	List		*temp_vartlist;		/* temporarity hold tlist of VARs */
 	ListCell	*temp;
@@ -5384,59 +5096,110 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
 	StringInfo	orderby_clause = makeStringInfo();	/* remote query ORDER BY */
 	StringInfo	remote_fromlist = makeStringInfo();	/* remote query FROM */
 	StringInfo	in_alias = makeStringInfo();
-	Relids		in_relids;
+	Relids		in_relids;			/* the list of Relids referenced by lefttree */
 	Index		dummy_rtindex;
 	List		*base_tlist;
 	RangeTblEntry	*dummy_rte;
-	int			cntCols;
+	int			numGroupCols;
+	AttrNumber	*grpColIdx;
 
-	if (query->havingQual ||
+	/*
+	 * We don't push aggregation and grouping to datanodes, in case there are
+	 * windowing aggregates, distinct, having clause or sort clauses.
+	 */
+	if (query->hasWindowFuncs ||
 		query->distinctClause ||
 		query->sortClause ||
-		query->hasWindowFuncs)
+		query->havingQual)
 		return local_plan;
 
-	/* For now only for Group plans are treated */
-	Assert(IsA(local_plan, Group));
-	group_plan = (Group *)local_plan;
-	remote_scan = NULL;
-	temp_plan = local_plan->lefttree;
+	/*
+	 * PGXCTODO: we don't support the parameterised queries yet. So, for the
+	 * time being we don't apply the optimizations for parameterised queries
+	 */
+	if (root->glob->boundParams)
+		return local_plan;
+	/* for now only Agg/Group plans */
+	if (local_plan && IsA(local_plan, Agg))
+	{
+		numGroupCols = ((Agg *)local_plan)->numCols;
+		grpColIdx = ((Agg *)local_plan)->grpColIdx;
+	}
+	else if (local_plan && IsA(local_plan, Group))
+	{
+		numGroupCols = ((Group *)local_plan)->numCols;
+		grpColIdx = ((Group *)local_plan)->grpColIdx;
+	}
+	else
+		return local_plan;
 
 	/*
-	 * We expect plan tree as Group->Sort->{Result}?->{Material}?->RemoteQuery,
+	 * We expect plan tree as Group/Agg->Sort->Result->Material->RemoteQuery,
+	 * Result, Material nodes are optional. Sort is compulsory for Group but not
+	 * for Agg.
 	 * anything else is not handled right now.
 	 */
-	if (IsA(temp_plan, Sort))
+	temp_plan = local_plan->lefttree;
+	remote_scan = NULL;
+	sort_plan = NULL;
+	if (temp_plan && IsA(temp_plan, Sort))
 	{
 		sort_plan = (Sort *)temp_plan;
 		temp_plan = temp_plan->lefttree;
 	}
-	if (IsA(temp_plan, Result))
+	if (temp_plan && IsA(temp_plan, Result))
 		temp_plan = temp_plan->lefttree;
-	if (IsA(temp_plan, Material))
+	if (temp_plan && IsA(temp_plan, Material))
 		temp_plan = temp_plan->lefttree;
-	if (IsA(temp_plan, RemoteQuery))
+	if (temp_plan && IsA(temp_plan, RemoteQuery))
 		remote_scan = (RemoteQuery *)temp_plan;
 
-	if (!remote_scan || !sort_plan)
+	if (!remote_scan)
+		return local_plan;
+	/*
+	 * for Group plan we expect Sort under the Group, which is always the case,
+	 * the condition below is really for some possible non-existent case
+	 */
+	if (IsA(local_plan, Group) && !sort_plan)
 		return local_plan;
 
-	Assert(IsA(remote_scan, RemoteQuery));
-	Assert(IsA(sort_plan, Sort));
 
 	/*
-	 * grouping_planner will add Sort node before Group node to sort the rows
+	 * Grouping_planner may add Sort node to sort the rows
 	 * based on the columns in GROUP BY clause. Hence the columns in Sort and
 	 * those in Group node in should be same. The columns are usually in the
 	 * same order in both nodes, hence check the equality in order. If this
-	 * condition fails, we can not handle this GROUP plan for now.
+	 * condition fails, we can not handle this plan for now.
 	 */
-	if (sort_plan->numCols != group_plan->numCols)
-		return local_plan;
-	for (cntCols = 0; cntCols < group_plan->numCols; cntCols++)
+	if (sort_plan)
 	{
-		if (sort_plan->sortColIdx[cntCols] != group_plan->grpColIdx[cntCols])
+		int cntCols;
+		if (sort_plan->numCols != numGroupCols)
 			return local_plan;
+		for (cntCols = 0; cntCols < numGroupCols; cntCols++)
+		{
+			if (sort_plan->sortColIdx[cntCols] != grpColIdx[cntCols])
+				return local_plan;
+		}
+	}
+
+	/* find all the relations referenced by targetlist of Grouping node */
+	temp_vars = pull_var_clause((Node *)local_plan->targetlist,
+									PVC_REJECT_PLACEHOLDERS);
+	findReferencedVars(temp_vars, (Plan *)remote_scan, &temp_vartlist, &in_relids);
+
+	/*
+	 * process the targetlist of the grouping plan, also construct the
+	 * targetlist of the query to be shipped to the remote side
+	 */
+	base_tlist = pgxc_process_grouping_targetlist(root, &(local_plan->targetlist));
+	if (!base_tlist)
+	{
+		/*
+		 * for some reason we can not construct a targetlist shippable to the
+		 * datanode. Resort to the plan created by grouping_planner()
+		 */
+		return local_plan;
 	}
 
 	/*
@@ -5444,16 +5207,11 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
 	 * datanode.
 	 * 1. Create a remote query node reflecting the query to be pushed to the
 	 *    datanode.
-	 * 2. Modify the Group node passed in, to accept the results sent by the
-	 *    datanodes and group them.
+	 * 2. Modify the Grouping node passed in, to accept the results sent by the
+	 *    Datanodes, then group and aggregate them, if needed.
 	 */
-	local_tlist = local_plan->targetlist;
 	appendStringInfo(in_alias, "%s_%d", "group", root->rs_alias_index);
 
-	/* Find all the relations referenced by targetlist of Group node */
-	temp_vars = pull_var_clause((Node *)local_tlist, PVC_REJECT_PLACEHOLDERS);
-	findReferencedVars(temp_vars, (Plan *)remote_scan, &temp_vartlist, &in_relids);
-
 	/*
 	 * Build partial RemoteQuery node to be used for creating the Select clause
 	 * to be sent to the remote node. Rest of the node will be built later
@@ -5471,13 +5229,12 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
 	remote_group->inner_relids      	= in_relids;
 	remote_group->inner_statement		= pstrdup(remote_scan->sql_statement);
 	remote_group->exec_nodes			= remote_scan->exec_nodes;
-
 	/* Don't forget to increment the index for the next time around! */
 	remote_group->reduce_level			= root->rs_alias_index++;
 
 	/* Generate the select clause of the remote query */
 	appendStringInfoString(remote_targetlist, "SELECT");
-	foreach (temp, local_tlist)
+	foreach (temp, base_tlist)
 	{
 		TargetEntry *tle = lfirst(temp);
 		Node		*expr = (Node *)tle->expr;
@@ -5503,32 +5260,47 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
 	 */
 	if (query->groupClause)
 	{
-		SimpleSort		*remote_sort = makeNode(SimpleSort);
-		char			*sep = "";
+		int cntCols;
+		char *sep;
 
 		/*
-		 * Reuse the arrays allocated in sort_plan to create SimpleSort
-		 * structure. sort_plan is useless henceforth.
+		 * recompute the column ids of the grouping columns,
+		 * the group column indexes computed earlier point in the
+		 * targetlists of the scan plans under this node. But now the grouping
+		 * column indexes will be pointing in the targetlist of the new
+		 * RemoteQuery, hence those need to be recomputed
 		 */
-		remote_sort->numCols = group_plan->numCols;
-		remote_sort->sortColIdx = sort_plan->sortColIdx;
-		remote_sort->sortOperators = sort_plan->sortOperators;
-		remote_sort->nullsFirst = sort_plan->nullsFirst;
-
-		pgxc_locate_grouping_columns(root, local_tlist, group_plan->grpColIdx);
+		pgxc_locate_grouping_columns(root, base_tlist, grpColIdx);
 
 		appendStringInfoString(groupby_clause, "GROUP BY ");
-		appendStringInfoString(orderby_clause, "ORDER BY ");
-		for (cntCols = 0; cntCols < group_plan->numCols; cntCols++)
+		sep = "";
+		for (cntCols = 0; cntCols < numGroupCols; cntCols++)
 		{
-			appendStringInfo(groupby_clause, "%s%d", sep,
-								group_plan->grpColIdx[cntCols]);
-			remote_sort->sortColIdx[cntCols] = group_plan->grpColIdx[cntCols];
-			appendStringInfo(orderby_clause, "%s%d", sep,
-								remote_sort->sortColIdx[cntCols]);
+			appendStringInfo(groupby_clause, "%s%d", sep, grpColIdx[cntCols]);
 			sep = ", ";
 		}
-		remote_group->sort = remote_sort;
+		if (sort_plan)
+		{
+			SimpleSort		*remote_sort = makeNode(SimpleSort);
+			/*
+			 * reuse the arrays allocated in sort_plan to create SimpleSort
+			 * structure. sort_plan is useless henceforth.
+			 */
+			remote_sort->numCols = sort_plan->numCols;
+			remote_sort->sortColIdx = sort_plan->sortColIdx;
+			remote_sort->sortOperators = sort_plan->sortOperators;
+			remote_sort->nullsFirst = sort_plan->nullsFirst;
+			appendStringInfoString(orderby_clause, "ORDER BY ");
+			sep = "";
+			for (cntCols = 0; cntCols < remote_sort->numCols; cntCols++)
+			{
+				remote_sort->sortColIdx[cntCols] = grpColIdx[cntCols];
+				appendStringInfo(orderby_clause, "%s%d", sep,
+								remote_sort->sortColIdx[cntCols]);
+				sep = ", ";
+			}
+			remote_group->sort = remote_sort;
+		}
 	}
 
 	/* Generate the remote sql statement from the pieces */
@@ -5537,20 +5309,13 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
 						orderby_clause->data);
 
 	/*
-	 * Set the base_tlist for the RemoteQuery node being created, it's used to
-	 * create the tuple descriptor for the result from RemoteQuery and rewrite
-	 * the Aggregates targetlist accept the results of the RemoteQuery.
-	 */
-	base_tlist = add_to_flat_tlist(NIL, get_tlist_exprs(local_tlist, true));
-
-	/*
 	 * Create a dummy RTE for the remote query being created. Append the dummy
 	 * range table entry to the range table. Note that this modifies the master
 	 * copy the caller passed us, otherwise e.g EXPLAIN VERBOSE will fail to
-	 * find the rte the Vars built below refer to.
+	 * find the rte the Vars built below refer to. Also create the tuple
+	 * descriptor for the result of this query from the base_tlist (targetlist
+	 * we used to generate the remote node query).
 	 */
-
-	/* Cook up the reltupdesc using this base_tlist */
 	dummy_rte = makeNode(RangeTblEntry);
 	dummy_rte->reltupdesc = ExecTypeFromTL(base_tlist, false);
 	dummy_rte->rtekind = RTE_RELATION;
@@ -5577,6 +5342,7 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
 	remote_group->base_tlist		= base_tlist;
 	remote_group->relname			= "__FOREIGN_QUERY__";
 	remote_group->partitioned_replicated = remote_scan->partitioned_replicated;
+	remote_group->read_only = query->commandType == CMD_SELECT;
 
 	/*
 	 * Only quals that can be pushed to the remote side are the ones in the having
@@ -5587,21 +5353,24 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
 	 * node, may have local quals. In such case, we have to aggregate and group
 	 * at coordinator and can not push the grouping clause to the datanodes. Is
 	 * there a case in XC, where we can have local quals?
+	 * we actually need not worry about costs since this is the final plan
 	 */
-
-	/* We actually do not need to worry about costs since this is the final plan */
 	remote_group_plan->startup_cost	= remote_scan->scan.plan.startup_cost;
 	remote_group_plan->total_cost	= remote_scan->scan.plan.total_cost;
 	remote_group_plan->plan_rows	= remote_scan->scan.plan.plan_rows;
 	remote_group_plan->plan_width	= remote_scan->scan.plan.plan_width;
 
 	/*
-	 * Modify the passed in Group plan according to the remote query we built.
-	 * Materialization is always need for RemoteQuery in case we need to restart
+	 * Modify the passed in grouping plan according to the remote query we built
+	 * Materialization is always needed for RemoteQuery in case we need to restart
 	 * the scan.
 	 */
-	group_plan->plan.lefttree = (Plan *) make_material(remote_group_plan);
-	return (Plan *)group_plan;
+	local_plan->lefttree = (Plan *) make_material(remote_group_plan);
+	/* indicate that we should apply collection function directly */
+	if (IsA(local_plan, Agg))
+		((Agg *)local_plan)->skip_trans = true;
+
+	return local_plan;
 }
 
 /*
@@ -5638,4 +5407,127 @@ pgxc_locate_grouping_columns(PlannerInfo *root, List *tlist,
 		groupColIdx[keyno++] = te->resno;
 	}
 }
+
+/*
+ * pgxc_process_grouping_targetlist
+ * The function scans the targetlist to check if the we can push anything
+ * from the targetlist to the datanode. Following rules govern the choice
+ * 1. Either all of the aggregates are pushed to the datanode or none is pushed
+ * 2. If there are no aggregates, the targetlist is good to be shipped as is
+ * 3. If aggregates are involved in expressions, we push the aggregates to the
+ *    datanodes but not the involving expressions.
+ *
+ * The function constructs the targetlist for the query to be pushed to the
+ * datanode. It modifies the local targetlist to point to the expressions in
+ * remote targetlist wherever necessary (e.g. aggregates)
+ *
+ * PGXCTODO: we should be careful while pushing the function expressions, it's
+ * better to push functions like strlen() which can be evaluated at the
+ * datanode, but we should avoid pushing functions which can only be evaluated
+ * at coordinator.
+ */
+static List *
+pgxc_process_grouping_targetlist(PlannerInfo *root, List **local_tlist)
+{
+	bool	shippable_remote_tlist = true;
+	List	*remote_tlist = NIL;
+	int		next_resno = 1;	/* resno start from 1 */
+	List	*orig_local_tlist = NIL;/* Copy original local_tlist, in case it changes */
+	ListCell	*temp;
+	Query	*query = root->parse;
+
+	/*
+	 * Walk through the target list and find out whether we can push the
+	 * aggregates and grouping to datanodes. We can do so if the target list
+	 * contains plain aggregates (without any expression involving those) and
+	 * expressions in group by clauses only (last one to make the query legit.
+	 */
+	foreach(temp, *local_tlist)
+	{
+		TargetEntry	*local_tle = lfirst(temp);
+		TargetEntry *remote_tle;
+		Node		*expr = (Node *)local_tle->expr;
+
+		if (IsA(expr, Aggref))
+		{
+			Aggref	*aggref = (Aggref *)expr;
+			if (aggref->aggorder || aggref->aggdistinct || aggref->agglevelsup)
+			{
+				shippable_remote_tlist = false;
+				break;
+			}
+		}
+		else if (query->hasAggs && checkExprHasAggs(expr))
+		{
+			/*
+			 * Targetlist expressions which have aggregates embedded inside
+			 * are not handled right now.
+			 * PGXCTODO: We should be able to extract those aggregates out.
+			 * Add those to remote targetlist and modify the local
+			 * targetlist accordingly. Thus we get those aggregates grouped
+			 * and "transitioned" at the datanode.
+			 */
+			shippable_remote_tlist = false;
+			break;
+		}
+
+		remote_tle = makeTargetEntry(copyObject(expr),
+							  next_resno++,
+							  NULL,
+							  false);
+		/* Copy GROUP BY/SORT BY reference for the locating group by columns */
+		remote_tle->ressortgroupref = local_tle->ressortgroupref;
+		remote_tlist = lappend(remote_tlist, remote_tle);
+
+		/*
+		 * Replace the args of the local Aggref with Aggref node to be
+		 * included in RemoteQuery node, so that set_plan_refs can convert
+		 * the args into VAR pointing to the appropriate result in the tuple
+		 * coming from RemoteQuery node
+		 * PGXCTODO: should we push this change in targetlists of plans
+		 * above?
+		 */
+		if (IsA(expr, Aggref))
+		{
+			Aggref	*local_aggref = (Aggref *)expr;
+			Aggref	*remote_aggref = (Aggref *)remote_tle->expr;
+			Assert(IsA(remote_tle->expr, Aggref));
+			remote_aggref->aggtype = remote_aggref->aggtrantype;
+			/*
+			 * We are about to change the local_tlist, check if we have already
+			 * copied original local_tlist, if not take a copy
+			 */
+			if (!orig_local_tlist)
+				orig_local_tlist = copyObject(*local_tlist);
+			/* Is copyObject() needed here? probably yes */
+			local_aggref->args = list_make1(makeTargetEntry(copyObject(remote_tle->expr),
+																	1, NULL,
+																	false));
+		}
+	}
+
+	if (!shippable_remote_tlist)
+	{
+		/*
+		 * If local_tlist has changed but we didn't find anything shippable to
+		 * datanode, we need to restore the local_tlist to original state,
+		 */
+		if (orig_local_tlist)
+			*local_tlist = orig_local_tlist;
+		if (remote_tlist)
+			list_free_deep(remote_tlist);
+		remote_tlist = NIL;
+	}
+	else if (orig_local_tlist)
+	{
+		/*
+		 * If we have changed the targetlist passed, we need to pass back the
+		 * changed targetlist. Free the copy that has been created.
+		 */
+		list_free_deep(orig_local_tlist);
+	}
+
+	return remote_tlist;
+}
+
 #endif
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index e64e938648..5250905b59 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1334,16 +1334,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 												numGroups,
 												agg_counts.numAggs,
 												result_plan);
-#ifdef PGXC
-				/*
-				 * Grouping will certainly not increase the number of rows
-				 * coordinator fetches from datanode, in fact it's expected to
-				 * reduce the number drastically. Hence, try pushing GROUP BY
-				 * clauses and aggregates to the datanode, thus saving bandwidth.
-				 */
-				if (IS_PGXC_COORDINATOR && !IsConnFromCoord())
-					result_plan = create_remoteagg_plan(root, result_plan);
-#endif /* PGXC */
 				/* Hashed aggregation produces randomly-ordered results */
 				current_pathkeys = NIL;
 			}
@@ -1415,16 +1405,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 									extract_grouping_ops(parse->groupClause),
 												  dNumGroups,
 												  result_plan);
-#ifdef PGXC
-				/*
-				 * Grouping will certainly not increase the number of rows
-				 * coordinator fetches from datanode, in fact it's expected to
-				 * reduce the number drastically. Hence, try pushing GROUP BY
-				 * clauses and aggregates to the datanode, thus saving bandwidth.
-				 */
-				if (IS_PGXC_COORDINATOR && !IsConnFromCoord())
-					result_plan = create_remotegroup_plan(root, result_plan);
-#endif /* PGXC */
 			}
 			else if (root->hasHavingQual)
 			{
@@ -1445,6 +1425,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 												   parse->havingQual,
 												   NULL);
 			}
+#ifdef PGXC
+			/*
+			 * Grouping will certainly not increase the number of rows
+			 * coordinator fetches from datanode, in fact it's expected to
+			 * reduce the number drastically. Hence, try pushing GROUP BY
+			 * clauses and aggregates to the datanode, thus saving bandwidth.
+			 */
+			if (IS_PGXC_COORDINATOR && !IsConnFromCoord())
+				result_plan = create_remotegrouping_plan(root, result_plan);
+#endif /* PGXC */
+
 		}						/* end of non-minmax-aggregate case */
 
 		/*
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 8005d47c64..e1662eac8d 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -137,8 +137,7 @@ extern Var *search_tlist_for_var(Var *var, List *jtlist);
 extern Plan *create_remoteinsert_plan(PlannerInfo *root, Plan *topplan);
 extern Plan *create_remoteupdate_plan(PlannerInfo *root, Plan *topplan);
 extern Plan *create_remotedelete_plan(PlannerInfo *root, Plan *topplan);
-extern Plan *create_remotegroup_plan(PlannerInfo *root, Plan *local_plan);
-extern Plan *create_remoteagg_plan(PlannerInfo *root, Plan *agg_plan);
+extern Plan *create_remotegrouping_plan(PlannerInfo *root, Plan *local_plan);
 #endif
 
 #endif   /* PLANMAIN_H */
diff --git a/src/test/regress/expected/xc_groupby.out b/src/test/regress/expected/xc_groupby.out
index e403e37d47..31e9a8cb2e 100644
--- a/src/test/regress/expected/xc_groupby.out
+++ b/src/test/regress/expected/xc_groupby.out
@@ -1,3 +1,10 @@
+-- this file contains tests for GROUP BY with combinations of following
+-- 1. enable_hashagg = on/off (to force the grouping by sorting)
+-- 2. distributed or replicated tables across the datanodes
+-- If a testcase is added to any of the combinations, please check if it's
+-- applicable in other combinations as well.
+-- Combination 1: enable_hashagg on and distributed tables
+set enable_hashagg to on;
 -- create required tables and fill them with data
 create table tab1 (val int, val2 int);
 create table tab2 (val int, val2 int);
@@ -11,6 +18,17 @@ select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 g
      3 |  11 | 3.6666666666666667 | 3.66666666666667 |    3
 (3 rows)
 
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+                                                 QUERY PLAN                                                  
+-------------------------------------------------------------------------------------------------------------
+ HashAggregate  (cost=1.03..1.06 rows=1 width=8)
+   Output: count(*), sum(val), avg(val), ((sum(val))::double precision / (count(*))::double precision), val2
+   ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
+         Output: val, val2
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
+               Output: val, val2
+(6 rows)
+
 -- joins and group by
 select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
  count | sum |         avg         |     ?column?     | val2 | val2 
@@ -34,14 +52,14 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val
                Sort Key: tab1.val2
                ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
                      Output: tab1.val, tab1.val2
-                     ->  Data Node Scan (Node Count [2]) "SELECT val, val2 FROM public.tab1 tab1" on tab1  (cost=0.00..1.01 rows=1000 width=8)
+                     ->  Data Node Scan (Node Count [2]) on tab1  (cost=0.00..1.01 rows=1000 width=8)
                            Output: tab1.val, tab1.val2
          ->  Sort  (cost=1.02..1.03 rows=1 width=8)
                Output: tab2.val, tab2.val2
                Sort Key: tab2.val2
                ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
                      Output: tab2.val, tab2.val2
-                     ->  Data Node Scan (Node Count [2]) "SELECT val, val2 FROM public.tab2 tab2" on tab2  (cost=0.00..1.01 rows=1000 width=8)
+                     ->  Data Node Scan (Node Count [2]) on tab2  (cost=0.00..1.01 rows=1000 width=8)
                            Output: tab2.val, tab2.val2
 (19 rows)
 
@@ -57,17 +75,16 @@ explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group
                                        QUERY PLAN                                       
 ----------------------------------------------------------------------------------------
  HashAggregate  (cost=1.05..1.06 rows=1 width=12)
-   Output: sum((sum(tab1.val))), ((tab1.val2 % 2))
+   Output: sum((pg_catalog.sum((sum(tab1.val))))), ((tab1.val2 % 2))
    ->  HashAggregate  (cost=1.02..1.03 rows=1 width=8)
-         Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2
-         ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
-               Output: tab1.val, tab1.val2
+         Output: pg_catalog.sum((sum(tab1.val))), ((tab1.val2 % 2)), tab1.val2
+         ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+               Output: (sum(tab1.val)), ((tab1.val2 % 2)), tab1.val2
                ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
-                     Output: tab1.val, tab1.val2
+                     Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2
 (8 rows)
 
 -- group by without aggregate
-set enable_hashagg to off;
 select val2 from tab1 group by val2;
  val2 
 ------
@@ -79,7 +96,7 @@ select val2 from tab1 group by val2;
 explain verbose select val2 from tab1 group by val2;
                                     QUERY PLAN                                    
 ----------------------------------------------------------------------------------
- Group  (cost=1.02..1.03 rows=1 width=4)
+ HashAggregate  (cost=1.02..1.03 rows=1 width=4)
    Output: tab1.val2
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: tab1.val2
@@ -90,18 +107,18 @@ explain verbose select val2 from tab1 group by val2;
 select val + val2 from tab1 group by val + val2;
  ?column? 
 ----------
-        2
-        3
         4
         7
+        3
         8
         9
+        2
 (6 rows)
 
 explain verbose select val + val2 from tab1 group by val + val2;
                                     QUERY PLAN                                    
 ----------------------------------------------------------------------------------
- Group  (cost=1.03..1.04 rows=1 width=8)
+ HashAggregate  (cost=1.02..1.03 rows=1 width=8)
    Output: ((tab1.val + tab1.val2))
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: ((tab1.val + tab1.val2))
@@ -112,12 +129,12 @@ explain verbose select val + val2 from tab1 group by val + val2;
 select val + val2, val, val2 from tab1 group by val, val2;
  ?column? | val | val2 
 ----------+-----+------
-        2 |   1 |    1
+        7 |   4 |    3
+        4 |   3 |    1
         4 |   1 |    3
-        3 |   2 |    1
         4 |   2 |    2
-        4 |   3 |    1
-        7 |   4 |    3
+        3 |   2 |    1
+        2 |   1 |    1
         8 |   6 |    2
         9 |   6 |    3
 (8 rows)
@@ -125,7 +142,7 @@ select val + val2, val, val2 from tab1 group by val, val2;
 explain verbose select val + val2, val, val2 from tab1 group by val, val2;
                                     QUERY PLAN                                    
 ----------------------------------------------------------------------------------
- Group  (cost=1.02..1.04 rows=1 width=8)
+ HashAggregate  (cost=1.02..1.03 rows=1 width=8)
    Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
@@ -136,18 +153,18 @@ explain verbose select val + val2, val, val2 from tab1 group by val, val2;
 select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
  ?column? | val | val2 
 ----------+-----+------
-        2 |   1 |    1
-        6 |   2 |    4
         5 |   3 |    2
-        7 |   3 |    4
         5 |   4 |    1
         6 |   4 |    2
+        6 |   2 |    4
+        2 |   1 |    1
+        7 |   3 |    4
 (6 rows)
 
 explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
                                   QUERY PLAN                                   
 -------------------------------------------------------------------------------
- Group  (cost=0.01..0.02 rows=1 width=0)
+ HashAggregate  (cost=0.00..0.01 rows=1 width=0)
    Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
@@ -158,16 +175,16 @@ explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2
 select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
  ?column? 
 ----------
-        2
-        5
         6
+        2
         7
+        5
 (4 rows)
 
 explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
                                   QUERY PLAN                                   
 -------------------------------------------------------------------------------
- Group  (cost=0.01..0.02 rows=1 width=0)
+ HashAggregate  (cost=0.00..0.01 rows=1 width=0)
    Output: ((tab1.val + tab2.val2))
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: ((tab1.val + tab2.val2))
@@ -175,7 +192,6 @@ explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab
                Output: (tab1.val + tab2.val2)
 (6 rows)
 
-reset enable_hashagg;
 -- group by with aggregates in expression
 select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
       ?column?       | val2 
@@ -206,20 +222,366 @@ select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
 (3 rows)
 
 explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
-                                       QUERY PLAN                                       
-----------------------------------------------------------------------------------------
+                                          QUERY PLAN                                           
+-----------------------------------------------------------------------------------------------
  HashAggregate  (cost=1.02..1.04 rows=1 width=8)
-   Output: sum(val), avg(val), ((2 * val2))
-   ->  Result  (cost=0.00..1.02 rows=1 width=8)
-         Output: val, val2, (2 * val2)
-         ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
-               Output: val, val2
-               ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
-                     Output: val, val2
-(8 rows)
+   Output: pg_catalog.sum((sum(tab1.val))), pg_catalog.avg((avg(tab1.val))), ((2 * tab1.val2))
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (sum(tab1.val)), (avg(tab1.val)), ((2 * tab1.val2))
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
+               Output: sum(tab1.val), avg(tab1.val), (2 * tab1.val2)
+(6 rows)
 
 drop table tab1;
 drop table tab2;
+-- some tests involving nulls, characters, float type etc.
+create table def(a int, b varchar(25)); 
+insert into def VALUES (NULL, NULL);
+insert into def VALUES (1, NULL);
+insert into def VALUES (NULL, 'One');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (3, 'Three');
+insert into def VALUES (4, 'Three');
+insert into def VALUES (5, 'Three');
+insert into def VALUES (6, 'Two');
+insert into def VALUES (7, NULL);
+insert into def VALUES (8, 'Two');
+insert into def VALUES (9, 'Three');
+insert into def VALUES (10, 'Three');
+select a,count(a) from def group by a order by a;
+ a  | count 
+----+-------
+  1 |     1
+  2 |     2
+  3 |     1
+  4 |     1
+  5 |     1
+  6 |     1
+  7 |     1
+  8 |     1
+  9 |     1
+ 10 |     1
+    |     0
+(11 rows)
+
+explain verbose select a,count(a) from def group by a order by a;
+                                          QUERY PLAN                                          
+----------------------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=4)
+   Output: a, count(a)
+   ->  Sort  (cost=1.02..1.03 rows=1 width=4)
+         Output: a
+         Sort Key: def.a
+         ->  Result  (cost=0.00..1.01 rows=1 width=4)
+               Output: a
+               ->  Materialize  (cost=0.00..1.01 rows=1 width=4)
+                     Output: a, b
+                     ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
+                           Output: a, b
+(11 rows)
+
+select avg(a) from def group by a; 
+          avg           
+------------------------
+                       
+     6.0000000000000000
+     8.0000000000000000
+     5.0000000000000000
+     2.0000000000000000
+ 1.00000000000000000000
+     9.0000000000000000
+     3.0000000000000000
+    10.0000000000000000
+     7.0000000000000000
+     4.0000000000000000
+(11 rows)
+
+select avg(a) from def group by a;
+          avg           
+------------------------
+                       
+     6.0000000000000000
+     8.0000000000000000
+     5.0000000000000000
+ 1.00000000000000000000
+     9.0000000000000000
+     2.0000000000000000
+    10.0000000000000000
+     7.0000000000000000
+     3.0000000000000000
+     4.0000000000000000
+(11 rows)
+
+explain verbose select avg(a) from def group by a;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=4)
+   Output: pg_catalog.avg((avg(def.a))), def.a
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (avg(def.a)), def.a
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
+               Output: avg(def.a), def.a
+(6 rows)
+
+select avg(a) from def group by b;
+        avg         
+--------------------
+ 4.0000000000000000
+                   
+ 4.5000000000000000
+ 6.2000000000000000
+(4 rows)
+
+explain verbose select avg(a) from def group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=33)
+   Output: pg_catalog.avg((avg(def.a))), def.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (avg(def.a)), def.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=33)
+               Output: avg(def.a), def.b
+(6 rows)
+
+select sum(a) from def group by b;
+ sum 
+-----
+   8
+    
+  18
+  31
+(4 rows)
+
+explain verbose select sum(a) from def group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=33)
+   Output: pg_catalog.sum((sum(def.a))), def.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (sum(def.a)), def.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=33)
+               Output: sum(def.a), def.b
+(6 rows)
+
+select count(*) from def group by b;
+ count 
+-------
+     3
+     1
+     4
+     5
+(4 rows)
+
+explain verbose select count(*) from def group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=29)
+   Output: pg_catalog.count(*), def.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (count(*)), def.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=29)
+               Output: count(*), def.b
+(6 rows)
+
+select count(*) from def where a is not null group by a;
+ count 
+-------
+     1
+     1
+     1
+     1
+     1
+     1
+     2
+     1
+     1
+     1
+(10 rows)
+
+explain verbose select count(*) from def where a is not null group by a;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=4)
+   Output: pg_catalog.count(*), def.a
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (count(*)), def.a
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
+               Output: count(*), def.a
+(6 rows)
+
+select b from def group by b;
+   b   
+-------
+ 
+ One
+ Two
+ Three
+(4 rows)
+
+explain verbose select b from def group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=29)
+   Output: def.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: def.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=29)
+               Output: def.b
+(6 rows)
+
+select b,count(b) from def group by b;
+   b   | count 
+-------+-------
+       |     0
+ One   |     1
+ Two   |     4
+ Three |     5
+(4 rows)
+
+explain verbose select b,count(b) from def group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=29)
+   Output: def.b, count((count(def.b)))
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: def.b, (count(def.b))
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=29)
+               Output: def.b, count(def.b)
+(6 rows)
+
+select count(*) from def where b is null group by b;
+ count 
+-------
+     3
+(1 row)
+
+explain verbose select count(*) from def where b is null group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=29)
+   Output: pg_catalog.count(*), def.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (count(*)), def.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=29)
+               Output: count(*), def.b
+(6 rows)
+
+create table g(a int, b float, c numeric);
+insert into g values(1,2.1,3.2);
+insert into g values(1,2.1,3.2);
+insert into g values(2,2.3,5.2);
+select sum(a) from g group by a;
+ sum 
+-----
+   2
+   2
+(2 rows)
+
+explain verbose select sum(a) from g group by a;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=4)
+   Output: pg_catalog.sum((sum(g.a))), g.a
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (sum(g.a)), g.a
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
+               Output: sum(g.a), g.a
+(6 rows)
+
+select sum(b) from g group by b;
+ sum 
+-----
+ 2.3
+ 4.2
+(2 rows)
+
+explain verbose select sum(b) from g group by b;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=8)
+   Output: sum((sum(g.b))), g.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (sum(g.b)), g.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
+               Output: sum(g.b), g.b
+(6 rows)
+
+select sum(c) from g group by b;
+ sum 
+-----
+ 5.2
+ 6.4
+(2 rows)
+
+explain verbose select sum(c) from g group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=40)
+   Output: sum((sum(g.c))), g.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (sum(g.c)), g.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=40)
+               Output: sum(g.c), g.b
+(6 rows)
+
+select avg(a) from g group by b;
+          avg           
+------------------------
+     2.0000000000000000
+ 1.00000000000000000000
+(2 rows)
+
+explain verbose select avg(a) from g group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=12)
+   Output: pg_catalog.avg((avg(g.a))), g.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (avg(g.a)), g.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=12)
+               Output: avg(g.a), g.b
+(6 rows)
+
+select avg(b) from g group by c;
+ avg 
+-----
+ 2.3
+ 2.1
+(2 rows)
+
+explain verbose select avg(b) from g group by c;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=40)
+   Output: pg_catalog.avg((avg(g.b))), g.c
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (avg(g.b)), g.c
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=40)
+               Output: avg(g.b), g.c
+(6 rows)
+
+select avg(c) from g group by c;
+        avg         
+--------------------
+ 5.2000000000000000
+ 3.2000000000000000
+(2 rows)
+
+explain verbose select avg(c) from g group by c;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=32)
+   Output: pg_catalog.avg((avg(g.c))), g.c
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (avg(g.c)), g.c
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=32)
+               Output: avg(g.c), g.c
+(6 rows)
+
+drop table def;
+drop table g;
+-- Combination 2, enable_hashagg on and replicated tables.
 -- repeat the same tests for replicated tables
 -- create required tables and fill them with data
 create table tab1 (val int, val2 int) distribute by replication;
@@ -268,14 +630,14 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val
                Sort Key: tab1.val2
                ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
                      Output: tab1.val, tab1.val2
-                     ->  Data Node Scan (Node Count [1]) "SELECT val, val2 FROM public.tab1 tab1" on tab1  (cost=0.00..1.01 rows=1000 width=8)
+                     ->  Data Node Scan (Node Count [1]) on tab1  (cost=0.00..1.01 rows=1000 width=8)
                            Output: tab1.val, tab1.val2
          ->  Sort  (cost=1.02..1.03 rows=1 width=8)
                Output: tab2.val, tab2.val2
                Sort Key: tab2.val2
                ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
                      Output: tab2.val, tab2.val2
-                     ->  Data Node Scan (Node Count [1]) "SELECT val, val2 FROM public.tab2 tab2" on tab2  (cost=0.00..1.01 rows=1000 width=8)
+                     ->  Data Node Scan (Node Count [1]) on tab2  (cost=0.00..1.01 rows=1000 width=8)
                            Output: tab2.val, tab2.val2
 (19 rows)
 
@@ -291,17 +653,16 @@ explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group
                                        QUERY PLAN                                       
 ----------------------------------------------------------------------------------------
  HashAggregate  (cost=1.05..1.06 rows=1 width=12)
-   Output: sum((sum(tab1.val))), ((tab1.val2 % 2))
+   Output: sum((pg_catalog.sum((sum(tab1.val))))), ((tab1.val2 % 2))
    ->  HashAggregate  (cost=1.02..1.03 rows=1 width=8)
-         Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2
-         ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
-               Output: tab1.val, tab1.val2
+         Output: pg_catalog.sum((sum(tab1.val))), ((tab1.val2 % 2)), tab1.val2
+         ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+               Output: (sum(tab1.val)), ((tab1.val2 % 2)), tab1.val2
                ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=8)
-                     Output: tab1.val, tab1.val2
+                     Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2
 (8 rows)
 
 -- group by without aggregate
-set enable_hashagg to off;
 select val2 from tab1 group by val2;
  val2 
 ------
@@ -313,7 +674,7 @@ select val2 from tab1 group by val2;
 explain verbose select val2 from tab1 group by val2;
                                     QUERY PLAN                                    
 ----------------------------------------------------------------------------------
- Group  (cost=1.02..1.03 rows=1 width=4)
+ HashAggregate  (cost=1.02..1.03 rows=1 width=4)
    Output: tab1.val2
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: tab1.val2
@@ -324,18 +685,18 @@ explain verbose select val2 from tab1 group by val2;
 select val + val2 from tab1 group by val + val2;
  ?column? 
 ----------
-        2
-        3
         4
+        3
         7
         8
         9
+        2
 (6 rows)
 
 explain verbose select val + val2 from tab1 group by val + val2;
                                     QUERY PLAN                                    
 ----------------------------------------------------------------------------------
- Group  (cost=1.03..1.04 rows=1 width=8)
+ HashAggregate  (cost=1.02..1.03 rows=1 width=8)
    Output: ((tab1.val + tab1.val2))
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: ((tab1.val + tab1.val2))
@@ -346,12 +707,12 @@ explain verbose select val + val2 from tab1 group by val + val2;
 select val + val2, val, val2 from tab1 group by val, val2;
  ?column? | val | val2 
 ----------+-----+------
-        2 |   1 |    1
+        7 |   4 |    3
+        4 |   3 |    1
+        4 |   2 |    2
         4 |   1 |    3
         3 |   2 |    1
-        4 |   2 |    2
-        4 |   3 |    1
-        7 |   4 |    3
+        2 |   1 |    1
         8 |   6 |    2
         9 |   6 |    3
 (8 rows)
@@ -359,7 +720,7 @@ select val + val2, val, val2 from tab1 group by val, val2;
 explain verbose select val + val2, val, val2 from tab1 group by val, val2;
                                     QUERY PLAN                                    
 ----------------------------------------------------------------------------------
- Group  (cost=1.02..1.04 rows=1 width=8)
+ HashAggregate  (cost=1.02..1.03 rows=1 width=8)
    Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
@@ -370,18 +731,18 @@ explain verbose select val + val2, val, val2 from tab1 group by val, val2;
 select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
  ?column? | val | val2 
 ----------+-----+------
-        2 |   1 |    1
-        6 |   2 |    4
         5 |   3 |    2
-        7 |   3 |    4
         5 |   4 |    1
         6 |   4 |    2
+        2 |   1 |    1
+        6 |   2 |    4
+        7 |   3 |    4
 (6 rows)
 
 explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
                                   QUERY PLAN                                   
 -------------------------------------------------------------------------------
- Group  (cost=0.01..0.02 rows=1 width=0)
+ HashAggregate  (cost=0.00..0.01 rows=1 width=0)
    Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
@@ -392,16 +753,16 @@ explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2
 select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
  ?column? 
 ----------
-        2
-        5
         6
+        2
         7
+        5
 (4 rows)
 
 explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
                                   QUERY PLAN                                   
 -------------------------------------------------------------------------------
- Group  (cost=0.01..0.02 rows=1 width=0)
+ HashAggregate  (cost=0.00..0.01 rows=1 width=0)
    Output: ((tab1.val + tab2.val2))
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: ((tab1.val + tab2.val2))
@@ -409,7 +770,6 @@ explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab
                Output: (tab1.val + tab2.val2)
 (6 rows)
 
-reset enable_hashagg;
 -- group by with aggregates in expression
 select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
       ?column?       | val2 
@@ -440,22 +800,20 @@ select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
 (3 rows)
 
 explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
-                                       QUERY PLAN                                       
-----------------------------------------------------------------------------------------
+                                          QUERY PLAN                                           
+-----------------------------------------------------------------------------------------------
  HashAggregate  (cost=1.02..1.04 rows=1 width=8)
-   Output: sum(val), avg(val), ((2 * val2))
-   ->  Result  (cost=0.00..1.02 rows=1 width=8)
-         Output: val, val2, (2 * val2)
-         ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
-               Output: val, val2
-               ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=8)
-                     Output: val, val2
-(8 rows)
+   Output: pg_catalog.sum((sum(tab1.val))), pg_catalog.avg((avg(tab1.val))), ((2 * tab1.val2))
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (sum(tab1.val)), (avg(tab1.val)), ((2 * tab1.val2))
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=8)
+               Output: sum(tab1.val), avg(tab1.val), (2 * tab1.val2)
+(6 rows)
 
 drop table tab1;
 drop table tab2;
 -- some tests involving nulls, characters, float type etc.
-create table def(a int, b varchar(25)); 
+create table def(a int, b varchar(25)) distribute by replication; 
 insert into def VALUES (NULL, NULL);
 insert into def VALUES (1, NULL);
 insert into def VALUES (NULL, 'One');
@@ -497,7 +855,7 @@ explain verbose select a,count(a) from def group by a order by a;
                Output: a
                ->  Materialize  (cost=0.00..1.01 rows=1 width=4)
                      Output: a, b
-                     ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
+                     ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=4)
                            Output: a, b
 (11 rows)
 
@@ -508,15 +866,26 @@ select avg(a) from def group by a;
      6.0000000000000000
      8.0000000000000000
      5.0000000000000000
-     2.0000000000000000
  1.00000000000000000000
      9.0000000000000000
+     2.0000000000000000
      3.0000000000000000
     10.0000000000000000
      7.0000000000000000
      4.0000000000000000
 (11 rows)
 
+explain verbose select avg(a) from def group by a; 
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ HashAggregate  (cost=1.02..1.03 rows=1 width=4)
+   Output: pg_catalog.avg((avg(def.a))), def.a
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (avg(def.a)), def.a
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=4)
+               Output: avg(def.a), def.a
+(6 rows)
+
 select avg(a) from def group by a;
           avg           
 ------------------------
@@ -527,9 +896,9 @@ select avg(a) from def group by a;
  1.00000000000000000000
      9.0000000000000000
      2.0000000000000000
+     3.0000000000000000
     10.0000000000000000
      7.0000000000000000
-     3.0000000000000000
      4.0000000000000000
 (11 rows)
 
@@ -540,7 +909,7 @@ explain verbose select avg(a) from def group by a;
    Output: pg_catalog.avg((avg(def.a))), def.a
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (avg(def.a)), def.a
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=4)
                Output: avg(def.a), def.a
 (6 rows)
 
@@ -560,7 +929,7 @@ explain verbose select avg(a) from def group by b;
    Output: pg_catalog.avg((avg(def.a))), def.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (avg(def.a)), def.b
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=33)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=33)
                Output: avg(def.a), def.b
 (6 rows)
 
@@ -580,7 +949,7 @@ explain verbose select sum(a) from def group by b;
    Output: pg_catalog.sum((sum(def.a))), def.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (sum(def.a)), def.b
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=33)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=33)
                Output: sum(def.a), def.b
 (6 rows)
 
@@ -600,7 +969,7 @@ explain verbose select count(*) from def group by b;
    Output: pg_catalog.count(*), def.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (count(*)), def.b
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=29)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=29)
                Output: count(*), def.b
 (6 rows)
 
@@ -626,7 +995,7 @@ explain verbose select count(*) from def where a is not null group by a;
    Output: pg_catalog.count(*), def.a
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (count(*)), def.a
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=4)
                Output: count(*), def.a
 (6 rows)
 
@@ -646,7 +1015,7 @@ explain verbose select b from def group by b;
    Output: def.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: def.b
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=29)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=29)
                Output: def.b
 (6 rows)
 
@@ -666,7 +1035,7 @@ explain verbose select b,count(b) from def group by b;
    Output: def.b, count((count(def.b)))
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: def.b, (count(def.b))
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=29)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=29)
                Output: def.b, count(def.b)
 (6 rows)
 
@@ -683,11 +1052,11 @@ explain verbose select count(*) from def where b is null group by b;
    Output: pg_catalog.count(*), def.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (count(*)), def.b
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=29)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=29)
                Output: count(*), def.b
 (6 rows)
 
-create table g(a int, b float, c numeric);
+create table g(a int, b float, c numeric) distribute by replication;
 insert into g values(1,2.1,3.2);
 insert into g values(1,2.1,3.2);
 insert into g values(2,2.3,5.2);
@@ -705,7 +1074,7 @@ explain verbose select sum(a) from g group by a;
    Output: pg_catalog.sum((sum(g.a))), g.a
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (sum(g.a)), g.a
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=4)
                Output: sum(g.a), g.a
 (6 rows)
 
@@ -723,7 +1092,7 @@ explain verbose select sum(b) from g group by b;
    Output: sum((sum(g.b))), g.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (sum(g.b)), g.b
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=8)
                Output: sum(g.b), g.b
 (6 rows)
 
@@ -741,7 +1110,7 @@ explain verbose select sum(c) from g group by b;
    Output: sum((sum(g.c))), g.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (sum(g.c)), g.b
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=40)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=40)
                Output: sum(g.c), g.b
 (6 rows)
 
@@ -759,7 +1128,7 @@ explain verbose select avg(a) from g group by b;
    Output: pg_catalog.avg((avg(g.a))), g.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (avg(g.a)), g.b
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=12)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=12)
                Output: avg(g.a), g.b
 (6 rows)
 
@@ -777,7 +1146,7 @@ explain verbose select avg(b) from g group by c;
    Output: pg_catalog.avg((avg(g.b))), g.c
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (avg(g.b)), g.c
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=40)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=40)
                Output: avg(g.b), g.c
 (6 rows)
 
@@ -795,14 +1164,264 @@ explain verbose select avg(c) from g group by c;
    Output: pg_catalog.avg((avg(g.c))), g.c
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (avg(g.c)), g.c
-         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=32)
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=32)
                Output: avg(g.c), g.c
 (6 rows)
 
 drop table def;
 drop table g;
--- same test with replicated tables
-create table def(a int, b varchar(25)) distribute by replication; 
+reset enable_hashagg;
+-- Combination 3 enable_hashagg off and distributed tables
+set enable_hashagg to off;
+-- create required tables and fill them with data
+create table tab1 (val int, val2 int);
+create table tab2 (val int, val2 int);
+insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3);
+insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2);
+select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+ count | sum |        avg         |     ?column?     | val2 
+-------+-----+--------------------+------------------+------
+     3 |   6 | 2.0000000000000000 |                2 |    1
+     2 |   8 | 4.0000000000000000 |                4 |    2
+     3 |  11 | 3.6666666666666667 | 3.66666666666667 |    3
+(3 rows)
+
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+                                                 QUERY PLAN                                                  
+-------------------------------------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.07 rows=1 width=8)
+   Output: count(*), sum(val), avg(val), ((sum(val))::double precision / (count(*))::double precision), val2
+   ->  Sort  (cost=1.02..1.03 rows=1 width=8)
+         Output: val, val2
+         Sort Key: tab1.val2
+         ->  Result  (cost=0.00..1.01 rows=1 width=8)
+               Output: val, val2
+               ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
+                     Output: val, val2
+                     ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
+                           Output: val, val2
+(11 rows)
+
+-- joins and group by
+select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+ count | sum |         avg         |     ?column?     | val2 | val2 
+-------+-----+---------------------+------------------+------+------
+     9 |  78 |  8.6666666666666667 | 8.66666666666667 |    1 |    1
+     6 |  96 | 16.0000000000000000 |               16 |    2 |    2
+     3 |     |                     |                  |    3 |     
+     3 |     |                     |                  |      |    4
+(4 rows)
+
+explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+                                                                                    QUERY PLAN                                                                                     
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ GroupAggregate  (cost=2.08..2.14 rows=1 width=16)
+   Output: count(*), sum((tab1.val * tab2.val)), avg((tab1.val * tab2.val)), ((sum((tab1.val * tab2.val)))::double precision / (count(*))::double precision), tab1.val2, tab2.val2
+   ->  Sort  (cost=2.08..2.09 rows=1 width=16)
+         Output: tab1.val, tab2.val, tab1.val2, tab2.val2
+         Sort Key: tab1.val2, tab2.val2
+         ->  Merge Full Join  (cost=2.05..2.07 rows=1 width=16)
+               Output: tab1.val, tab2.val, tab1.val2, tab2.val2
+               Merge Cond: (tab1.val2 = tab2.val2)
+               ->  Sort  (cost=1.02..1.03 rows=1 width=8)
+                     Output: tab1.val, tab1.val2
+                     Sort Key: tab1.val2
+                     ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
+                           Output: tab1.val, tab1.val2
+                           ->  Data Node Scan (Node Count [2]) on tab1  (cost=0.00..1.01 rows=1000 width=8)
+                                 Output: tab1.val, tab1.val2
+               ->  Sort  (cost=1.02..1.03 rows=1 width=8)
+                     Output: tab2.val, tab2.val2
+                     Sort Key: tab2.val2
+                     ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
+                           Output: tab2.val, tab2.val2
+                           ->  Data Node Scan (Node Count [2]) on tab2  (cost=0.00..1.01 rows=1000 width=8)
+                                 Output: tab2.val, tab2.val2
+(22 rows)
+
+-- aggregates over aggregates
+select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+ sum 
+-----
+   8
+  17
+(2 rows)
+
+explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+                                             QUERY PLAN                                             
+----------------------------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.07..1.09 rows=1 width=12)
+   Output: sum(q1.y), q1.x
+   ->  Sort  (cost=1.07..1.07 rows=1 width=12)
+         Output: q1.y, q1.x
+         Sort Key: q1.x
+         ->  Subquery Scan on q1  (cost=1.02..1.06 rows=1 width=12)
+               Output: q1.y, q1.x
+               ->  GroupAggregate  (cost=1.02..1.05 rows=1 width=8)
+                     Output: pg_catalog.sum((sum(tab1.val))), ((tab1.val2 % 2)), tab1.val2
+                     ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+                           Output: (sum(tab1.val)), ((tab1.val2 % 2)), tab1.val2
+                           ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
+                                 Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2
+(13 rows)
+
+-- group by without aggregate
+select val2 from tab1 group by val2;
+ val2 
+------
+    1
+    2
+    3
+(3 rows)
+
+explain verbose select val2 from tab1 group by val2;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Group  (cost=1.02..1.03 rows=1 width=4)
+   Output: tab1.val2
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: tab1.val2
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
+               Output: tab1.val2
+(6 rows)
+
+select val + val2 from tab1 group by val + val2;
+ ?column? 
+----------
+        2
+        3
+        4
+        7
+        8
+        9
+(6 rows)
+
+explain verbose select val + val2 from tab1 group by val + val2;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Group  (cost=1.03..1.04 rows=1 width=8)
+   Output: ((tab1.val + tab1.val2))
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: ((tab1.val + tab1.val2))
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
+               Output: (tab1.val + tab1.val2)
+(6 rows)
+
+select val + val2, val, val2 from tab1 group by val, val2;
+ ?column? | val | val2 
+----------+-----+------
+        2 |   1 |    1
+        4 |   1 |    3
+        3 |   2 |    1
+        4 |   2 |    2
+        4 |   3 |    1
+        7 |   4 |    3
+        8 |   6 |    2
+        9 |   6 |    3
+(8 rows)
+
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Group  (cost=1.02..1.04 rows=1 width=8)
+   Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
+               Output: (tab1.val + tab1.val2), tab1.val, tab1.val2
+(6 rows)
+
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ ?column? | val | val2 
+----------+-----+------
+        2 |   1 |    1
+        6 |   2 |    4
+        5 |   3 |    2
+        7 |   3 |    4
+        5 |   4 |    1
+        6 |   4 |    2
+(6 rows)
+
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Group  (cost=0.01..0.02 rows=1 width=0)
+   Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1 width=4)
+               Output: (tab1.val + tab2.val2), tab1.val, tab2.val2
+(6 rows)
+
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ ?column? 
+----------
+        2
+        5
+        6
+        7
+(4 rows)
+
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Group  (cost=0.01..0.02 rows=1 width=0)
+   Output: ((tab1.val + tab2.val2))
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: ((tab1.val + tab2.val2))
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1 width=4)
+               Output: (tab1.val + tab2.val2)
+(6 rows)
+
+-- group by with aggregates in expression
+select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+      ?column?       | val2 
+---------------------+------
+ 11.0000000000000000 |    1
+ 14.0000000000000000 |    2
+ 17.6666666666666667 |    3
+(3 rows)
+
+explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+                                          QUERY PLAN                                          
+----------------------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.06 rows=1 width=8)
+   Output: (((count(*) + sum(val)))::numeric + avg(val)), val2
+   ->  Sort  (cost=1.02..1.03 rows=1 width=8)
+         Output: val, val2
+         Sort Key: tab1.val2
+         ->  Result  (cost=0.00..1.01 rows=1 width=8)
+               Output: val, val2
+               ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
+                     Output: val, val2
+                     ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
+                           Output: val, val2
+(11 rows)
+
+-- group by with expressions in group by clause
+select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+ sum |        avg         | ?column? 
+-----+--------------------+----------
+   6 | 2.0000000000000000 |        2
+   8 | 4.0000000000000000 |        4
+  11 | 3.6666666666666667 |        6
+(3 rows)
+
+explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+                                          QUERY PLAN                                           
+-----------------------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.03..1.06 rows=1 width=8)
+   Output: pg_catalog.sum((sum(tab1.val))), pg_catalog.avg((avg(tab1.val))), ((2 * tab1.val2))
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (sum(tab1.val)), (avg(tab1.val)), ((2 * tab1.val2))
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
+               Output: sum(tab1.val), avg(tab1.val), (2 * tab1.val2)
+(6 rows)
+
+drop table tab1;
+drop table tab2;
+-- some tests involving nulls, characters, float type etc.
+create table def(a int, b varchar(25)); 
 insert into def VALUES (NULL, NULL);
 insert into def VALUES (1, NULL);
 insert into def VALUES (NULL, 'One');
@@ -844,30 +1463,626 @@ explain verbose select a,count(a) from def group by a order by a;
                Output: a
                ->  Materialize  (cost=0.00..1.01 rows=1 width=4)
                      Output: a, b
-                     ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=4)
+                     ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
                            Output: a, b
 (11 rows)
 
 select avg(a) from def group by a; 
           avg           
 ------------------------
-                       
+ 1.00000000000000000000
+     2.0000000000000000
+     3.0000000000000000
+     4.0000000000000000
+     5.0000000000000000
      6.0000000000000000
+     7.0000000000000000
      8.0000000000000000
-     5.0000000000000000
- 1.00000000000000000000
      9.0000000000000000
+    10.0000000000000000
+                       
+(11 rows)
+
+select avg(a) from def group by a;
+          avg           
+------------------------
+ 1.00000000000000000000
      2.0000000000000000
      3.0000000000000000
-    10.0000000000000000
+     4.0000000000000000
+     5.0000000000000000
+     6.0000000000000000
      7.0000000000000000
+     8.0000000000000000
+     9.0000000000000000
+    10.0000000000000000
+                       
+(11 rows)
+
+explain verbose select avg(a) from def group by a;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=4)
+   Output: pg_catalog.avg((avg(def.a))), def.a
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (avg(def.a)), def.a
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
+               Output: avg(def.a), def.a
+(6 rows)
+
+select avg(a) from def group by b;
+        avg         
+--------------------
+                   
+ 6.2000000000000000
+ 4.5000000000000000
+ 4.0000000000000000
+(4 rows)
+
+explain verbose select avg(a) from def group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=33)
+   Output: pg_catalog.avg((avg(def.a))), def.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (avg(def.a)), def.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=33)
+               Output: avg(def.a), def.b
+(6 rows)
+
+select sum(a) from def group by b;
+ sum 
+-----
+    
+  31
+  18
+   8
+(4 rows)
+
+explain verbose select sum(a) from def group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=33)
+   Output: pg_catalog.sum((sum(def.a))), def.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (sum(def.a)), def.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=33)
+               Output: sum(def.a), def.b
+(6 rows)
+
+select count(*) from def group by b;
+ count 
+-------
+     1
+     5
+     4
+     3
+(4 rows)
+
+explain verbose select count(*) from def group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=29)
+   Output: pg_catalog.count(*), def.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (count(*)), def.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=29)
+               Output: count(*), def.b
+(6 rows)
+
+select count(*) from def where a is not null group by a;
+ count 
+-------
+     1
+     2
+     1
+     1
+     1
+     1
+     1
+     1
+     1
+     1
+(10 rows)
+
+explain verbose select count(*) from def where a is not null group by a;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=4)
+   Output: pg_catalog.count(*), def.a
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (count(*)), def.a
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
+               Output: count(*), def.a
+(6 rows)
+
+select b from def group by b;
+   b   
+-------
+ One
+ Three
+ Two
+ 
+(4 rows)
+
+explain verbose select b from def group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ Group  (cost=1.02..1.03 rows=1 width=29)
+   Output: def.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: def.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=29)
+               Output: def.b
+(6 rows)
+
+select b,count(b) from def group by b;
+   b   | count 
+-------+-------
+ One   |     1
+ Three |     5
+ Two   |     4
+       |     0
+(4 rows)
+
+explain verbose select b,count(b) from def group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=29)
+   Output: def.b, count((count(def.b)))
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: def.b, (count(def.b))
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=29)
+               Output: def.b, count(def.b)
+(6 rows)
+
+select count(*) from def where b is null group by b;
+ count 
+-------
+     3
+(1 row)
+
+explain verbose select count(*) from def where b is null group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=29)
+   Output: pg_catalog.count(*), def.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (count(*)), def.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=29)
+               Output: count(*), def.b
+(6 rows)
+
+create table g(a int, b float, c numeric);
+insert into g values(1,2.1,3.2);
+insert into g values(1,2.1,3.2);
+insert into g values(2,2.3,5.2);
+select sum(a) from g group by a;
+ sum 
+-----
+   2
+   2
+(2 rows)
+
+explain verbose select sum(a) from g group by a;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=4)
+   Output: pg_catalog.sum((sum(g.a))), g.a
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (sum(g.a)), g.a
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=4)
+               Output: sum(g.a), g.a
+(6 rows)
+
+select sum(b) from g group by b;
+ sum 
+-----
+ 4.2
+ 2.3
+(2 rows)
+
+explain verbose select sum(b) from g group by b;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=8)
+   Output: sum((sum(g.b))), g.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (sum(g.b)), g.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=8)
+               Output: sum(g.b), g.b
+(6 rows)
+
+select sum(c) from g group by b;
+ sum 
+-----
+ 6.4
+ 5.2
+(2 rows)
+
+explain verbose select sum(c) from g group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=40)
+   Output: sum((sum(g.c))), g.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (sum(g.c)), g.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=40)
+               Output: sum(g.c), g.b
+(6 rows)
+
+select avg(a) from g group by b;
+          avg           
+------------------------
+ 1.00000000000000000000
+     2.0000000000000000
+(2 rows)
+
+explain verbose select avg(a) from g group by b;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=12)
+   Output: pg_catalog.avg((avg(g.a))), g.b
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (avg(g.a)), g.b
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=12)
+               Output: avg(g.a), g.b
+(6 rows)
+
+select avg(b) from g group by c;
+ avg 
+-----
+ 2.1
+ 2.3
+(2 rows)
+
+explain verbose select avg(b) from g group by c;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=40)
+   Output: pg_catalog.avg((avg(g.b))), g.c
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (avg(g.b)), g.c
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=40)
+               Output: avg(g.b), g.c
+(6 rows)
+
+select avg(c) from g group by c;
+        avg         
+--------------------
+ 3.2000000000000000
+ 5.2000000000000000
+(2 rows)
+
+explain verbose select avg(c) from g group by c;
+                                    QUERY PLAN                                     
+-----------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=32)
+   Output: pg_catalog.avg((avg(g.c))), g.c
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (avg(g.c)), g.c
+         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=32)
+               Output: avg(g.c), g.c
+(6 rows)
+
+drop table def;
+drop table g;
+-- Combination 4 enable_hashagg off and replicated tables.
+-- repeat the same tests for replicated tables
+-- create required tables and fill them with data
+create table tab1 (val int, val2 int) distribute by replication;
+create table tab2 (val int, val2 int) distribute by replication;
+insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3);
+insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2);
+select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+ count | sum |        avg         |     ?column?     | val2 
+-------+-----+--------------------+------------------+------
+     3 |   6 | 2.0000000000000000 |                2 |    1
+     2 |   8 | 4.0000000000000000 |                4 |    2
+     3 |  11 | 3.6666666666666667 | 3.66666666666667 |    3
+(3 rows)
+
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+                                                 QUERY PLAN                                                  
+-------------------------------------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.07 rows=1 width=8)
+   Output: count(*), sum(val), avg(val), ((sum(val))::double precision / (count(*))::double precision), val2
+   ->  Sort  (cost=1.02..1.03 rows=1 width=8)
+         Output: val, val2
+         Sort Key: tab1.val2
+         ->  Result  (cost=0.00..1.01 rows=1 width=8)
+               Output: val, val2
+               ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
+                     Output: val, val2
+                     ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=8)
+                           Output: val, val2
+(11 rows)
+
+-- joins and group by
+select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+ count | sum |         avg         |     ?column?     | val2 | val2 
+-------+-----+---------------------+------------------+------+------
+     9 |  78 |  8.6666666666666667 | 8.66666666666667 |    1 |    1
+     6 |  96 | 16.0000000000000000 |               16 |    2 |    2
+     3 |     |                     |                  |    3 |     
+     3 |     |                     |                  |      |    4
+(4 rows)
+
+explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+                                                                                    QUERY PLAN                                                                                     
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ GroupAggregate  (cost=2.08..2.14 rows=1 width=16)
+   Output: count(*), sum((tab1.val * tab2.val)), avg((tab1.val * tab2.val)), ((sum((tab1.val * tab2.val)))::double precision / (count(*))::double precision), tab1.val2, tab2.val2
+   ->  Sort  (cost=2.08..2.09 rows=1 width=16)
+         Output: tab1.val, tab2.val, tab1.val2, tab2.val2
+         Sort Key: tab1.val2, tab2.val2
+         ->  Merge Full Join  (cost=2.05..2.07 rows=1 width=16)
+               Output: tab1.val, tab2.val, tab1.val2, tab2.val2
+               Merge Cond: (tab1.val2 = tab2.val2)
+               ->  Sort  (cost=1.02..1.03 rows=1 width=8)
+                     Output: tab1.val, tab1.val2
+                     Sort Key: tab1.val2
+                     ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
+                           Output: tab1.val, tab1.val2
+                           ->  Data Node Scan (Node Count [1]) on tab1  (cost=0.00..1.01 rows=1000 width=8)
+                                 Output: tab1.val, tab1.val2
+               ->  Sort  (cost=1.02..1.03 rows=1 width=8)
+                     Output: tab2.val, tab2.val2
+                     Sort Key: tab2.val2
+                     ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
+                           Output: tab2.val, tab2.val2
+                           ->  Data Node Scan (Node Count [1]) on tab2  (cost=0.00..1.01 rows=1000 width=8)
+                                 Output: tab2.val, tab2.val2
+(22 rows)
+
+-- aggregates over aggregates
+select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+ sum 
+-----
+   8
+  17
+(2 rows)
+
+explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+                                             QUERY PLAN                                             
+----------------------------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.07..1.09 rows=1 width=12)
+   Output: sum(q1.y), q1.x
+   ->  Sort  (cost=1.07..1.07 rows=1 width=12)
+         Output: q1.y, q1.x
+         Sort Key: q1.x
+         ->  Subquery Scan on q1  (cost=1.02..1.06 rows=1 width=12)
+               Output: q1.y, q1.x
+               ->  GroupAggregate  (cost=1.02..1.05 rows=1 width=8)
+                     Output: pg_catalog.sum((sum(tab1.val))), ((tab1.val2 % 2)), tab1.val2
+                     ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+                           Output: (sum(tab1.val)), ((tab1.val2 % 2)), tab1.val2
+                           ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=8)
+                                 Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2
+(13 rows)
+
+-- group by without aggregate
+select val2 from tab1 group by val2;
+ val2 
+------
+    1
+    2
+    3
+(3 rows)
+
+explain verbose select val2 from tab1 group by val2;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Group  (cost=1.02..1.03 rows=1 width=4)
+   Output: tab1.val2
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: tab1.val2
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=4)
+               Output: tab1.val2
+(6 rows)
+
+select val + val2 from tab1 group by val + val2;
+ ?column? 
+----------
+        2
+        3
+        4
+        7
+        8
+        9
+(6 rows)
+
+explain verbose select val + val2 from tab1 group by val + val2;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Group  (cost=1.03..1.04 rows=1 width=8)
+   Output: ((tab1.val + tab1.val2))
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: ((tab1.val + tab1.val2))
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=8)
+               Output: (tab1.val + tab1.val2)
+(6 rows)
+
+select val + val2, val, val2 from tab1 group by val, val2;
+ ?column? | val | val2 
+----------+-----+------
+        2 |   1 |    1
+        4 |   1 |    3
+        3 |   2 |    1
+        4 |   2 |    2
+        4 |   3 |    1
+        7 |   4 |    3
+        8 |   6 |    2
+        9 |   6 |    3
+(8 rows)
+
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Group  (cost=1.02..1.04 rows=1 width=8)
+   Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=8)
+               Output: (tab1.val + tab1.val2), tab1.val, tab1.val2
+(6 rows)
+
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ ?column? | val | val2 
+----------+-----+------
+        2 |   1 |    1
+        6 |   2 |    4
+        5 |   3 |    2
+        7 |   3 |    4
+        5 |   4 |    1
+        6 |   4 |    2
+(6 rows)
+
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Group  (cost=0.01..0.02 rows=1 width=0)
+   Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1 width=4)
+               Output: (tab1.val + tab2.val2), tab1.val, tab2.val2
+(6 rows)
+
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ ?column? 
+----------
+        2
+        5
+        6
+        7
+(4 rows)
+
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Group  (cost=0.01..0.02 rows=1 width=0)
+   Output: ((tab1.val + tab2.val2))
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: ((tab1.val + tab2.val2))
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1 width=4)
+               Output: (tab1.val + tab2.val2)
+(6 rows)
+
+-- group by with aggregates in expression
+select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+      ?column?       | val2 
+---------------------+------
+ 11.0000000000000000 |    1
+ 14.0000000000000000 |    2
+ 17.6666666666666667 |    3
+(3 rows)
+
+explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+                                          QUERY PLAN                                          
+----------------------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.06 rows=1 width=8)
+   Output: (((count(*) + sum(val)))::numeric + avg(val)), val2
+   ->  Sort  (cost=1.02..1.03 rows=1 width=8)
+         Output: val, val2
+         Sort Key: tab1.val2
+         ->  Result  (cost=0.00..1.01 rows=1 width=8)
+               Output: val, val2
+               ->  Materialize  (cost=0.00..1.01 rows=1 width=8)
+                     Output: val, val2
+                     ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=8)
+                           Output: val, val2
+(11 rows)
+
+-- group by with expressions in group by clause
+select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+ sum |        avg         | ?column? 
+-----+--------------------+----------
+   6 | 2.0000000000000000 |        2
+   8 | 4.0000000000000000 |        4
+  11 | 3.6666666666666667 |        6
+(3 rows)
+
+explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+                                          QUERY PLAN                                           
+-----------------------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.03..1.06 rows=1 width=8)
+   Output: pg_catalog.sum((sum(tab1.val))), pg_catalog.avg((avg(tab1.val))), ((2 * tab1.val2))
+   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
+         Output: (sum(tab1.val)), (avg(tab1.val)), ((2 * tab1.val2))
+         ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=8)
+               Output: sum(tab1.val), avg(tab1.val), (2 * tab1.val2)
+(6 rows)
+
+drop table tab1;
+drop table tab2;
+-- some tests involving nulls, characters, float type etc.
+create table def(a int, b varchar(25)) distribute by replication; 
+insert into def VALUES (NULL, NULL);
+insert into def VALUES (1, NULL);
+insert into def VALUES (NULL, 'One');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (3, 'Three');
+insert into def VALUES (4, 'Three');
+insert into def VALUES (5, 'Three');
+insert into def VALUES (6, 'Two');
+insert into def VALUES (7, NULL);
+insert into def VALUES (8, 'Two');
+insert into def VALUES (9, 'Three');
+insert into def VALUES (10, 'Three');
+select a,count(a) from def group by a order by a;
+ a  | count 
+----+-------
+  1 |     1
+  2 |     2
+  3 |     1
+  4 |     1
+  5 |     1
+  6 |     1
+  7 |     1
+  8 |     1
+  9 |     1
+ 10 |     1
+    |     0
+(11 rows)
+
+explain verbose select a,count(a) from def group by a order by a;
+                                          QUERY PLAN                                          
+----------------------------------------------------------------------------------------------
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=4)
+   Output: a, count(a)
+   ->  Sort  (cost=1.02..1.03 rows=1 width=4)
+         Output: a
+         Sort Key: def.a
+         ->  Result  (cost=0.00..1.01 rows=1 width=4)
+               Output: a
+               ->  Materialize  (cost=0.00..1.01 rows=1 width=4)
+                     Output: a, b
+                     ->  Data Node Scan (Node Count [1])  (cost=0.00..1.01 rows=1000 width=4)
+                           Output: a, b
+(11 rows)
+
+select avg(a) from def group by a; 
+          avg           
+------------------------
+ 1.00000000000000000000
+     2.0000000000000000
+     3.0000000000000000
      4.0000000000000000
+     5.0000000000000000
+     6.0000000000000000
+     7.0000000000000000
+     8.0000000000000000
+     9.0000000000000000
+    10.0000000000000000
+                       
 (11 rows)
 
 explain verbose select avg(a) from def group by a; 
                                     QUERY PLAN                                    
 ----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=4)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=4)
    Output: pg_catalog.avg((avg(def.a))), def.a
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (avg(def.a)), def.a
@@ -878,23 +2093,23 @@ explain verbose select avg(a) from def group by a;
 select avg(a) from def group by a;
           avg           
 ------------------------
-                       
-     6.0000000000000000
-     8.0000000000000000
-     5.0000000000000000
  1.00000000000000000000
-     9.0000000000000000
      2.0000000000000000
      3.0000000000000000
-    10.0000000000000000
-     7.0000000000000000
      4.0000000000000000
+     5.0000000000000000
+     6.0000000000000000
+     7.0000000000000000
+     8.0000000000000000
+     9.0000000000000000
+    10.0000000000000000
+                       
 (11 rows)
 
 explain verbose select avg(a) from def group by a;
                                     QUERY PLAN                                    
 ----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=4)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=4)
    Output: pg_catalog.avg((avg(def.a))), def.a
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (avg(def.a)), def.a
@@ -905,16 +2120,16 @@ explain verbose select avg(a) from def group by a;
 select avg(a) from def group by b;
         avg         
 --------------------
- 4.0000000000000000
                    
- 4.5000000000000000
  6.2000000000000000
+ 4.5000000000000000
+ 4.0000000000000000
 (4 rows)
 
 explain verbose select avg(a) from def group by b;
                                     QUERY PLAN                                     
 -----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=33)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=33)
    Output: pg_catalog.avg((avg(def.a))), def.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (avg(def.a)), def.b
@@ -925,16 +2140,16 @@ explain verbose select avg(a) from def group by b;
 select sum(a) from def group by b;
  sum 
 -----
-   8
     
-  18
   31
+  18
+   8
 (4 rows)
 
 explain verbose select sum(a) from def group by b;
                                     QUERY PLAN                                     
 -----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=33)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=33)
    Output: pg_catalog.sum((sum(def.a))), def.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (sum(def.a)), def.b
@@ -945,16 +2160,16 @@ explain verbose select sum(a) from def group by b;
 select count(*) from def group by b;
  count 
 -------
-     3
      1
-     4
      5
+     4
+     3
 (4 rows)
 
 explain verbose select count(*) from def group by b;
                                     QUERY PLAN                                     
 -----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=29)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=29)
    Output: pg_catalog.count(*), def.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (count(*)), def.b
@@ -966,12 +2181,12 @@ select count(*) from def where a is not null group by a;
  count 
 -------
      1
+     2
      1
      1
      1
      1
      1
-     2
      1
      1
      1
@@ -980,7 +2195,7 @@ select count(*) from def where a is not null group by a;
 explain verbose select count(*) from def where a is not null group by a;
                                     QUERY PLAN                                    
 ----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=4)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=4)
    Output: pg_catalog.count(*), def.a
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (count(*)), def.a
@@ -991,16 +2206,16 @@ explain verbose select count(*) from def where a is not null group by a;
 select b from def group by b;
    b   
 -------
- 
  One
- Two
  Three
+ Two
+ 
 (4 rows)
 
 explain verbose select b from def group by b;
                                     QUERY PLAN                                     
 -----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=29)
+ Group  (cost=1.02..1.03 rows=1 width=29)
    Output: def.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: def.b
@@ -1011,16 +2226,16 @@ explain verbose select b from def group by b;
 select b,count(b) from def group by b;
    b   | count 
 -------+-------
-       |     0
  One   |     1
- Two   |     4
  Three |     5
+ Two   |     4
+       |     0
 (4 rows)
 
 explain verbose select b,count(b) from def group by b;
                                     QUERY PLAN                                     
 -----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=29)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=29)
    Output: def.b, count((count(def.b)))
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: def.b, (count(def.b))
@@ -1037,7 +2252,7 @@ select count(*) from def where b is null group by b;
 explain verbose select count(*) from def where b is null group by b;
                                     QUERY PLAN                                     
 -----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=29)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=29)
    Output: pg_catalog.count(*), def.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (count(*)), def.b
@@ -1059,7 +2274,7 @@ select sum(a) from g group by a;
 explain verbose select sum(a) from g group by a;
                                     QUERY PLAN                                    
 ----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=4)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=4)
    Output: pg_catalog.sum((sum(g.a))), g.a
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (sum(g.a)), g.a
@@ -1070,14 +2285,14 @@ explain verbose select sum(a) from g group by a;
 select sum(b) from g group by b;
  sum 
 -----
- 2.3
  4.2
+ 2.3
 (2 rows)
 
 explain verbose select sum(b) from g group by b;
                                     QUERY PLAN                                    
 ----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=8)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=8)
    Output: sum((sum(g.b))), g.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (sum(g.b)), g.b
@@ -1088,14 +2303,14 @@ explain verbose select sum(b) from g group by b;
 select sum(c) from g group by b;
  sum 
 -----
- 5.2
  6.4
+ 5.2
 (2 rows)
 
 explain verbose select sum(c) from g group by b;
                                     QUERY PLAN                                     
 -----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=40)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=40)
    Output: sum((sum(g.c))), g.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (sum(g.c)), g.b
@@ -1106,14 +2321,14 @@ explain verbose select sum(c) from g group by b;
 select avg(a) from g group by b;
           avg           
 ------------------------
-     2.0000000000000000
  1.00000000000000000000
+     2.0000000000000000
 (2 rows)
 
 explain verbose select avg(a) from g group by b;
                                     QUERY PLAN                                     
 -----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=12)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=12)
    Output: pg_catalog.avg((avg(g.a))), g.b
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (avg(g.a)), g.b
@@ -1124,14 +2339,14 @@ explain verbose select avg(a) from g group by b;
 select avg(b) from g group by c;
  avg 
 -----
- 2.3
  2.1
+ 2.3
 (2 rows)
 
 explain verbose select avg(b) from g group by c;
                                     QUERY PLAN                                     
 -----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=40)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=40)
    Output: pg_catalog.avg((avg(g.b))), g.c
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (avg(g.b)), g.c
@@ -1142,14 +2357,14 @@ explain verbose select avg(b) from g group by c;
 select avg(c) from g group by c;
         avg         
 --------------------
- 5.2000000000000000
  3.2000000000000000
+ 5.2000000000000000
 (2 rows)
 
 explain verbose select avg(c) from g group by c;
                                     QUERY PLAN                                     
 -----------------------------------------------------------------------------------
- HashAggregate  (cost=1.02..1.03 rows=1 width=32)
+ GroupAggregate  (cost=1.02..1.05 rows=1 width=32)
    Output: pg_catalog.avg((avg(g.c))), g.c
    ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
          Output: (avg(g.c)), g.c
@@ -1159,3 +2374,4 @@ explain verbose select avg(c) from g group by c;
 
 drop table def;
 drop table g;
+reset enable_hashagg;
diff --git a/src/test/regress/sql/xc_groupby.sql b/src/test/regress/sql/xc_groupby.sql
index 1fd0f50e11..f262d7281e 100644
--- a/src/test/regress/sql/xc_groupby.sql
+++ b/src/test/regress/sql/xc_groupby.sql
@@ -1,9 +1,18 @@
+-- this file contains tests for GROUP BY with combinations of following
+-- 1. enable_hashagg = on/off (to force the grouping by sorting)
+-- 2. distributed or replicated tables across the datanodes
+-- If a testcase is added to any of the combinations, please check if it's
+-- applicable in other combinations as well.
+
+-- Combination 1: enable_hashagg on and distributed tables
+set enable_hashagg to on;
 -- create required tables and fill them with data
 create table tab1 (val int, val2 int);
 create table tab2 (val int, val2 int);
 insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3);
 insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2);
 select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
 -- joins and group by
 select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
 explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
@@ -11,7 +20,6 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val
 select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
 explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
 -- group by without aggregate
-set enable_hashagg to off;
 select val2 from tab1 group by val2;
 explain verbose select val2 from tab1 group by val2;
 select val + val2 from tab1 group by val + val2;
@@ -22,7 +30,6 @@ select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val
 explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
 select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
 explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
-reset enable_hashagg;
 -- group by with aggregates in expression
 select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
 explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
@@ -32,6 +39,66 @@ explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
 drop table tab1;
 drop table tab2;
 
+-- some tests involving nulls, characters, float type etc.
+create table def(a int, b varchar(25)); 
+insert into def VALUES (NULL, NULL);
+insert into def VALUES (1, NULL);
+insert into def VALUES (NULL, 'One');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (3, 'Three');
+insert into def VALUES (4, 'Three');
+insert into def VALUES (5, 'Three');
+insert into def VALUES (6, 'Two');
+insert into def VALUES (7, NULL);
+insert into def VALUES (8, 'Two');
+insert into def VALUES (9, 'Three');
+insert into def VALUES (10, 'Three');
+
+select a,count(a) from def group by a order by a;
+explain verbose select a,count(a) from def group by a order by a;
+select avg(a) from def group by a; 
+select avg(a) from def group by a;
+explain verbose select avg(a) from def group by a;
+select avg(a) from def group by b;
+explain verbose select avg(a) from def group by b;
+select sum(a) from def group by b;
+explain verbose select sum(a) from def group by b;
+select count(*) from def group by b;
+explain verbose select count(*) from def group by b;
+select count(*) from def where a is not null group by a;
+explain verbose select count(*) from def where a is not null group by a;
+
+select b from def group by b;
+explain verbose select b from def group by b;
+select b,count(b) from def group by b;
+explain verbose select b,count(b) from def group by b;
+select count(*) from def where b is null group by b;
+explain verbose select count(*) from def where b is null group by b;
+
+create table g(a int, b float, c numeric);
+insert into g values(1,2.1,3.2);
+insert into g values(1,2.1,3.2);
+insert into g values(2,2.3,5.2);
+
+select sum(a) from g group by a;
+explain verbose select sum(a) from g group by a;
+select sum(b) from g group by b;
+explain verbose select sum(b) from g group by b;
+select sum(c) from g group by b;
+explain verbose select sum(c) from g group by b;
+
+select avg(a) from g group by b;
+explain verbose select avg(a) from g group by b;
+select avg(b) from g group by c;
+explain verbose select avg(b) from g group by c;
+select avg(c) from g group by c;
+explain verbose select avg(c) from g group by c;
+
+drop table def;
+drop table g;
+
+-- Combination 2, enable_hashagg on and replicated tables.
 -- repeat the same tests for replicated tables
 -- create required tables and fill them with data
 create table tab1 (val int, val2 int) distribute by replication;
@@ -47,7 +114,6 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val
 select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
 explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
 -- group by without aggregate
-set enable_hashagg to off;
 select val2 from tab1 group by val2;
 explain verbose select val2 from tab1 group by val2;
 select val + val2 from tab1 group by val + val2;
@@ -58,7 +124,102 @@ select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val
 explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
 select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
 explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+-- group by with aggregates in expression
+select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+-- group by with expressions in group by clause
+select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+drop table tab1;
+drop table tab2;
+
+-- some tests involving nulls, characters, float type etc.
+create table def(a int, b varchar(25)) distribute by replication; 
+insert into def VALUES (NULL, NULL);
+insert into def VALUES (1, NULL);
+insert into def VALUES (NULL, 'One');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (3, 'Three');
+insert into def VALUES (4, 'Three');
+insert into def VALUES (5, 'Three');
+insert into def VALUES (6, 'Two');
+insert into def VALUES (7, NULL);
+insert into def VALUES (8, 'Two');
+insert into def VALUES (9, 'Three');
+insert into def VALUES (10, 'Three');
+
+select a,count(a) from def group by a order by a;
+explain verbose select a,count(a) from def group by a order by a;
+select avg(a) from def group by a; 
+explain verbose select avg(a) from def group by a; 
+select avg(a) from def group by a;
+explain verbose select avg(a) from def group by a;
+select avg(a) from def group by b;
+explain verbose select avg(a) from def group by b;
+select sum(a) from def group by b;
+explain verbose select sum(a) from def group by b;
+select count(*) from def group by b;
+explain verbose select count(*) from def group by b;
+select count(*) from def where a is not null group by a;
+explain verbose select count(*) from def where a is not null group by a;
+
+select b from def group by b;
+explain verbose select b from def group by b;
+select b,count(b) from def group by b;
+explain verbose select b,count(b) from def group by b;
+select count(*) from def where b is null group by b;
+explain verbose select count(*) from def where b is null group by b;
+
+create table g(a int, b float, c numeric) distribute by replication;
+insert into g values(1,2.1,3.2);
+insert into g values(1,2.1,3.2);
+insert into g values(2,2.3,5.2);
+
+select sum(a) from g group by a;
+explain verbose select sum(a) from g group by a;
+select sum(b) from g group by b;
+explain verbose select sum(b) from g group by b;
+select sum(c) from g group by b;
+explain verbose select sum(c) from g group by b;
+
+select avg(a) from g group by b;
+explain verbose select avg(a) from g group by b;
+select avg(b) from g group by c;
+explain verbose select avg(b) from g group by c;
+select avg(c) from g group by c;
+explain verbose select avg(c) from g group by c;
+
+drop table def;
+drop table g;
 reset enable_hashagg;
+
+-- Combination 3 enable_hashagg off and distributed tables
+set enable_hashagg to off;
+-- create required tables and fill them with data
+create table tab1 (val int, val2 int);
+create table tab2 (val int, val2 int);
+insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3);
+insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2);
+select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+-- joins and group by
+select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+-- aggregates over aggregates
+select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+-- group by without aggregate
+select val2 from tab1 group by val2;
+explain verbose select val2 from tab1 group by val2;
+select val + val2 from tab1 group by val + val2;
+explain verbose select val + val2 from tab1 group by val + val2;
+select val + val2, val, val2 from tab1 group by val, val2;
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
 -- group by with aggregates in expression
 select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
 explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
@@ -127,7 +288,42 @@ explain verbose select avg(c) from g group by c;
 drop table def;
 drop table g;
 
--- same test with replicated tables
+-- Combination 4 enable_hashagg off and replicated tables.
+-- repeat the same tests for replicated tables
+-- create required tables and fill them with data
+create table tab1 (val int, val2 int) distribute by replication;
+create table tab2 (val int, val2 int) distribute by replication;
+insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3);
+insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2);
+select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+-- joins and group by
+select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+-- aggregates over aggregates
+select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+-- group by without aggregate
+select val2 from tab1 group by val2;
+explain verbose select val2 from tab1 group by val2;
+select val + val2 from tab1 group by val + val2;
+explain verbose select val + val2 from tab1 group by val + val2;
+select val + val2, val, val2 from tab1 group by val, val2;
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+-- group by with aggregates in expression
+select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+-- group by with expressions in group by clause
+select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+drop table tab1;
+drop table tab2;
+
+-- some tests involving nulls, characters, float type etc.
 create table def(a int, b varchar(25)) distribute by replication; 
 insert into def VALUES (NULL, NULL);
 insert into def VALUES (1, NULL);
@@ -186,3 +382,4 @@ explain verbose select avg(c) from g group by c;
 
 drop table def;
 drop table g;
+reset enable_hashagg;