summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/optimizer/plan/createplan.c612
-rw-r--r--src/backend/optimizer/plan/planner.c31
-rw-r--r--src/include/optimizer/planmain.h3
-rw-r--r--src/test/regress/expected/xc_groupby.out1484
-rw-r--r--src/test/regress/sql/xc_groupby.sql205
5 files changed, 1815 insertions, 520 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 97016f9723..a666914d7a 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -41,6 +41,7 @@
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
+#include "rewrite/rewriteManip.h"
#endif
#include "utils/lsyscache.h"
@@ -89,6 +90,8 @@ static Alias *generate_remote_rte_alias(RangeTblEntry *rte, int varno,
char *aliasname, int reduce_level);
static void pgxc_locate_grouping_columns(PlannerInfo *root, List *tlist,
AttrNumber *grpColIdx);
+static List *pgxc_process_grouping_targetlist(PlannerInfo *root,
+ List **local_tlist);
#endif
static NestLoop *create_nestloop_plan(PlannerInfo *root, NestPath *best_path,
Plan *outer_plan, Plan *inner_plan);
@@ -5056,7 +5059,7 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan)
}
/*
- * create_remoteagg_plan
+ * create_remotegrouping_plan
* Check if the grouping and aggregates can be pushed down to the
* datanodes.
* Right now we can push with following restrictions
@@ -5064,6 +5067,7 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan)
* expressions in group by clauses
* 2. No distinct or order by clauses
* 3. No windowing clause
+ * 4. No having clause
*
* Inputs
* root - planerInfo root for this query
@@ -5075,306 +5079,14 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan)
* node in case there are no local clauses.
*/
Plan *
-create_remoteagg_plan(PlannerInfo *root, Plan *local_plan)
+create_remotegrouping_plan(PlannerInfo *root, Plan *local_plan)
{
Query *query = root->parse;
- RemoteQuery *agg_left;
- Plan *temp_plan = local_plan->lefttree;
- List *agg_tlist = local_plan->targetlist;
- StringInfo remote_sql_stmt = makeStringInfo();
- StringInfo remote_targetlist = makeStringInfo();
- StringInfo remote_fromlist = makeStringInfo();
- StringInfo groupby_clause = makeStringInfo();
- StringInfo in_alias = makeStringInfo();
- ListCell *temp;
- ListCell *temp_remote;
- RemoteQuery *agg_remote;
- Plan *agg_remote_plan;
- RangeTblEntry *dummy_rte;
- Index dummy_rtindex;
- List *base_tlist;
- Agg *agg_plan = NULL;
- List *temp_vars; /* temporarily hold the VARs */
- List *temp_vartlist; /* temporarity hold tlist of VARs */
- Relids in_relids; /* the list of Relids referenced by
- * the Agg plan
- */
-
- /* For now only Agg plans */
- Assert(IsA(local_plan, Agg));
- agg_plan = (Agg *)local_plan;
-
- /*
- * We don't push aggregation and grouping to datanodes, in case there are
- * windowing aggregates, distinct, having clause or sort clauses.
- */
- if (query->hasWindowFuncs ||
- query->distinctClause ||
- query->sortClause ||
- query->havingQual)
- return local_plan;
-
- /*
- * Optimize if only the tree underneath is reduced to RemoteQuery, any other
- * node there indicates that the scans can not be completely pushed to the
- * remote data nodes.
- * RemoteQuery is hidden underneath Material plan, take it out.
- */
- if (IsA(temp_plan, Material))
- temp_plan = temp_plan->lefttree;
- if (!IsA(temp_plan, RemoteQuery))
- return local_plan;
- else
- agg_left = (RemoteQuery *)temp_plan;
-
- /*
- * Walk through the target list and find out whether we can push the
- * aggregates and grouping to datanodes. We can do so if the target list
- * contains plain aggregates (without any expression involving those) and
- * expressions in group by clauses only (last one to make the query legit.
- */
- foreach(temp, agg_tlist)
- {
- TargetEntry *tle = lfirst(temp);
- Node *expr = (Node *)tle->expr;
-
- /*
- * PGXCTODO: once we allow sort clauses to be pushed to data nodes,
- * along with group by clause, this condition will need to be changed.
- */
- if (!(IsA(expr, Aggref) || tle->ressortgroupref > 0))
- return local_plan;
- }
-
- /*
- * Cleared of all the charges, now take following steps
- * 1. Create a remote query node reflecting the query to be pushed to the
- * datanode
- * 2. Modify the Agg node passed in so that it reflects the aggregation
- * (collection) to be done at the coordinator based on the results sent by
- * the datanodes.
- */
- appendStringInfo(in_alias, "%s_%d", "group", root->rs_alias_index);
-
- /* Find all the relations referenced by targetlist of Agg node */
- temp_vars = pull_var_clause((Node *)agg_tlist, PVC_REJECT_PLACEHOLDERS);
- findReferencedVars(temp_vars, (Plan *)agg_left, &temp_vartlist, &in_relids);
-
- /*
- * Build partial RemoteQuery node to be used for creating the Select clause
- * to be sent to the remote node. Rest of the node will be built later
- */
- agg_remote = makeNode(RemoteQuery);
-
- /*
- * Save information about the plan we are reducing.
- * We may need this information later if more entries are added to it
- * as part of the remote expression optimization.
- */
- agg_remote->remotejoin = false;
- agg_remote->inner_alias = pstrdup(in_alias->data);
- agg_remote->inner_reduce_level = agg_left->reduce_level;
- agg_remote->inner_relids = in_relids;
- agg_remote->inner_statement = pstrdup(agg_left->sql_statement);
- agg_remote->exec_nodes = agg_left->exec_nodes;
-
- /* Don't forget to increment the index for the next time around! */
- agg_remote->reduce_level = root->rs_alias_index++;
-
- /* Generate the select clause of the remote query */
- appendStringInfoString(remote_targetlist, "SELECT");
- foreach (temp, agg_tlist)
- {
- TargetEntry *tle = lfirst(temp);
- Node *expr = (Node *)tle->expr;
-
- create_remote_expr(root, local_plan, remote_targetlist, expr, agg_remote);
-
- /* If this is not last target entry, add a comma with space */
- if (lnext(temp))
- appendStringInfoString(remote_targetlist, ",");
- }
-
- /* Generate the from clause of the remote query */
- appendStringInfo(remote_fromlist, "FROM (%s) %s",
- agg_remote->inner_statement, agg_remote->inner_alias);
-
- /*
- * Generate group by clause for the remote query and recompute the group by
- * columE.n locations
- */
- if (query->groupClause)
- {
- int cntCols;
- Assert(IsA(local_plan, Agg));
-
- /*
- * Recompute the column ids of the grouping columns,
- * the group column indexes computed earlier point in the
- * targetlists of the scan plans under this node. But now the grouping
- * column indexes will be pointing in the targetlist of the new
- * RemoteQuery, hence those need to be recomputed.
- */
- pgxc_locate_grouping_columns(root, agg_tlist, agg_plan->grpColIdx);
-
- appendStringInfoString(groupby_clause, "GROUP BY ");
- for (cntCols = 0; cntCols < agg_plan->numCols; cntCols++)
- {
- appendStringInfo(groupby_clause, "%d",
- agg_plan->grpColIdx[cntCols]);
- if (cntCols < agg_plan->numCols - 1)
- appendStringInfoString(groupby_clause, ", ");
- }
- }
-
- /* Generate the remote sql statement from the pieces */
- appendStringInfo(remote_sql_stmt, "%s %s %s", remote_targetlist->data,
- remote_fromlist->data, groupby_clause->data);
-
- /*
- * Set the base_tlist for the RemoteQuery node being created, it's used to
- * create the tuple descriptor for the result from RemoteQuery and rewrite
- * the Aggregates targetlist accept the results of the RemoteQuery.
- */
- base_tlist = add_to_flat_tlist(NIL, get_tlist_exprs(agg_tlist, true));
-
- /*
- * We need to change the return types of the aggregates. Datanodes send the
- * aggregation results in the form of transition results.
- */
- foreach (temp, base_tlist)
- {
- TargetEntry *tle = lfirst(temp);
- Node *expr = (Node *)tle->expr;
- Aggref *agg;
-
- if (IsA(expr, Aggref))
- {
- agg = (Aggref *)expr;
- agg->aggtype = agg->aggtrantype;
- }
- }
-
- /*
- * Create a dummy RTE for the remote query being created. Append the dummy
- * range table entry to the range table. Note that this modifies the master
- * copy the caller passed us, otherwise e.g EXPLAIN VERBOSE will fail to
- * find the rte the Vars built below refer to. Also create the tuple
- * descriptor for the result of this query from the base_tlist (targetlist
- * we used to generate the remote node query).
- */
- dummy_rte = makeNode(RangeTblEntry);
- dummy_rte->reltupdesc = ExecTypeFromTL(base_tlist, false);
- dummy_rte->rtekind = RTE_RELATION;
-
- /* Use a dummy relname... */
- dummy_rte->relname = "__FOREIGN_QUERY__";
- dummy_rte->eref = makeAlias("__FOREIGN_QUERY__", NIL);
-
- /* Rest will be zeroed out in makeNode() */
- root->parse->rtable = lappend(root->parse->rtable, dummy_rte);
- dummy_rtindex = list_length(root->parse->rtable);
-
- /*
- * Change the aggref nodes in the local Agg plan to accept the transition
- * results from the remote query output. Do this after we have created base
- * list, otherwise we might introduce these changes in the base list.
- * Do this after the RTE for the remote query is added to the root.
- */
- forboth (temp, agg_tlist, temp_remote, base_tlist)
- {
- TargetEntry *tle = lfirst(temp);
- Node *expr = (Node *)tle->expr;
- Aggref *agg;
- TargetEntry *tle_remote = lfirst(temp_remote);
- Node *expr_remote = (Node *)tle_remote->expr;
-
- if (IsA(expr, Aggref))
- {
- Assert(IsA(expr_remote, Aggref));
-
- /*
- * Replace the args of the local Aggref with Aggref node to be
- * included in RemoteQuery node, so that set_plan_refs can convert
- * the args into VAR pointing to the appropriate result in the tuple
- * coming from RemoteQuery node.
- * PGXCTODO: should we push this change in targetlists of plans
- * above?
- */
- agg = (Aggref *)expr;
- agg->args = list_make1(makeTargetEntry(copyObject(expr_remote), 1, NULL, false));
- }
- }
-
- /* Build rest of the RemoteQuery node and the plan there */
- agg_remote_plan = &agg_remote->scan.plan;
-
- /* The join targetlist becomes this node's tlist */
- agg_remote_plan->targetlist = base_tlist;
- agg_remote_plan->lefttree = NULL;
- agg_remote_plan->righttree = NULL;
- agg_remote->scan.scanrelid = dummy_rtindex;
- agg_remote->sql_statement = remote_sql_stmt->data;
-
- /* set_plan_refs needs this later */
- agg_remote->base_tlist = base_tlist;
- agg_remote->relname = "__FOREIGN_QUERY__";
- agg_remote->partitioned_replicated = agg_left->partitioned_replicated;
-
- /*
- * Only quals that can be pushed to the remote side the ones in the having
- * clause. Till we work out how to handle having quals in XC, we don't have
- * any quals here.
- * PGXCTODO: the RemoteQuery node that was earlier the lefttree of Agg
- * node, may have local quals. In such case, we have to aggregate and group
- * at coordinator and can not push the grouping clause to the datanodes. Is
- * there a case in XC, where we can have local quals?
- * We actually need not worry about costs since this is the final plan.
- */
- agg_remote_plan->startup_cost = agg_left->scan.plan.startup_cost;
- agg_remote_plan->total_cost = agg_left->scan.plan.total_cost;
- agg_remote_plan->plan_rows = agg_left->scan.plan.plan_rows;
- agg_remote_plan->plan_width = agg_left->scan.plan.plan_width;
-
- /*
- * Modify the passed in Agg plan according to the remote query we built.
- * Materialization is always needed for RemoteQuery in case we need to restart
- * the scan.
- */
- agg_plan->plan.lefttree = (Plan *) make_material(agg_remote_plan);
-
- /* Indicate that we should apply collection function directly */
- agg_plan->skip_trans = true;
-
- return (Plan *)agg_plan;
-}
-
-/*
- * create_remotegroup_plan
- * Given a Group plan, try to push as much of the query to the datanodes and
- * build a Group plan to combiner the results across the datanodes. The Sort
- * node under the Group plan is pushed down to RemoteQuery plan, since the
- * combiner knows how to merge the results across datanodes in sorted manner.
- * Hence there is no separate Sort node.
- *
- * This optimization is applied under following conditions
- * 1. The scan plans under the Group->Sort node is RemoteQuery
- * 2. There is not separate Sort, distinct, having clause in the query.
- *
- * PGXCTODO: we should lift up as many of these restrictions as possible or give
- * reasons why those restrictions are needed.
- */
-Plan *
-create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
-{
- Group *group_plan;
- Query *query = root->parse;
Sort *sort_plan;
RemoteQuery *remote_scan; /* remote query in the passed in plan */
RemoteQuery *remote_group; /* remote query after optimization */
Plan *remote_group_plan; /* plan portion of remote_group */
Plan *temp_plan;
- List *local_tlist; /* target list of the local plan */
List *temp_vars; /* temporarily hold the VARs */
List *temp_vartlist; /* temporarity hold tlist of VARs */
ListCell *temp;
@@ -5384,59 +5096,110 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
StringInfo orderby_clause = makeStringInfo(); /* remote query ORDER BY */
StringInfo remote_fromlist = makeStringInfo(); /* remote query FROM */
StringInfo in_alias = makeStringInfo();
- Relids in_relids;
+ Relids in_relids; /* the list of Relids referenced by lefttree */
Index dummy_rtindex;
List *base_tlist;
RangeTblEntry *dummy_rte;
- int cntCols;
+ int numGroupCols;
+ AttrNumber *grpColIdx;
- if (query->havingQual ||
+ /*
+ * We don't push aggregation and grouping to datanodes, in case there are
+ * windowing aggregates, distinct, having clause or sort clauses.
+ */
+ if (query->hasWindowFuncs ||
query->distinctClause ||
query->sortClause ||
- query->hasWindowFuncs)
+ query->havingQual)
return local_plan;
- /* For now only for Group plans are treated */
- Assert(IsA(local_plan, Group));
- group_plan = (Group *)local_plan;
- remote_scan = NULL;
- temp_plan = local_plan->lefttree;
+ /*
+ * PGXCTODO: we don't support parameterised queries yet. So, for the
+ * time being we don't apply the optimizations for parameterised queries
+ */
+ if (root->glob->boundParams)
+ return local_plan;
+ /* for now only Agg/Group plans */
+ if (local_plan && IsA(local_plan, Agg))
+ {
+ numGroupCols = ((Agg *)local_plan)->numCols;
+ grpColIdx = ((Agg *)local_plan)->grpColIdx;
+ }
+ else if (local_plan && IsA(local_plan, Group))
+ {
+ numGroupCols = ((Group *)local_plan)->numCols;
+ grpColIdx = ((Group *)local_plan)->grpColIdx;
+ }
+ else
+ return local_plan;
/*
- * We expect plan tree as Group->Sort->{Result}?->{Material}?->RemoteQuery,
+ * We expect plan tree as Group/Agg->Sort->Result->Material->RemoteQuery,
+ * Result, Material nodes are optional. Sort is compulsory for Group but not
+ * for Agg.
* anything else is not handled right now.
*/
- if (IsA(temp_plan, Sort))
+ temp_plan = local_plan->lefttree;
+ remote_scan = NULL;
+ sort_plan = NULL;
+ if (temp_plan && IsA(temp_plan, Sort))
{
sort_plan = (Sort *)temp_plan;
temp_plan = temp_plan->lefttree;
}
- if (IsA(temp_plan, Result))
+ if (temp_plan && IsA(temp_plan, Result))
temp_plan = temp_plan->lefttree;
- if (IsA(temp_plan, Material))
+ if (temp_plan && IsA(temp_plan, Material))
temp_plan = temp_plan->lefttree;
- if (IsA(temp_plan, RemoteQuery))
+ if (temp_plan && IsA(temp_plan, RemoteQuery))
remote_scan = (RemoteQuery *)temp_plan;
- if (!remote_scan || !sort_plan)
+ if (!remote_scan)
+ return local_plan;
+ /*
+ * for Group plan we expect Sort under the Group, which is always the case,
+ * the condition below is really for some possible non-existent case
+ */
+ if (IsA(local_plan, Group) && !sort_plan)
return local_plan;
- Assert(IsA(remote_scan, RemoteQuery));
- Assert(IsA(sort_plan, Sort));
/*
- * grouping_planner will add Sort node before Group node to sort the rows
+ * Grouping_planner may add Sort node to sort the rows
* based on the columns in GROUP BY clause. Hence the columns in Sort and
* those in Group node in should be same. The columns are usually in the
* same order in both nodes, hence check the equality in order. If this
- * condition fails, we can not handle this GROUP plan for now.
+ * condition fails, we can not handle this plan for now.
*/
- if (sort_plan->numCols != group_plan->numCols)
- return local_plan;
- for (cntCols = 0; cntCols < group_plan->numCols; cntCols++)
+ if (sort_plan)
{
- if (sort_plan->sortColIdx[cntCols] != group_plan->grpColIdx[cntCols])
+ int cntCols;
+ if (sort_plan->numCols != numGroupCols)
return local_plan;
+ for (cntCols = 0; cntCols < numGroupCols; cntCols++)
+ {
+ if (sort_plan->sortColIdx[cntCols] != grpColIdx[cntCols])
+ return local_plan;
+ }
+ }
+
+ /* find all the relations referenced by targetlist of Grouping node */
+ temp_vars = pull_var_clause((Node *)local_plan->targetlist,
+ PVC_REJECT_PLACEHOLDERS);
+ findReferencedVars(temp_vars, (Plan *)remote_scan, &temp_vartlist, &in_relids);
+
+ /*
+ * process the targetlist of the grouping plan, also construct the
+ * targetlist of the query to be shipped to the remote side
+ */
+ base_tlist = pgxc_process_grouping_targetlist(root, &(local_plan->targetlist));
+ if (!base_tlist)
+ {
+ /*
+ * for some reason we can not construct a targetlist shippable to the
+ * datanode. Resort to the plan created by grouping_planner()
+ */
+ return local_plan;
}
/*
@@ -5444,16 +5207,11 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
* datanode.
* 1. Create a remote query node reflecting the query to be pushed to the
* datanode.
- * 2. Modify the Group node passed in, to accept the results sent by the
- * datanodes and group them.
+ * 2. Modify the Grouping node passed in, to accept the results sent by the
+ * Datanodes, then group and aggregate them, if needed.
*/
- local_tlist = local_plan->targetlist;
appendStringInfo(in_alias, "%s_%d", "group", root->rs_alias_index);
- /* Find all the relations referenced by targetlist of Group node */
- temp_vars = pull_var_clause((Node *)local_tlist, PVC_REJECT_PLACEHOLDERS);
- findReferencedVars(temp_vars, (Plan *)remote_scan, &temp_vartlist, &in_relids);
-
/*
* Build partial RemoteQuery node to be used for creating the Select clause
* to be sent to the remote node. Rest of the node will be built later
@@ -5471,13 +5229,12 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
remote_group->inner_relids = in_relids;
remote_group->inner_statement = pstrdup(remote_scan->sql_statement);
remote_group->exec_nodes = remote_scan->exec_nodes;
-
/* Don't forget to increment the index for the next time around! */
remote_group->reduce_level = root->rs_alias_index++;
/* Generate the select clause of the remote query */
appendStringInfoString(remote_targetlist, "SELECT");
- foreach (temp, local_tlist)
+ foreach (temp, base_tlist)
{
TargetEntry *tle = lfirst(temp);
Node *expr = (Node *)tle->expr;
@@ -5503,32 +5260,47 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
*/
if (query->groupClause)
{
- SimpleSort *remote_sort = makeNode(SimpleSort);
- char *sep = "";
+ int cntCols;
+ char *sep;
/*
- * Reuse the arrays allocated in sort_plan to create SimpleSort
- * structure. sort_plan is useless henceforth.
+ * recompute the column ids of the grouping columns,
+ * the group column indexes computed earlier point in the
+ * targetlists of the scan plans under this node. But now the grouping
+ * column indexes will be pointing in the targetlist of the new
+ * RemoteQuery, hence those need to be recomputed
*/
- remote_sort->numCols = group_plan->numCols;
- remote_sort->sortColIdx = sort_plan->sortColIdx;
- remote_sort->sortOperators = sort_plan->sortOperators;
- remote_sort->nullsFirst = sort_plan->nullsFirst;
-
- pgxc_locate_grouping_columns(root, local_tlist, group_plan->grpColIdx);
+ pgxc_locate_grouping_columns(root, base_tlist, grpColIdx);
appendStringInfoString(groupby_clause, "GROUP BY ");
- appendStringInfoString(orderby_clause, "ORDER BY ");
- for (cntCols = 0; cntCols < group_plan->numCols; cntCols++)
+ sep = "";
+ for (cntCols = 0; cntCols < numGroupCols; cntCols++)
{
- appendStringInfo(groupby_clause, "%s%d", sep,
- group_plan->grpColIdx[cntCols]);
- remote_sort->sortColIdx[cntCols] = group_plan->grpColIdx[cntCols];
- appendStringInfo(orderby_clause, "%s%d", sep,
- remote_sort->sortColIdx[cntCols]);
+ appendStringInfo(groupby_clause, "%s%d", sep, grpColIdx[cntCols]);
sep = ", ";
}
- remote_group->sort = remote_sort;
+ if (sort_plan)
+ {
+ SimpleSort *remote_sort = makeNode(SimpleSort);
+ /*
+ * reuse the arrays allocated in sort_plan to create SimpleSort
+ * structure. sort_plan is useless henceforth.
+ */
+ remote_sort->numCols = sort_plan->numCols;
+ remote_sort->sortColIdx = sort_plan->sortColIdx;
+ remote_sort->sortOperators = sort_plan->sortOperators;
+ remote_sort->nullsFirst = sort_plan->nullsFirst;
+ appendStringInfoString(orderby_clause, "ORDER BY ");
+ sep = "";
+ for (cntCols = 0; cntCols < remote_sort->numCols; cntCols++)
+ {
+ remote_sort->sortColIdx[cntCols] = grpColIdx[cntCols];
+ appendStringInfo(orderby_clause, "%s%d", sep,
+ remote_sort->sortColIdx[cntCols]);
+ sep = ", ";
+ }
+ remote_group->sort = remote_sort;
+ }
}
/* Generate the remote sql statement from the pieces */
@@ -5537,20 +5309,13 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
orderby_clause->data);
/*
- * Set the base_tlist for the RemoteQuery node being created, it's used to
- * create the tuple descriptor for the result from RemoteQuery and rewrite
- * the Aggregates targetlist accept the results of the RemoteQuery.
- */
- base_tlist = add_to_flat_tlist(NIL, get_tlist_exprs(local_tlist, true));
-
- /*
* Create a dummy RTE for the remote query being created. Append the dummy
* range table entry to the range table. Note that this modifies the master
* copy the caller passed us, otherwise e.g EXPLAIN VERBOSE will fail to
- * find the rte the Vars built below refer to.
+ * find the rte the Vars built below refer to. Also create the tuple
+ * descriptor for the result of this query from the base_tlist (targetlist
+ * we used to generate the remote node query).
*/
-
- /* Cook up the reltupdesc using this base_tlist */
dummy_rte = makeNode(RangeTblEntry);
dummy_rte->reltupdesc = ExecTypeFromTL(base_tlist, false);
dummy_rte->rtekind = RTE_RELATION;
@@ -5577,6 +5342,7 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
remote_group->base_tlist = base_tlist;
remote_group->relname = "__FOREIGN_QUERY__";
remote_group->partitioned_replicated = remote_scan->partitioned_replicated;
+ remote_group->read_only = query->commandType == CMD_SELECT;
/*
* Only quals that can be pushed to the remote side are the ones in the having
@@ -5587,21 +5353,24 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan)
* node, may have local quals. In such case, we have to aggregate and group
* at coordinator and can not push the grouping clause to the datanodes. Is
* there a case in XC, where we can have local quals?
+ * we actually need not worry about costs since this is the final plan
*/
-
- /* We actually do not need to worry about costs since this is the final plan */
remote_group_plan->startup_cost = remote_scan->scan.plan.startup_cost;
remote_group_plan->total_cost = remote_scan->scan.plan.total_cost;
remote_group_plan->plan_rows = remote_scan->scan.plan.plan_rows;
remote_group_plan->plan_width = remote_scan->scan.plan.plan_width;
/*
- * Modify the passed in Group plan according to the remote query we built.
- * Materialization is always need for RemoteQuery in case we need to restart
+ * Modify the passed in grouping plan according to the remote query we built
+ * Materialization is always needed for RemoteQuery in case we need to restart
* the scan.
*/
- group_plan->plan.lefttree = (Plan *) make_material(remote_group_plan);
- return (Plan *)group_plan;
+ local_plan->lefttree = (Plan *) make_material(remote_group_plan);
+ /* indicate that we should apply collection function directly */
+ if (IsA(local_plan, Agg))
+ ((Agg *)local_plan)->skip_trans = true;
+
+ return local_plan;
}
/*
@@ -5638,4 +5407,127 @@ pgxc_locate_grouping_columns(PlannerInfo *root, List *tlist,
groupColIdx[keyno++] = te->resno;
}
}
+
+/*
+ * pgxc_process_grouping_targetlist
+ * The function scans the targetlist to check if the we can push anything
+ * from the targetlist to the datanode. Following rules govern the choice
+ * 1. Either all of the aggregates are pushed to the datanode or none is pushed
+ * 2. If there are no aggregates, the targetlist is good to be shipped as is
+ * 3. If aggregates are involved in expressions, we push the aggregates to the
+ * datanodes but not the involving expressions.
+ *
+ * The function constructs the targetlist for the query to be pushed to the
+ * datanode. It modifies the local targetlist to point to the expressions in
+ * remote targetlist wherever necessary (e.g. aggregates)
+ *
+ * PGXCTODO: we should be careful while pushing the function expressions, it's
+ * better to push functions like strlen() which can be evaluated at the
+ * datanode, but we should avoid pushing functions which can only be evaluated
+ * at coordinator.
+ */
+static List *
+pgxc_process_grouping_targetlist(PlannerInfo *root, List **local_tlist)
+{
+ bool shippable_remote_tlist = true;
+ List *remote_tlist = NIL;
+ int next_resno = 1; /* resno start from 1 */
+ List *orig_local_tlist = NIL;/* Copy original local_tlist, in case it changes */
+ ListCell *temp;
+ Query *query = root->parse;
+
+ /*
+ * Walk through the target list and find out whether we can push the
+ * aggregates and grouping to datanodes. We can do so if the target list
+ * contains plain aggregates (without any expression involving those) and
+ * expressions in group by clauses only (last one to make the query legit).
+ */
+ foreach(temp, *local_tlist)
+ {
+ TargetEntry *local_tle = lfirst(temp);
+ TargetEntry *remote_tle;
+ Node *expr = (Node *)local_tle->expr;
+
+ if (IsA(expr, Aggref))
+ {
+ Aggref *aggref = (Aggref *)expr;
+ if (aggref->aggorder || aggref->aggdistinct || aggref->agglevelsup)
+ {
+ shippable_remote_tlist = false;
+ break;
+ }
+ }
+ else if (query->hasAggs && checkExprHasAggs(expr))
+ {
+ /*
+ * Targetlist expressions which have aggregates embedded inside
+ * are not handled right now.
+ * PGXCTODO: We should be able to extract those aggregates out.
+ * Add those to remote targetlist and modify the local
+ * targetlist accordingly. Thus we get those aggregates grouped
+ * and "transitioned" at the datanode.
+ */
+ shippable_remote_tlist = false;
+ break;
+ }
+
+ remote_tle = makeTargetEntry(copyObject(expr),
+ next_resno++,
+ NULL,
+ false);
+ /* Copy GROUP BY/SORT BY reference for locating the group by columns */
+ remote_tle->ressortgroupref = local_tle->ressortgroupref;
+ remote_tlist = lappend(remote_tlist, remote_tle);
+
+ /*
+ * Replace the args of the local Aggref with Aggref node to be
+ * included in RemoteQuery node, so that set_plan_refs can convert
+ * the args into VAR pointing to the appropriate result in the tuple
+ * coming from RemoteQuery node
+ * PGXCTODO: should we push this change in targetlists of plans
+ * above?
+ */
+ if (IsA(expr, Aggref))
+ {
+ Aggref *local_aggref = (Aggref *)expr;
+ Aggref *remote_aggref = (Aggref *)remote_tle->expr;
+ Assert(IsA(remote_tle->expr, Aggref));
+ remote_aggref->aggtype = remote_aggref->aggtrantype;
+ /*
+ * We are about to change the local_tlist, check if we have already
+ * copied original local_tlist, if not take a copy
+ */
+ if (!orig_local_tlist)
+ orig_local_tlist = copyObject(*local_tlist);
+ /* Is copyObject() needed here? probably yes */
+ local_aggref->args = list_make1(makeTargetEntry(copyObject(remote_tle->expr),
+ 1, NULL,
+ false));
+ }
+ }
+
+ if (!shippable_remote_tlist)
+ {
+ /*
+ * If local_tlist has changed but we didn't find anything shippable to
+ * datanode, we need to restore the local_tlist to original state,
+ */
+ if (orig_local_tlist)
+ *local_tlist = orig_local_tlist;
+ if (remote_tlist)
+ list_free_deep(remote_tlist);
+ remote_tlist = NIL;
+ }
+ else if (orig_local_tlist)
+ {
+ /*
+ * If we have changed the targetlist passed, we need to pass back the
+ * changed targetlist. Free the copy that has been created.
+ */
+ list_free_deep(orig_local_tlist);
+ }
+
+ return remote_tlist;
+}
+
#endif
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index e64e938648..5250905b59 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1334,16 +1334,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
numGroups,
agg_counts.numAggs,
result_plan);
-#ifdef PGXC
- /*
- * Grouping will certainly not increase the number of rows
- * coordinator fetches from datanode, in fact it's expected to
- * reduce the number drastically. Hence, try pushing GROUP BY
- * clauses and aggregates to the datanode, thus saving bandwidth.
- */
- if (IS_PGXC_COORDINATOR && !IsConnFromCoord())
- result_plan = create_remoteagg_plan(root, result_plan);
-#endif /* PGXC */
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
@@ -1415,16 +1405,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
extract_grouping_ops(parse->groupClause),
dNumGroups,
result_plan);
-#ifdef PGXC
- /*
- * Grouping will certainly not increase the number of rows
- * coordinator fetches from datanode, in fact it's expected to
- * reduce the number drastically. Hence, try pushing GROUP BY
- * clauses and aggregates to the datanode, thus saving bandwidth.
- */
- if (IS_PGXC_COORDINATOR && !IsConnFromCoord())
- result_plan = create_remotegroup_plan(root, result_plan);
-#endif /* PGXC */
}
else if (root->hasHavingQual)
{
@@ -1445,6 +1425,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
parse->havingQual,
NULL);
}
+#ifdef PGXC
+ /*
+ * Grouping will certainly not increase the number of rows
+ * coordinator fetches from datanode, in fact it's expected to
+ * reduce the number drastically. Hence, try pushing GROUP BY
+ * clauses and aggregates to the datanode, thus saving bandwidth.
+ */
+ if (IS_PGXC_COORDINATOR && !IsConnFromCoord())
+ result_plan = create_remotegrouping_plan(root, result_plan);
+#endif /* PGXC */
+
} /* end of non-minmax-aggregate case */
/*
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 8005d47c64..e1662eac8d 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -137,8 +137,7 @@ extern Var *search_tlist_for_var(Var *var, List *jtlist);
extern Plan *create_remoteinsert_plan(PlannerInfo *root, Plan *topplan);
extern Plan *create_remoteupdate_plan(PlannerInfo *root, Plan *topplan);
extern Plan *create_remotedelete_plan(PlannerInfo *root, Plan *topplan);
-extern Plan *create_remotegroup_plan(PlannerInfo *root, Plan *local_plan);
-extern Plan *create_remoteagg_plan(PlannerInfo *root, Plan *agg_plan);
+extern Plan *create_remotegrouping_plan(PlannerInfo *root, Plan *local_plan);
#endif
#endif /* PLANMAIN_H */
diff --git a/src/test/regress/expected/xc_groupby.out b/src/test/regress/expected/xc_groupby.out
index e403e37d47..31e9a8cb2e 100644
--- a/src/test/regress/expected/xc_groupby.out
+++ b/src/test/regress/expected/xc_groupby.out
@@ -1,3 +1,10 @@
+-- this file contains tests for GROUP BY with combinations of following
+-- 1. enable_hashagg = on/off (to force the grouping by sorting)
+-- 2. distributed or replicated tables across the datanodes
+-- If a testcase is added to any of the combinations, please check if it's
+-- applicable in other combinations as well.
+-- Combination 1: enable_hashagg on and distributed tables
+set enable_hashagg to on;
-- create required tables and fill them with data
create table tab1 (val int, val2 int);
create table tab2 (val int, val2 int);
@@ -11,6 +18,17 @@ select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 g
3 | 11 | 3.6666666666666667 | 3.66666666666667 | 3
(3 rows)
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------
+ HashAggregate (cost=1.03..1.06 rows=1 width=8)
+ Output: count(*), sum(val), avg(val), ((sum(val))::double precision / (count(*))::double precision), val2
+ -> Materialize (cost=0.00..1.01 rows=1 width=8)
+ Output: val, val2
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: val, val2
+(6 rows)
+
-- joins and group by
select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
count | sum | avg | ?column? | val2 | val2
@@ -34,14 +52,14 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val
Sort Key: tab1.val2
-> Materialize (cost=0.00..1.01 rows=1 width=8)
Output: tab1.val, tab1.val2
- -> Data Node Scan (Node Count [2]) "SELECT val, val2 FROM public.tab1 tab1" on tab1 (cost=0.00..1.01 rows=1000 width=8)
+ -> Data Node Scan (Node Count [2]) on tab1 (cost=0.00..1.01 rows=1000 width=8)
Output: tab1.val, tab1.val2
-> Sort (cost=1.02..1.03 rows=1 width=8)
Output: tab2.val, tab2.val2
Sort Key: tab2.val2
-> Materialize (cost=0.00..1.01 rows=1 width=8)
Output: tab2.val, tab2.val2
- -> Data Node Scan (Node Count [2]) "SELECT val, val2 FROM public.tab2 tab2" on tab2 (cost=0.00..1.01 rows=1000 width=8)
+ -> Data Node Scan (Node Count [2]) on tab2 (cost=0.00..1.01 rows=1000 width=8)
Output: tab2.val, tab2.val2
(19 rows)
@@ -57,17 +75,16 @@ explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group
QUERY PLAN
----------------------------------------------------------------------------------------
HashAggregate (cost=1.05..1.06 rows=1 width=12)
- Output: sum((sum(tab1.val))), ((tab1.val2 % 2))
+ Output: sum((pg_catalog.sum((sum(tab1.val))))), ((tab1.val2 % 2))
-> HashAggregate (cost=1.02..1.03 rows=1 width=8)
- Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2
- -> Materialize (cost=0.00..1.01 rows=1 width=8)
- Output: tab1.val, tab1.val2
+ Output: pg_catalog.sum((sum(tab1.val))), ((tab1.val2 % 2)), tab1.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(tab1.val)), ((tab1.val2 % 2)), tab1.val2
-> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
- Output: tab1.val, tab1.val2
+ Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2
(8 rows)
-- group by without aggregate
-set enable_hashagg to off;
select val2 from tab1 group by val2;
val2
------
@@ -79,7 +96,7 @@ select val2 from tab1 group by val2;
explain verbose select val2 from tab1 group by val2;
QUERY PLAN
----------------------------------------------------------------------------------
- Group (cost=1.02..1.03 rows=1 width=4)
+ HashAggregate (cost=1.02..1.03 rows=1 width=4)
Output: tab1.val2
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: tab1.val2
@@ -90,18 +107,18 @@ explain verbose select val2 from tab1 group by val2;
select val + val2 from tab1 group by val + val2;
?column?
----------
- 2
- 3
4
7
+ 3
8
9
+ 2
(6 rows)
explain verbose select val + val2 from tab1 group by val + val2;
QUERY PLAN
----------------------------------------------------------------------------------
- Group (cost=1.03..1.04 rows=1 width=8)
+ HashAggregate (cost=1.02..1.03 rows=1 width=8)
Output: ((tab1.val + tab1.val2))
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: ((tab1.val + tab1.val2))
@@ -112,12 +129,12 @@ explain verbose select val + val2 from tab1 group by val + val2;
select val + val2, val, val2 from tab1 group by val, val2;
?column? | val | val2
----------+-----+------
- 2 | 1 | 1
+ 7 | 4 | 3
+ 4 | 3 | 1
4 | 1 | 3
- 3 | 2 | 1
4 | 2 | 2
- 4 | 3 | 1
- 7 | 4 | 3
+ 3 | 2 | 1
+ 2 | 1 | 1
8 | 6 | 2
9 | 6 | 3
(8 rows)
@@ -125,7 +142,7 @@ select val + val2, val, val2 from tab1 group by val, val2;
explain verbose select val + val2, val, val2 from tab1 group by val, val2;
QUERY PLAN
----------------------------------------------------------------------------------
- Group (cost=1.02..1.04 rows=1 width=8)
+ HashAggregate (cost=1.02..1.03 rows=1 width=8)
Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
@@ -136,18 +153,18 @@ explain verbose select val + val2, val, val2 from tab1 group by val, val2;
select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
?column? | val | val2
----------+-----+------
- 2 | 1 | 1
- 6 | 2 | 4
5 | 3 | 2
- 7 | 3 | 4
5 | 4 | 1
6 | 4 | 2
+ 6 | 2 | 4
+ 2 | 1 | 1
+ 7 | 3 | 4
(6 rows)
explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
QUERY PLAN
-------------------------------------------------------------------------------
- Group (cost=0.01..0.02 rows=1 width=0)
+ HashAggregate (cost=0.00..0.01 rows=1 width=0)
Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
@@ -158,16 +175,16 @@ explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2
select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
?column?
----------
- 2
- 5
6
+ 2
7
+ 5
(4 rows)
explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
QUERY PLAN
-------------------------------------------------------------------------------
- Group (cost=0.01..0.02 rows=1 width=0)
+ HashAggregate (cost=0.00..0.01 rows=1 width=0)
Output: ((tab1.val + tab2.val2))
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: ((tab1.val + tab2.val2))
@@ -175,7 +192,6 @@ explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab
Output: (tab1.val + tab2.val2)
(6 rows)
-reset enable_hashagg;
-- group by with aggregates in expression
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
?column? | val2
@@ -206,20 +222,366 @@ select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
(3 rows)
explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
- QUERY PLAN
-----------------------------------------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------
HashAggregate (cost=1.02..1.04 rows=1 width=8)
- Output: sum(val), avg(val), ((2 * val2))
- -> Result (cost=0.00..1.02 rows=1 width=8)
- Output: val, val2, (2 * val2)
- -> Materialize (cost=0.00..1.01 rows=1 width=8)
- Output: val, val2
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
- Output: val, val2
-(8 rows)
+ Output: pg_catalog.sum((sum(tab1.val))), pg_catalog.avg((avg(tab1.val))), ((2 * tab1.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(tab1.val)), (avg(tab1.val)), ((2 * tab1.val2))
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: sum(tab1.val), avg(tab1.val), (2 * tab1.val2)
+(6 rows)
drop table tab1;
drop table tab2;
+-- some tests involving nulls, characters, float type etc.
+create table def(a int, b varchar(25));
+insert into def VALUES (NULL, NULL);
+insert into def VALUES (1, NULL);
+insert into def VALUES (NULL, 'One');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (3, 'Three');
+insert into def VALUES (4, 'Three');
+insert into def VALUES (5, 'Three');
+insert into def VALUES (6, 'Two');
+insert into def VALUES (7, NULL);
+insert into def VALUES (8, 'Two');
+insert into def VALUES (9, 'Three');
+insert into def VALUES (10, 'Three');
+select a,count(a) from def group by a order by a;
+ a | count
+----+-------
+ 1 | 1
+ 2 | 2
+ 3 | 1
+ 4 | 1
+ 5 | 1
+ 6 | 1
+ 7 | 1
+ 8 | 1
+ 9 | 1
+ 10 | 1
+ | 0
+(11 rows)
+
+explain verbose select a,count(a) from def group by a order by a;
+ QUERY PLAN
+----------------------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=4)
+ Output: a, count(a)
+ -> Sort (cost=1.02..1.03 rows=1 width=4)
+ Output: a
+ Sort Key: def.a
+ -> Result (cost=0.00..1.01 rows=1 width=4)
+ Output: a
+ -> Materialize (cost=0.00..1.01 rows=1 width=4)
+ Output: a, b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
+ Output: a, b
+(11 rows)
+
+select avg(a) from def group by a;
+ avg
+------------------------
+
+ 6.0000000000000000
+ 8.0000000000000000
+ 5.0000000000000000
+ 2.0000000000000000
+ 1.00000000000000000000
+ 9.0000000000000000
+ 3.0000000000000000
+ 10.0000000000000000
+ 7.0000000000000000
+ 4.0000000000000000
+(11 rows)
+
+select avg(a) from def group by a;
+ avg
+------------------------
+
+ 6.0000000000000000
+ 8.0000000000000000
+ 5.0000000000000000
+ 1.00000000000000000000
+ 9.0000000000000000
+ 2.0000000000000000
+ 10.0000000000000000
+ 7.0000000000000000
+ 3.0000000000000000
+ 4.0000000000000000
+(11 rows)
+
+explain verbose select avg(a) from def group by a;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=4)
+ Output: pg_catalog.avg((avg(def.a))), def.a
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (avg(def.a)), def.a
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
+ Output: avg(def.a), def.a
+(6 rows)
+
+select avg(a) from def group by b;
+ avg
+--------------------
+ 4.0000000000000000
+
+ 4.5000000000000000
+ 6.2000000000000000
+(4 rows)
+
+explain verbose select avg(a) from def group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=33)
+ Output: pg_catalog.avg((avg(def.a))), def.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (avg(def.a)), def.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33)
+ Output: avg(def.a), def.b
+(6 rows)
+
+select sum(a) from def group by b;
+ sum
+-----
+ 8
+
+ 18
+ 31
+(4 rows)
+
+explain verbose select sum(a) from def group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=33)
+ Output: pg_catalog.sum((sum(def.a))), def.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(def.a)), def.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33)
+ Output: sum(def.a), def.b
+(6 rows)
+
+select count(*) from def group by b;
+ count
+-------
+ 3
+ 1
+ 4
+ 5
+(4 rows)
+
+explain verbose select count(*) from def group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=29)
+ Output: pg_catalog.count(*), def.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (count(*)), def.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29)
+ Output: count(*), def.b
+(6 rows)
+
+select count(*) from def where a is not null group by a;
+ count
+-------
+ 1
+ 1
+ 1
+ 1
+ 1
+ 1
+ 2
+ 1
+ 1
+ 1
+(10 rows)
+
+explain verbose select count(*) from def where a is not null group by a;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=4)
+ Output: pg_catalog.count(*), def.a
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (count(*)), def.a
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
+ Output: count(*), def.a
+(6 rows)
+
+select b from def group by b;
+ b
+-------
+
+ One
+ Two
+ Three
+(4 rows)
+
+explain verbose select b from def group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=29)
+ Output: def.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: def.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29)
+ Output: def.b
+(6 rows)
+
+select b,count(b) from def group by b;
+ b | count
+-------+-------
+ | 0
+ One | 1
+ Two | 4
+ Three | 5
+(4 rows)
+
+explain verbose select b,count(b) from def group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=29)
+ Output: def.b, count((count(def.b)))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: def.b, (count(def.b))
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29)
+ Output: def.b, count(def.b)
+(6 rows)
+
+select count(*) from def where b is null group by b;
+ count
+-------
+ 3
+(1 row)
+
+explain verbose select count(*) from def where b is null group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=29)
+ Output: pg_catalog.count(*), def.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (count(*)), def.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29)
+ Output: count(*), def.b
+(6 rows)
+
+create table g(a int, b float, c numeric);
+insert into g values(1,2.1,3.2);
+insert into g values(1,2.1,3.2);
+insert into g values(2,2.3,5.2);
+select sum(a) from g group by a;
+ sum
+-----
+ 2
+ 2
+(2 rows)
+
+explain verbose select sum(a) from g group by a;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=4)
+ Output: pg_catalog.sum((sum(g.a))), g.a
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(g.a)), g.a
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
+ Output: sum(g.a), g.a
+(6 rows)
+
+select sum(b) from g group by b;
+ sum
+-----
+ 2.3
+ 4.2
+(2 rows)
+
+explain verbose select sum(b) from g group by b;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=8)
+ Output: sum((sum(g.b))), g.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(g.b)), g.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: sum(g.b), g.b
+(6 rows)
+
+select sum(c) from g group by b;
+ sum
+-----
+ 5.2
+ 6.4
+(2 rows)
+
+explain verbose select sum(c) from g group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=40)
+ Output: sum((sum(g.c))), g.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(g.c)), g.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40)
+ Output: sum(g.c), g.b
+(6 rows)
+
+select avg(a) from g group by b;
+ avg
+------------------------
+ 2.0000000000000000
+ 1.00000000000000000000
+(2 rows)
+
+explain verbose select avg(a) from g group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=12)
+ Output: pg_catalog.avg((avg(g.a))), g.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (avg(g.a)), g.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=12)
+ Output: avg(g.a), g.b
+(6 rows)
+
+select avg(b) from g group by c;
+ avg
+-----
+ 2.3
+ 2.1
+(2 rows)
+
+explain verbose select avg(b) from g group by c;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=40)
+ Output: pg_catalog.avg((avg(g.b))), g.c
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (avg(g.b)), g.c
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40)
+ Output: avg(g.b), g.c
+(6 rows)
+
+select avg(c) from g group by c;
+ avg
+--------------------
+ 5.2000000000000000
+ 3.2000000000000000
+(2 rows)
+
+explain verbose select avg(c) from g group by c;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=32)
+ Output: pg_catalog.avg((avg(g.c))), g.c
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (avg(g.c)), g.c
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=32)
+ Output: avg(g.c), g.c
+(6 rows)
+
+drop table def;
+drop table g;
+-- Combination 2, enable_hashagg on and replicated tables.
-- repeat the same tests for replicated tables
-- create required tables and fill them with data
create table tab1 (val int, val2 int) distribute by replication;
@@ -268,14 +630,14 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val
Sort Key: tab1.val2
-> Materialize (cost=0.00..1.01 rows=1 width=8)
Output: tab1.val, tab1.val2
- -> Data Node Scan (Node Count [1]) "SELECT val, val2 FROM public.tab1 tab1" on tab1 (cost=0.00..1.01 rows=1000 width=8)
+ -> Data Node Scan (Node Count [1]) on tab1 (cost=0.00..1.01 rows=1000 width=8)
Output: tab1.val, tab1.val2
-> Sort (cost=1.02..1.03 rows=1 width=8)
Output: tab2.val, tab2.val2
Sort Key: tab2.val2
-> Materialize (cost=0.00..1.01 rows=1 width=8)
Output: tab2.val, tab2.val2
- -> Data Node Scan (Node Count [1]) "SELECT val, val2 FROM public.tab2 tab2" on tab2 (cost=0.00..1.01 rows=1000 width=8)
+ -> Data Node Scan (Node Count [1]) on tab2 (cost=0.00..1.01 rows=1000 width=8)
Output: tab2.val, tab2.val2
(19 rows)
@@ -291,17 +653,16 @@ explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group
QUERY PLAN
----------------------------------------------------------------------------------------
HashAggregate (cost=1.05..1.06 rows=1 width=12)
- Output: sum((sum(tab1.val))), ((tab1.val2 % 2))
+ Output: sum((pg_catalog.sum((sum(tab1.val))))), ((tab1.val2 % 2))
-> HashAggregate (cost=1.02..1.03 rows=1 width=8)
- Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2
- -> Materialize (cost=0.00..1.01 rows=1 width=8)
- Output: tab1.val, tab1.val2
+ Output: pg_catalog.sum((sum(tab1.val))), ((tab1.val2 % 2)), tab1.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(tab1.val)), ((tab1.val2 % 2)), tab1.val2
-> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
- Output: tab1.val, tab1.val2
+ Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2
(8 rows)
-- group by without aggregate
-set enable_hashagg to off;
select val2 from tab1 group by val2;
val2
------
@@ -313,7 +674,7 @@ select val2 from tab1 group by val2;
explain verbose select val2 from tab1 group by val2;
QUERY PLAN
----------------------------------------------------------------------------------
- Group (cost=1.02..1.03 rows=1 width=4)
+ HashAggregate (cost=1.02..1.03 rows=1 width=4)
Output: tab1.val2
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: tab1.val2
@@ -324,18 +685,18 @@ explain verbose select val2 from tab1 group by val2;
select val + val2 from tab1 group by val + val2;
?column?
----------
- 2
- 3
4
+ 3
7
8
9
+ 2
(6 rows)
explain verbose select val + val2 from tab1 group by val + val2;
QUERY PLAN
----------------------------------------------------------------------------------
- Group (cost=1.03..1.04 rows=1 width=8)
+ HashAggregate (cost=1.02..1.03 rows=1 width=8)
Output: ((tab1.val + tab1.val2))
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: ((tab1.val + tab1.val2))
@@ -346,12 +707,12 @@ explain verbose select val + val2 from tab1 group by val + val2;
select val + val2, val, val2 from tab1 group by val, val2;
?column? | val | val2
----------+-----+------
- 2 | 1 | 1
+ 7 | 4 | 3
+ 4 | 3 | 1
+ 4 | 2 | 2
4 | 1 | 3
3 | 2 | 1
- 4 | 2 | 2
- 4 | 3 | 1
- 7 | 4 | 3
+ 2 | 1 | 1
8 | 6 | 2
9 | 6 | 3
(8 rows)
@@ -359,7 +720,7 @@ select val + val2, val, val2 from tab1 group by val, val2;
explain verbose select val + val2, val, val2 from tab1 group by val, val2;
QUERY PLAN
----------------------------------------------------------------------------------
- Group (cost=1.02..1.04 rows=1 width=8)
+ HashAggregate (cost=1.02..1.03 rows=1 width=8)
Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
@@ -370,18 +731,18 @@ explain verbose select val + val2, val, val2 from tab1 group by val, val2;
select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
?column? | val | val2
----------+-----+------
- 2 | 1 | 1
- 6 | 2 | 4
5 | 3 | 2
- 7 | 3 | 4
5 | 4 | 1
6 | 4 | 2
+ 2 | 1 | 1
+ 6 | 2 | 4
+ 7 | 3 | 4
(6 rows)
explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
QUERY PLAN
-------------------------------------------------------------------------------
- Group (cost=0.01..0.02 rows=1 width=0)
+ HashAggregate (cost=0.00..0.01 rows=1 width=0)
Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
@@ -392,16 +753,16 @@ explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2
select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
?column?
----------
- 2
- 5
6
+ 2
7
+ 5
(4 rows)
explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
QUERY PLAN
-------------------------------------------------------------------------------
- Group (cost=0.01..0.02 rows=1 width=0)
+ HashAggregate (cost=0.00..0.01 rows=1 width=0)
Output: ((tab1.val + tab2.val2))
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: ((tab1.val + tab2.val2))
@@ -409,7 +770,6 @@ explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab
Output: (tab1.val + tab2.val2)
(6 rows)
-reset enable_hashagg;
-- group by with aggregates in expression
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
?column? | val2
@@ -440,22 +800,20 @@ select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
(3 rows)
explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
- QUERY PLAN
-----------------------------------------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------
HashAggregate (cost=1.02..1.04 rows=1 width=8)
- Output: sum(val), avg(val), ((2 * val2))
- -> Result (cost=0.00..1.02 rows=1 width=8)
- Output: val, val2, (2 * val2)
- -> Materialize (cost=0.00..1.01 rows=1 width=8)
- Output: val, val2
- -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
- Output: val, val2
-(8 rows)
+ Output: pg_catalog.sum((sum(tab1.val))), pg_catalog.avg((avg(tab1.val))), ((2 * tab1.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(tab1.val)), (avg(tab1.val)), ((2 * tab1.val2))
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: sum(tab1.val), avg(tab1.val), (2 * tab1.val2)
+(6 rows)
drop table tab1;
drop table tab2;
-- some tests involving nulls, characters, float type etc.
-create table def(a int, b varchar(25));
+create table def(a int, b varchar(25)) distribute by replication;
insert into def VALUES (NULL, NULL);
insert into def VALUES (1, NULL);
insert into def VALUES (NULL, 'One');
@@ -497,7 +855,7 @@ explain verbose select a,count(a) from def group by a order by a;
Output: a
-> Materialize (cost=0.00..1.01 rows=1 width=4)
Output: a, b
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4)
Output: a, b
(11 rows)
@@ -508,15 +866,26 @@ select avg(a) from def group by a;
6.0000000000000000
8.0000000000000000
5.0000000000000000
- 2.0000000000000000
1.00000000000000000000
9.0000000000000000
+ 2.0000000000000000
3.0000000000000000
10.0000000000000000
7.0000000000000000
4.0000000000000000
(11 rows)
+explain verbose select avg(a) from def group by a;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ HashAggregate (cost=1.02..1.03 rows=1 width=4)
+ Output: pg_catalog.avg((avg(def.a))), def.a
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (avg(def.a)), def.a
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4)
+ Output: avg(def.a), def.a
+(6 rows)
+
select avg(a) from def group by a;
avg
------------------------
@@ -527,9 +896,9 @@ select avg(a) from def group by a;
1.00000000000000000000
9.0000000000000000
2.0000000000000000
+ 3.0000000000000000
10.0000000000000000
7.0000000000000000
- 3.0000000000000000
4.0000000000000000
(11 rows)
@@ -540,7 +909,7 @@ explain verbose select avg(a) from def group by a;
Output: pg_catalog.avg((avg(def.a))), def.a
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (avg(def.a)), def.a
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4)
Output: avg(def.a), def.a
(6 rows)
@@ -560,7 +929,7 @@ explain verbose select avg(a) from def group by b;
Output: pg_catalog.avg((avg(def.a))), def.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (avg(def.a)), def.b
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=33)
Output: avg(def.a), def.b
(6 rows)
@@ -580,7 +949,7 @@ explain verbose select sum(a) from def group by b;
Output: pg_catalog.sum((sum(def.a))), def.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (sum(def.a)), def.b
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=33)
Output: sum(def.a), def.b
(6 rows)
@@ -600,7 +969,7 @@ explain verbose select count(*) from def group by b;
Output: pg_catalog.count(*), def.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (count(*)), def.b
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=29)
Output: count(*), def.b
(6 rows)
@@ -626,7 +995,7 @@ explain verbose select count(*) from def where a is not null group by a;
Output: pg_catalog.count(*), def.a
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (count(*)), def.a
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4)
Output: count(*), def.a
(6 rows)
@@ -646,7 +1015,7 @@ explain verbose select b from def group by b;
Output: def.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: def.b
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=29)
Output: def.b
(6 rows)
@@ -666,7 +1035,7 @@ explain verbose select b,count(b) from def group by b;
Output: def.b, count((count(def.b)))
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: def.b, (count(def.b))
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=29)
Output: def.b, count(def.b)
(6 rows)
@@ -683,11 +1052,11 @@ explain verbose select count(*) from def where b is null group by b;
Output: pg_catalog.count(*), def.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (count(*)), def.b
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=29)
Output: count(*), def.b
(6 rows)
-create table g(a int, b float, c numeric);
+create table g(a int, b float, c numeric) distribute by replication;
insert into g values(1,2.1,3.2);
insert into g values(1,2.1,3.2);
insert into g values(2,2.3,5.2);
@@ -705,7 +1074,7 @@ explain verbose select sum(a) from g group by a;
Output: pg_catalog.sum((sum(g.a))), g.a
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (sum(g.a)), g.a
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4)
Output: sum(g.a), g.a
(6 rows)
@@ -723,7 +1092,7 @@ explain verbose select sum(b) from g group by b;
Output: sum((sum(g.b))), g.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (sum(g.b)), g.b
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
Output: sum(g.b), g.b
(6 rows)
@@ -741,7 +1110,7 @@ explain verbose select sum(c) from g group by b;
Output: sum((sum(g.c))), g.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (sum(g.c)), g.b
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=40)
Output: sum(g.c), g.b
(6 rows)
@@ -759,7 +1128,7 @@ explain verbose select avg(a) from g group by b;
Output: pg_catalog.avg((avg(g.a))), g.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (avg(g.a)), g.b
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=12)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=12)
Output: avg(g.a), g.b
(6 rows)
@@ -777,7 +1146,7 @@ explain verbose select avg(b) from g group by c;
Output: pg_catalog.avg((avg(g.b))), g.c
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (avg(g.b)), g.c
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=40)
Output: avg(g.b), g.c
(6 rows)
@@ -795,14 +1164,264 @@ explain verbose select avg(c) from g group by c;
Output: pg_catalog.avg((avg(g.c))), g.c
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (avg(g.c)), g.c
- -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=32)
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=32)
Output: avg(g.c), g.c
(6 rows)
drop table def;
drop table g;
--- same test with replicated tables
-create table def(a int, b varchar(25)) distribute by replication;
+reset enable_hashagg;
+-- Combination 3 enable_hashagg off and distributed tables
+set enable_hashagg to off;
+-- create required tables and fill them with data
+create table tab1 (val int, val2 int);
+create table tab2 (val int, val2 int);
+insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3);
+insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2);
+select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+ count | sum | avg | ?column? | val2
+-------+-----+--------------------+------------------+------
+ 3 | 6 | 2.0000000000000000 | 2 | 1
+ 2 | 8 | 4.0000000000000000 | 4 | 2
+ 3 | 11 | 3.6666666666666667 | 3.66666666666667 | 3
+(3 rows)
+
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.07 rows=1 width=8)
+ Output: count(*), sum(val), avg(val), ((sum(val))::double precision / (count(*))::double precision), val2
+ -> Sort (cost=1.02..1.03 rows=1 width=8)
+ Output: val, val2
+ Sort Key: tab1.val2
+ -> Result (cost=0.00..1.01 rows=1 width=8)
+ Output: val, val2
+ -> Materialize (cost=0.00..1.01 rows=1 width=8)
+ Output: val, val2
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: val, val2
+(11 rows)
+
+-- joins and group by
+select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+ count | sum | avg | ?column? | val2 | val2
+-------+-----+---------------------+------------------+------+------
+ 9 | 78 | 8.6666666666666667 | 8.66666666666667 | 1 | 1
+ 6 | 96 | 16.0000000000000000 | 16 | 2 | 2
+ 3 | | | | 3 |
+ 3 | | | | | 4
+(4 rows)
+
+explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ GroupAggregate (cost=2.08..2.14 rows=1 width=16)
+ Output: count(*), sum((tab1.val * tab2.val)), avg((tab1.val * tab2.val)), ((sum((tab1.val * tab2.val)))::double precision / (count(*))::double precision), tab1.val2, tab2.val2
+ -> Sort (cost=2.08..2.09 rows=1 width=16)
+ Output: tab1.val, tab2.val, tab1.val2, tab2.val2
+ Sort Key: tab1.val2, tab2.val2
+ -> Merge Full Join (cost=2.05..2.07 rows=1 width=16)
+ Output: tab1.val, tab2.val, tab1.val2, tab2.val2
+ Merge Cond: (tab1.val2 = tab2.val2)
+ -> Sort (cost=1.02..1.03 rows=1 width=8)
+ Output: tab1.val, tab1.val2
+ Sort Key: tab1.val2
+ -> Materialize (cost=0.00..1.01 rows=1 width=8)
+ Output: tab1.val, tab1.val2
+ -> Data Node Scan (Node Count [2]) on tab1 (cost=0.00..1.01 rows=1000 width=8)
+ Output: tab1.val, tab1.val2
+ -> Sort (cost=1.02..1.03 rows=1 width=8)
+ Output: tab2.val, tab2.val2
+ Sort Key: tab2.val2
+ -> Materialize (cost=0.00..1.01 rows=1 width=8)
+ Output: tab2.val, tab2.val2
+ -> Data Node Scan (Node Count [2]) on tab2 (cost=0.00..1.01 rows=1000 width=8)
+ Output: tab2.val, tab2.val2
+(22 rows)
+
+-- aggregates over aggregates
+select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+ sum
+-----
+ 8
+ 17
+(2 rows)
+
+explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------
+ GroupAggregate (cost=1.07..1.09 rows=1 width=12)
+ Output: sum(q1.y), q1.x
+ -> Sort (cost=1.07..1.07 rows=1 width=12)
+ Output: q1.y, q1.x
+ Sort Key: q1.x
+ -> Subquery Scan on q1 (cost=1.02..1.06 rows=1 width=12)
+ Output: q1.y, q1.x
+ -> GroupAggregate (cost=1.02..1.05 rows=1 width=8)
+ Output: pg_catalog.sum((sum(tab1.val))), ((tab1.val2 % 2)), tab1.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(tab1.val)), ((tab1.val2 % 2)), tab1.val2
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2
+(13 rows)
+
+-- group by without aggregate
+select val2 from tab1 group by val2;
+ val2
+------
+ 1
+ 2
+ 3
+(3 rows)
+
+explain verbose select val2 from tab1 group by val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.02..1.03 rows=1 width=4)
+ Output: tab1.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: tab1.val2
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
+ Output: tab1.val2
+(6 rows)
+
+select val + val2 from tab1 group by val + val2;
+ ?column?
+----------
+ 2
+ 3
+ 4
+ 7
+ 8
+ 9
+(6 rows)
+
+explain verbose select val + val2 from tab1 group by val + val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.03..1.04 rows=1 width=8)
+ Output: ((tab1.val + tab1.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab1.val2))
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: (tab1.val + tab1.val2)
+(6 rows)
+
+select val + val2, val, val2 from tab1 group by val, val2;
+ ?column? | val | val2
+----------+-----+------
+ 2 | 1 | 1
+ 4 | 1 | 3
+ 3 | 2 | 1
+ 4 | 2 | 2
+ 4 | 3 | 1
+ 7 | 4 | 3
+ 8 | 6 | 2
+ 9 | 6 | 3
+(8 rows)
+
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.02..1.04 rows=1 width=8)
+ Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: (tab1.val + tab1.val2), tab1.val, tab1.val2
+(6 rows)
+
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ ?column? | val | val2
+----------+-----+------
+ 2 | 1 | 1
+ 6 | 2 | 4
+ 5 | 3 | 2
+ 7 | 3 | 4
+ 5 | 4 | 1
+ 6 | 4 | 2
+(6 rows)
+
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Group (cost=0.01..0.02 rows=1 width=0)
+ Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1 width=4)
+ Output: (tab1.val + tab2.val2), tab1.val, tab2.val2
+(6 rows)
+
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ ?column?
+----------
+ 2
+ 5
+ 6
+ 7
+(4 rows)
+
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Group (cost=0.01..0.02 rows=1 width=0)
+ Output: ((tab1.val + tab2.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab2.val2))
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1 width=4)
+ Output: (tab1.val + tab2.val2)
+(6 rows)
+
+-- group by with aggregates in expression
+select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+ ?column? | val2
+---------------------+------
+ 11.0000000000000000 | 1
+ 14.0000000000000000 | 2
+ 17.6666666666666667 | 3
+(3 rows)
+
+explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.06 rows=1 width=8)
+ Output: (((count(*) + sum(val)))::numeric + avg(val)), val2
+ -> Sort (cost=1.02..1.03 rows=1 width=8)
+ Output: val, val2
+ Sort Key: tab1.val2
+ -> Result (cost=0.00..1.01 rows=1 width=8)
+ Output: val, val2
+ -> Materialize (cost=0.00..1.01 rows=1 width=8)
+ Output: val, val2
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: val, val2
+(11 rows)
+
+-- group by with expressions in group by clause
+select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+ sum | avg | ?column?
+-----+--------------------+----------
+ 6 | 2.0000000000000000 | 2
+ 8 | 4.0000000000000000 | 4
+ 11 | 3.6666666666666667 | 6
+(3 rows)
+
+explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------
+ GroupAggregate (cost=1.03..1.06 rows=1 width=8)
+ Output: pg_catalog.sum((sum(tab1.val))), pg_catalog.avg((avg(tab1.val))), ((2 * tab1.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(tab1.val)), (avg(tab1.val)), ((2 * tab1.val2))
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: sum(tab1.val), avg(tab1.val), (2 * tab1.val2)
+(6 rows)
+
+drop table tab1;
+drop table tab2;
+-- some tests involving nulls, characters, float type etc.
+create table def(a int, b varchar(25));
insert into def VALUES (NULL, NULL);
insert into def VALUES (1, NULL);
insert into def VALUES (NULL, 'One');
@@ -844,30 +1463,626 @@ explain verbose select a,count(a) from def group by a order by a;
Output: a
-> Materialize (cost=0.00..1.01 rows=1 width=4)
Output: a, b
- -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4)
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
Output: a, b
(11 rows)
select avg(a) from def group by a;
avg
------------------------
-
+ 1.00000000000000000000
+ 2.0000000000000000
+ 3.0000000000000000
+ 4.0000000000000000
+ 5.0000000000000000
6.0000000000000000
+ 7.0000000000000000
8.0000000000000000
- 5.0000000000000000
- 1.00000000000000000000
9.0000000000000000
+ 10.0000000000000000
+
+(11 rows)
+
+select avg(a) from def group by a;
+ avg
+------------------------
+ 1.00000000000000000000
2.0000000000000000
3.0000000000000000
- 10.0000000000000000
+ 4.0000000000000000
+ 5.0000000000000000
+ 6.0000000000000000
7.0000000000000000
+ 8.0000000000000000
+ 9.0000000000000000
+ 10.0000000000000000
+
+(11 rows)
+
+explain verbose select avg(a) from def group by a;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=4)
+ Output: pg_catalog.avg((avg(def.a))), def.a
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (avg(def.a)), def.a
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
+ Output: avg(def.a), def.a
+(6 rows)
+
+select avg(a) from def group by b;
+ avg
+--------------------
+
+ 6.2000000000000000
+ 4.5000000000000000
+ 4.0000000000000000
+(4 rows)
+
+explain verbose select avg(a) from def group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=33)
+ Output: pg_catalog.avg((avg(def.a))), def.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (avg(def.a)), def.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33)
+ Output: avg(def.a), def.b
+(6 rows)
+
+select sum(a) from def group by b;
+ sum
+-----
+
+ 31
+ 18
+ 8
+(4 rows)
+
+explain verbose select sum(a) from def group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=33)
+ Output: pg_catalog.sum((sum(def.a))), def.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(def.a)), def.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33)
+ Output: sum(def.a), def.b
+(6 rows)
+
+select count(*) from def group by b;
+ count
+-------
+ 1
+ 5
+ 4
+ 3
+(4 rows)
+
+explain verbose select count(*) from def group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=29)
+ Output: pg_catalog.count(*), def.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (count(*)), def.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29)
+ Output: count(*), def.b
+(6 rows)
+
+select count(*) from def where a is not null group by a;
+ count
+-------
+ 1
+ 2
+ 1
+ 1
+ 1
+ 1
+ 1
+ 1
+ 1
+ 1
+(10 rows)
+
+explain verbose select count(*) from def where a is not null group by a;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=4)
+ Output: pg_catalog.count(*), def.a
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (count(*)), def.a
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
+ Output: count(*), def.a
+(6 rows)
+
+select b from def group by b;
+ b
+-------
+ One
+ Three
+ Two
+
+(4 rows)
+
+explain verbose select b from def group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ Group (cost=1.02..1.03 rows=1 width=29)
+ Output: def.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: def.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29)
+ Output: def.b
+(6 rows)
+
+select b,count(b) from def group by b;
+ b | count
+-------+-------
+ One | 1
+ Three | 5
+ Two | 4
+ | 0
+(4 rows)
+
+explain verbose select b,count(b) from def group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=29)
+ Output: def.b, count((count(def.b)))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: def.b, (count(def.b))
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29)
+ Output: def.b, count(def.b)
+(6 rows)
+
+select count(*) from def where b is null group by b;
+ count
+-------
+ 3
+(1 row)
+
+explain verbose select count(*) from def where b is null group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=29)
+ Output: pg_catalog.count(*), def.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (count(*)), def.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29)
+ Output: count(*), def.b
+(6 rows)
+
+create table g(a int, b float, c numeric);
+insert into g values(1,2.1,3.2);
+insert into g values(1,2.1,3.2);
+insert into g values(2,2.3,5.2);
+select sum(a) from g group by a;
+ sum
+-----
+ 2
+ 2
+(2 rows)
+
+explain verbose select sum(a) from g group by a;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=4)
+ Output: pg_catalog.sum((sum(g.a))), g.a
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(g.a)), g.a
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4)
+ Output: sum(g.a), g.a
+(6 rows)
+
+select sum(b) from g group by b;
+ sum
+-----
+ 4.2
+ 2.3
+(2 rows)
+
+explain verbose select sum(b) from g group by b;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=8)
+ Output: sum((sum(g.b))), g.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(g.b)), g.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: sum(g.b), g.b
+(6 rows)
+
+select sum(c) from g group by b;
+ sum
+-----
+ 6.4
+ 5.2
+(2 rows)
+
+explain verbose select sum(c) from g group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=40)
+ Output: sum((sum(g.c))), g.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(g.c)), g.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40)
+ Output: sum(g.c), g.b
+(6 rows)
+
+select avg(a) from g group by b;
+ avg
+------------------------
+ 1.00000000000000000000
+ 2.0000000000000000
+(2 rows)
+
+explain verbose select avg(a) from g group by b;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=12)
+ Output: pg_catalog.avg((avg(g.a))), g.b
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (avg(g.a)), g.b
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=12)
+ Output: avg(g.a), g.b
+(6 rows)
+
+select avg(b) from g group by c;
+ avg
+-----
+ 2.1
+ 2.3
+(2 rows)
+
+explain verbose select avg(b) from g group by c;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=40)
+ Output: pg_catalog.avg((avg(g.b))), g.c
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (avg(g.b)), g.c
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40)
+ Output: avg(g.b), g.c
+(6 rows)
+
+select avg(c) from g group by c;
+ avg
+--------------------
+ 3.2000000000000000
+ 5.2000000000000000
+(2 rows)
+
+explain verbose select avg(c) from g group by c;
+ QUERY PLAN
+-----------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=32)
+ Output: pg_catalog.avg((avg(g.c))), g.c
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (avg(g.c)), g.c
+ -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=32)
+ Output: avg(g.c), g.c
+(6 rows)
+
+drop table def;
+drop table g;
+-- Combination 4 enable_hashagg off and replicated tables.
+-- repeat the same tests for replicated tables
+-- create required tables and fill them with data
+create table tab1 (val int, val2 int) distribute by replication;
+create table tab2 (val int, val2 int) distribute by replication;
+insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3);
+insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2);
+select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+ count | sum | avg | ?column? | val2
+-------+-----+--------------------+------------------+------
+ 3 | 6 | 2.0000000000000000 | 2 | 1
+ 2 | 8 | 4.0000000000000000 | 4 | 2
+ 3 | 11 | 3.6666666666666667 | 3.66666666666667 | 3
+(3 rows)
+
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.07 rows=1 width=8)
+ Output: count(*), sum(val), avg(val), ((sum(val))::double precision / (count(*))::double precision), val2
+ -> Sort (cost=1.02..1.03 rows=1 width=8)
+ Output: val, val2
+ Sort Key: tab1.val2
+ -> Result (cost=0.00..1.01 rows=1 width=8)
+ Output: val, val2
+ -> Materialize (cost=0.00..1.01 rows=1 width=8)
+ Output: val, val2
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: val, val2
+(11 rows)
+
+-- joins and group by
+select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+ count | sum | avg | ?column? | val2 | val2
+-------+-----+---------------------+------------------+------+------
+ 9 | 78 | 8.6666666666666667 | 8.66666666666667 | 1 | 1
+ 6 | 96 | 16.0000000000000000 | 16 | 2 | 2
+ 3 | | | | 3 |
+ 3 | | | | | 4
+(4 rows)
+
+explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ GroupAggregate (cost=2.08..2.14 rows=1 width=16)
+ Output: count(*), sum((tab1.val * tab2.val)), avg((tab1.val * tab2.val)), ((sum((tab1.val * tab2.val)))::double precision / (count(*))::double precision), tab1.val2, tab2.val2
+ -> Sort (cost=2.08..2.09 rows=1 width=16)
+ Output: tab1.val, tab2.val, tab1.val2, tab2.val2
+ Sort Key: tab1.val2, tab2.val2
+ -> Merge Full Join (cost=2.05..2.07 rows=1 width=16)
+ Output: tab1.val, tab2.val, tab1.val2, tab2.val2
+ Merge Cond: (tab1.val2 = tab2.val2)
+ -> Sort (cost=1.02..1.03 rows=1 width=8)
+ Output: tab1.val, tab1.val2
+ Sort Key: tab1.val2
+ -> Materialize (cost=0.00..1.01 rows=1 width=8)
+ Output: tab1.val, tab1.val2
+ -> Data Node Scan (Node Count [1]) on tab1 (cost=0.00..1.01 rows=1000 width=8)
+ Output: tab1.val, tab1.val2
+ -> Sort (cost=1.02..1.03 rows=1 width=8)
+ Output: tab2.val, tab2.val2
+ Sort Key: tab2.val2
+ -> Materialize (cost=0.00..1.01 rows=1 width=8)
+ Output: tab2.val, tab2.val2
+ -> Data Node Scan (Node Count [1]) on tab2 (cost=0.00..1.01 rows=1000 width=8)
+ Output: tab2.val, tab2.val2
+(22 rows)
+
+-- aggregates over aggregates
+select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+ sum
+-----
+ 8
+ 17
+(2 rows)
+
+explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------
+ GroupAggregate (cost=1.07..1.09 rows=1 width=12)
+ Output: sum(q1.y), q1.x
+ -> Sort (cost=1.07..1.07 rows=1 width=12)
+ Output: q1.y, q1.x
+ Sort Key: q1.x
+ -> Subquery Scan on q1 (cost=1.02..1.06 rows=1 width=12)
+ Output: q1.y, q1.x
+ -> GroupAggregate (cost=1.02..1.05 rows=1 width=8)
+ Output: pg_catalog.sum((sum(tab1.val))), ((tab1.val2 % 2)), tab1.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(tab1.val)), ((tab1.val2 % 2)), tab1.val2
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2
+(13 rows)
+
+-- group by without aggregate
+select val2 from tab1 group by val2;
+ val2
+------
+ 1
+ 2
+ 3
+(3 rows)
+
+explain verbose select val2 from tab1 group by val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.02..1.03 rows=1 width=4)
+ Output: tab1.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: tab1.val2
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4)
+ Output: tab1.val2
+(6 rows)
+
+select val + val2 from tab1 group by val + val2;
+ ?column?
+----------
+ 2
+ 3
+ 4
+ 7
+ 8
+ 9
+(6 rows)
+
+explain verbose select val + val2 from tab1 group by val + val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.03..1.04 rows=1 width=8)
+ Output: ((tab1.val + tab1.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab1.val2))
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: (tab1.val + tab1.val2)
+(6 rows)
+
+select val + val2, val, val2 from tab1 group by val, val2;
+ ?column? | val | val2
+----------+-----+------
+ 2 | 1 | 1
+ 4 | 1 | 3
+ 3 | 2 | 1
+ 4 | 2 | 2
+ 4 | 3 | 1
+ 7 | 4 | 3
+ 8 | 6 | 2
+ 9 | 6 | 3
+(8 rows)
+
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Group (cost=1.02..1.04 rows=1 width=8)
+ Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: (tab1.val + tab1.val2), tab1.val, tab1.val2
+(6 rows)
+
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ ?column? | val | val2
+----------+-----+------
+ 2 | 1 | 1
+ 6 | 2 | 4
+ 5 | 3 | 2
+ 7 | 3 | 4
+ 5 | 4 | 1
+ 6 | 4 | 2
+(6 rows)
+
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Group (cost=0.01..0.02 rows=1 width=0)
+ Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1 width=4)
+ Output: (tab1.val + tab2.val2), tab1.val, tab2.val2
+(6 rows)
+
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ ?column?
+----------
+ 2
+ 5
+ 6
+ 7
+(4 rows)
+
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Group (cost=0.01..0.02 rows=1 width=0)
+ Output: ((tab1.val + tab2.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: ((tab1.val + tab2.val2))
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1 width=4)
+ Output: (tab1.val + tab2.val2)
+(6 rows)
+
+-- group by with aggregates in expression
+select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+ ?column? | val2
+---------------------+------
+ 11.0000000000000000 | 1
+ 14.0000000000000000 | 2
+ 17.6666666666666667 | 3
+(3 rows)
+
+explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+ QUERY PLAN
+----------------------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.06 rows=1 width=8)
+ Output: (((count(*) + sum(val)))::numeric + avg(val)), val2
+ -> Sort (cost=1.02..1.03 rows=1 width=8)
+ Output: val, val2
+ Sort Key: tab1.val2
+ -> Result (cost=0.00..1.01 rows=1 width=8)
+ Output: val, val2
+ -> Materialize (cost=0.00..1.01 rows=1 width=8)
+ Output: val, val2
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: val, val2
+(11 rows)
+
+-- group by with expressions in group by clause
+select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+ sum | avg | ?column?
+-----+--------------------+----------
+ 6 | 2.0000000000000000 | 2
+ 8 | 4.0000000000000000 | 4
+ 11 | 3.6666666666666667 | 6
+(3 rows)
+
+explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------
+ GroupAggregate (cost=1.03..1.06 rows=1 width=8)
+ Output: pg_catalog.sum((sum(tab1.val))), pg_catalog.avg((avg(tab1.val))), ((2 * tab1.val2))
+ -> Materialize (cost=0.00..0.00 rows=0 width=0)
+ Output: (sum(tab1.val)), (avg(tab1.val)), ((2 * tab1.val2))
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8)
+ Output: sum(tab1.val), avg(tab1.val), (2 * tab1.val2)
+(6 rows)
+
+drop table tab1;
+drop table tab2;
+-- some tests involving nulls, characters, float type etc.
+create table def(a int, b varchar(25)) distribute by replication;
+insert into def VALUES (NULL, NULL);
+insert into def VALUES (1, NULL);
+insert into def VALUES (NULL, 'One');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (3, 'Three');
+insert into def VALUES (4, 'Three');
+insert into def VALUES (5, 'Three');
+insert into def VALUES (6, 'Two');
+insert into def VALUES (7, NULL);
+insert into def VALUES (8, 'Two');
+insert into def VALUES (9, 'Three');
+insert into def VALUES (10, 'Three');
+select a,count(a) from def group by a order by a;
+ a | count
+----+-------
+ 1 | 1
+ 2 | 2
+ 3 | 1
+ 4 | 1
+ 5 | 1
+ 6 | 1
+ 7 | 1
+ 8 | 1
+ 9 | 1
+ 10 | 1
+ | 0
+(11 rows)
+
+explain verbose select a,count(a) from def group by a order by a;
+ QUERY PLAN
+----------------------------------------------------------------------------------------------
+ GroupAggregate (cost=1.02..1.05 rows=1 width=4)
+ Output: a, count(a)
+ -> Sort (cost=1.02..1.03 rows=1 width=4)
+ Output: a
+ Sort Key: def.a
+ -> Result (cost=0.00..1.01 rows=1 width=4)
+ Output: a
+ -> Materialize (cost=0.00..1.01 rows=1 width=4)
+ Output: a, b
+ -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4)
+ Output: a, b
+(11 rows)
+
+select avg(a) from def group by a;
+ avg
+------------------------
+ 1.00000000000000000000
+ 2.0000000000000000
+ 3.0000000000000000
4.0000000000000000
+ 5.0000000000000000
+ 6.0000000000000000
+ 7.0000000000000000
+ 8.0000000000000000
+ 9.0000000000000000
+ 10.0000000000000000
+
(11 rows)
explain verbose select avg(a) from def group by a;
QUERY PLAN
----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=4)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=4)
Output: pg_catalog.avg((avg(def.a))), def.a
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (avg(def.a)), def.a
@@ -878,23 +2093,23 @@ explain verbose select avg(a) from def group by a;
select avg(a) from def group by a;
avg
------------------------
-
- 6.0000000000000000
- 8.0000000000000000
- 5.0000000000000000
1.00000000000000000000
- 9.0000000000000000
2.0000000000000000
3.0000000000000000
- 10.0000000000000000
- 7.0000000000000000
4.0000000000000000
+ 5.0000000000000000
+ 6.0000000000000000
+ 7.0000000000000000
+ 8.0000000000000000
+ 9.0000000000000000
+ 10.0000000000000000
+
(11 rows)
explain verbose select avg(a) from def group by a;
QUERY PLAN
----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=4)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=4)
Output: pg_catalog.avg((avg(def.a))), def.a
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (avg(def.a)), def.a
@@ -905,16 +2120,16 @@ explain verbose select avg(a) from def group by a;
select avg(a) from def group by b;
avg
--------------------
- 4.0000000000000000
- 4.5000000000000000
6.2000000000000000
+ 4.5000000000000000
+ 4.0000000000000000
(4 rows)
explain verbose select avg(a) from def group by b;
QUERY PLAN
-----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=33)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=33)
Output: pg_catalog.avg((avg(def.a))), def.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (avg(def.a)), def.b
@@ -925,16 +2140,16 @@ explain verbose select avg(a) from def group by b;
select sum(a) from def group by b;
sum
-----
- 8
- 18
31
+ 18
+ 8
(4 rows)
explain verbose select sum(a) from def group by b;
QUERY PLAN
-----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=33)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=33)
Output: pg_catalog.sum((sum(def.a))), def.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (sum(def.a)), def.b
@@ -945,16 +2160,16 @@ explain verbose select sum(a) from def group by b;
select count(*) from def group by b;
count
-------
- 3
1
- 4
5
+ 4
+ 3
(4 rows)
explain verbose select count(*) from def group by b;
QUERY PLAN
-----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=29)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=29)
Output: pg_catalog.count(*), def.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (count(*)), def.b
@@ -966,12 +2181,12 @@ select count(*) from def where a is not null group by a;
count
-------
1
+ 2
1
1
1
1
1
- 2
1
1
1
@@ -980,7 +2195,7 @@ select count(*) from def where a is not null group by a;
explain verbose select count(*) from def where a is not null group by a;
QUERY PLAN
----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=4)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=4)
Output: pg_catalog.count(*), def.a
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (count(*)), def.a
@@ -991,16 +2206,16 @@ explain verbose select count(*) from def where a is not null group by a;
select b from def group by b;
b
-------
-
One
- Two
Three
+ Two
+
(4 rows)
explain verbose select b from def group by b;
QUERY PLAN
-----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=29)
+ Group (cost=1.02..1.03 rows=1 width=29)
Output: def.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: def.b
@@ -1011,16 +2226,16 @@ explain verbose select b from def group by b;
select b,count(b) from def group by b;
b | count
-------+-------
- | 0
One | 1
- Two | 4
Three | 5
+ Two | 4
+ | 0
(4 rows)
explain verbose select b,count(b) from def group by b;
QUERY PLAN
-----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=29)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=29)
Output: def.b, count((count(def.b)))
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: def.b, (count(def.b))
@@ -1037,7 +2252,7 @@ select count(*) from def where b is null group by b;
explain verbose select count(*) from def where b is null group by b;
QUERY PLAN
-----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=29)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=29)
Output: pg_catalog.count(*), def.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (count(*)), def.b
@@ -1059,7 +2274,7 @@ select sum(a) from g group by a;
explain verbose select sum(a) from g group by a;
QUERY PLAN
----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=4)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=4)
Output: pg_catalog.sum((sum(g.a))), g.a
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (sum(g.a)), g.a
@@ -1070,14 +2285,14 @@ explain verbose select sum(a) from g group by a;
select sum(b) from g group by b;
sum
-----
- 2.3
4.2
+ 2.3
(2 rows)
explain verbose select sum(b) from g group by b;
QUERY PLAN
----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=8)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=8)
Output: sum((sum(g.b))), g.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (sum(g.b)), g.b
@@ -1088,14 +2303,14 @@ explain verbose select sum(b) from g group by b;
select sum(c) from g group by b;
sum
-----
- 5.2
6.4
+ 5.2
(2 rows)
explain verbose select sum(c) from g group by b;
QUERY PLAN
-----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=40)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=40)
Output: sum((sum(g.c))), g.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (sum(g.c)), g.b
@@ -1106,14 +2321,14 @@ explain verbose select sum(c) from g group by b;
select avg(a) from g group by b;
avg
------------------------
- 2.0000000000000000
1.00000000000000000000
+ 2.0000000000000000
(2 rows)
explain verbose select avg(a) from g group by b;
QUERY PLAN
-----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=12)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=12)
Output: pg_catalog.avg((avg(g.a))), g.b
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (avg(g.a)), g.b
@@ -1124,14 +2339,14 @@ explain verbose select avg(a) from g group by b;
select avg(b) from g group by c;
avg
-----
- 2.3
2.1
+ 2.3
(2 rows)
explain verbose select avg(b) from g group by c;
QUERY PLAN
-----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=40)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=40)
Output: pg_catalog.avg((avg(g.b))), g.c
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (avg(g.b)), g.c
@@ -1142,14 +2357,14 @@ explain verbose select avg(b) from g group by c;
select avg(c) from g group by c;
avg
--------------------
- 5.2000000000000000
3.2000000000000000
+ 5.2000000000000000
(2 rows)
explain verbose select avg(c) from g group by c;
QUERY PLAN
-----------------------------------------------------------------------------------
- HashAggregate (cost=1.02..1.03 rows=1 width=32)
+ GroupAggregate (cost=1.02..1.05 rows=1 width=32)
Output: pg_catalog.avg((avg(g.c))), g.c
-> Materialize (cost=0.00..0.00 rows=0 width=0)
Output: (avg(g.c)), g.c
@@ -1159,3 +2374,4 @@ explain verbose select avg(c) from g group by c;
drop table def;
drop table g;
+reset enable_hashagg;
diff --git a/src/test/regress/sql/xc_groupby.sql b/src/test/regress/sql/xc_groupby.sql
index 1fd0f50e11..f262d7281e 100644
--- a/src/test/regress/sql/xc_groupby.sql
+++ b/src/test/regress/sql/xc_groupby.sql
@@ -1,9 +1,18 @@
+-- this file contains tests for GROUP BY with combinations of following
+-- 1. enable_hashagg = on/off (to force the grouping by sorting)
+-- 2. distributed or replicated tables across the datanodes
+-- If a testcase is added to any of the combinations, please check if it's
+-- applicable in other combinations as well.
+
+-- Combination 1: enable_hashagg on and distributed tables
+set enable_hashagg to on;
-- create required tables and fill them with data
create table tab1 (val int, val2 int);
create table tab2 (val int, val2 int);
insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3);
insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2);
select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
-- joins and group by
select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
@@ -11,7 +20,6 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val
select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
-- group by without aggregate
-set enable_hashagg to off;
select val2 from tab1 group by val2;
explain verbose select val2 from tab1 group by val2;
select val + val2 from tab1 group by val + val2;
@@ -22,7 +30,6 @@ select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val
explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
-reset enable_hashagg;
-- group by with aggregates in expression
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
@@ -32,6 +39,66 @@ explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
drop table tab1;
drop table tab2;
+-- some tests involving nulls, characters, float type etc.
+create table def(a int, b varchar(25));
+insert into def VALUES (NULL, NULL);
+insert into def VALUES (1, NULL);
+insert into def VALUES (NULL, 'One');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (3, 'Three');
+insert into def VALUES (4, 'Three');
+insert into def VALUES (5, 'Three');
+insert into def VALUES (6, 'Two');
+insert into def VALUES (7, NULL);
+insert into def VALUES (8, 'Two');
+insert into def VALUES (9, 'Three');
+insert into def VALUES (10, 'Three');
+
+select a,count(a) from def group by a order by a;
+explain verbose select a,count(a) from def group by a order by a;
+select avg(a) from def group by a;
+select avg(a) from def group by a;
+explain verbose select avg(a) from def group by a;
+select avg(a) from def group by b;
+explain verbose select avg(a) from def group by b;
+select sum(a) from def group by b;
+explain verbose select sum(a) from def group by b;
+select count(*) from def group by b;
+explain verbose select count(*) from def group by b;
+select count(*) from def where a is not null group by a;
+explain verbose select count(*) from def where a is not null group by a;
+
+select b from def group by b;
+explain verbose select b from def group by b;
+select b,count(b) from def group by b;
+explain verbose select b,count(b) from def group by b;
+select count(*) from def where b is null group by b;
+explain verbose select count(*) from def where b is null group by b;
+
+create table g(a int, b float, c numeric);
+insert into g values(1,2.1,3.2);
+insert into g values(1,2.1,3.2);
+insert into g values(2,2.3,5.2);
+
+select sum(a) from g group by a;
+explain verbose select sum(a) from g group by a;
+select sum(b) from g group by b;
+explain verbose select sum(b) from g group by b;
+select sum(c) from g group by b;
+explain verbose select sum(c) from g group by b;
+
+select avg(a) from g group by b;
+explain verbose select avg(a) from g group by b;
+select avg(b) from g group by c;
+explain verbose select avg(b) from g group by c;
+select avg(c) from g group by c;
+explain verbose select avg(c) from g group by c;
+
+drop table def;
+drop table g;
+
+-- Combination 2, enable_hashagg on and replicated tables.
-- repeat the same tests for replicated tables
-- create required tables and fill them with data
create table tab1 (val int, val2 int) distribute by replication;
@@ -47,7 +114,6 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val
select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
-- group by without aggregate
-set enable_hashagg to off;
select val2 from tab1 group by val2;
explain verbose select val2 from tab1 group by val2;
select val + val2 from tab1 group by val + val2;
@@ -58,7 +124,102 @@ select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val
explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+-- group by with aggregates in expression
+select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+-- group by with expressions in group by clause
+select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+drop table tab1;
+drop table tab2;
+
+-- some tests involving nulls, characters, float type etc.
+create table def(a int, b varchar(25)) distribute by replication;
+insert into def VALUES (NULL, NULL);
+insert into def VALUES (1, NULL);
+insert into def VALUES (NULL, 'One');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (2, 'Two');
+insert into def VALUES (3, 'Three');
+insert into def VALUES (4, 'Three');
+insert into def VALUES (5, 'Three');
+insert into def VALUES (6, 'Two');
+insert into def VALUES (7, NULL);
+insert into def VALUES (8, 'Two');
+insert into def VALUES (9, 'Three');
+insert into def VALUES (10, 'Three');
+
+select a,count(a) from def group by a order by a;
+explain verbose select a,count(a) from def group by a order by a;
+select avg(a) from def group by a;
+explain verbose select avg(a) from def group by a;
+select avg(a) from def group by a;
+explain verbose select avg(a) from def group by a;
+select avg(a) from def group by b;
+explain verbose select avg(a) from def group by b;
+select sum(a) from def group by b;
+explain verbose select sum(a) from def group by b;
+select count(*) from def group by b;
+explain verbose select count(*) from def group by b;
+select count(*) from def where a is not null group by a;
+explain verbose select count(*) from def where a is not null group by a;
+
+select b from def group by b;
+explain verbose select b from def group by b;
+select b,count(b) from def group by b;
+explain verbose select b,count(b) from def group by b;
+select count(*) from def where b is null group by b;
+explain verbose select count(*) from def where b is null group by b;
+
+create table g(a int, b float, c numeric) distribute by replication;
+insert into g values(1,2.1,3.2);
+insert into g values(1,2.1,3.2);
+insert into g values(2,2.3,5.2);
+
+select sum(a) from g group by a;
+explain verbose select sum(a) from g group by a;
+select sum(b) from g group by b;
+explain verbose select sum(b) from g group by b;
+select sum(c) from g group by b;
+explain verbose select sum(c) from g group by b;
+
+select avg(a) from g group by b;
+explain verbose select avg(a) from g group by b;
+select avg(b) from g group by c;
+explain verbose select avg(b) from g group by c;
+select avg(c) from g group by c;
+explain verbose select avg(c) from g group by c;
+
+drop table def;
+drop table g;
reset enable_hashagg;
+
+-- Combination 3 enable_hashagg off and distributed tables
+set enable_hashagg to off;
+-- create required tables and fill them with data
+create table tab1 (val int, val2 int);
+create table tab2 (val int, val2 int);
+insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3);
+insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2);
+select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+-- joins and group by
+select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+-- aggregates over aggregates
+select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+-- group by without aggregate
+select val2 from tab1 group by val2;
+explain verbose select val2 from tab1 group by val2;
+select val + val2 from tab1 group by val + val2;
+explain verbose select val + val2 from tab1 group by val + val2;
+select val + val2, val, val2 from tab1 group by val, val2;
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
-- group by with aggregates in expression
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
@@ -127,7 +288,42 @@ explain verbose select avg(c) from g group by c;
drop table def;
drop table g;
--- same test with replicated tables
+-- Combination 4 enable_hashagg off and replicated tables.
+-- repeat the same tests for replicated tables
+-- create required tables and fill them with data
+create table tab1 (val int, val2 int) distribute by replication;
+create table tab2 (val int, val2 int) distribute by replication;
+insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3);
+insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2);
+select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+-- joins and group by
+select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+-- aggregates over aggregates
+select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+-- group by without aggregate
+select val2 from tab1 group by val2;
+explain verbose select val2 from tab1 group by val2;
+select val + val2 from tab1 group by val + val2;
+explain verbose select val + val2 from tab1 group by val + val2;
+select val + val2, val, val2 from tab1 group by val, val2;
+explain verbose select val + val2, val, val2 from tab1 group by val, val2;
+select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2;
+select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2;
+-- group by with aggregates in expression
+select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+-- group by with expressions in group by clause
+select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+drop table tab1;
+drop table tab2;
+
+-- some tests involving nulls, characters, float type etc.
create table def(a int, b varchar(25)) distribute by replication;
insert into def VALUES (NULL, NULL);
insert into def VALUES (1, NULL);
@@ -186,3 +382,4 @@ explain verbose select avg(c) from g group by c;
drop table def;
drop table g;
+reset enable_hashagg;