diff options
| -rw-r--r-- | src/backend/optimizer/plan/createplan.c | 612 | ||||
| -rw-r--r-- | src/backend/optimizer/plan/planner.c | 31 | ||||
| -rw-r--r-- | src/include/optimizer/planmain.h | 3 | ||||
| -rw-r--r-- | src/test/regress/expected/xc_groupby.out | 1484 | ||||
| -rw-r--r-- | src/test/regress/sql/xc_groupby.sql | 205 |
5 files changed, 1815 insertions, 520 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 97016f9723..a666914d7a 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -41,6 +41,7 @@ #include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "executor/executor.h" +#include "rewrite/rewriteManip.h" #endif #include "utils/lsyscache.h" @@ -89,6 +90,8 @@ static Alias *generate_remote_rte_alias(RangeTblEntry *rte, int varno, char *aliasname, int reduce_level); static void pgxc_locate_grouping_columns(PlannerInfo *root, List *tlist, AttrNumber *grpColIdx); +static List *pgxc_process_grouping_targetlist(PlannerInfo *root, + List **local_tlist); #endif static NestLoop *create_nestloop_plan(PlannerInfo *root, NestPath *best_path, Plan *outer_plan, Plan *inner_plan); @@ -5056,7 +5059,7 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan) } /* - * create_remoteagg_plan + * create_remotegrouping_plan * Check if the grouping and aggregates can be pushed down to the * datanodes. * Right now we can push with following restrictions @@ -5064,6 +5067,7 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan) * expressions in group by clauses * 2. No distinct or order by clauses * 3. No windowing clause + * 4. No having clause * * Inputs * root - planerInfo root for this query @@ -5075,306 +5079,14 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan) * node in case there are no local clauses. */ Plan * -create_remoteagg_plan(PlannerInfo *root, Plan *local_plan) +create_remotegrouping_plan(PlannerInfo *root, Plan *local_plan) { Query *query = root->parse; - RemoteQuery *agg_left; - Plan *temp_plan = local_plan->lefttree; - List *agg_tlist = local_plan->targetlist; - StringInfo remote_sql_stmt = makeStringInfo(); - StringInfo remote_targetlist = makeStringInfo(); - StringInfo remote_fromlist = makeStringInfo(); - StringInfo groupby_clause = makeStringInfo(); - StringInfo in_alias = makeStringInfo(); - ListCell *temp; - ListCell *temp_remote; - RemoteQuery *agg_remote; - Plan *agg_remote_plan; - RangeTblEntry *dummy_rte; - Index dummy_rtindex; - List *base_tlist; - Agg *agg_plan = NULL; - List *temp_vars; /* temporarily hold the VARs */ - List *temp_vartlist; /* temporarity hold tlist of VARs */ - Relids in_relids; /* the list of Relids referenced by - * the Agg plan - */ - - /* For now only Agg plans */ - Assert(IsA(local_plan, Agg)); - agg_plan = (Agg *)local_plan; - - /* - * We don't push aggregation and grouping to datanodes, in case there are - * windowing aggregates, distinct, having clause or sort clauses. - */ - if (query->hasWindowFuncs || - query->distinctClause || - query->sortClause || - query->havingQual) - return local_plan; - - /* - * Optimize if only the tree underneath is reduced to RemoteQuery, any other - * node there indicates that the scans can not be completely pushed to the - * remote data nodes. - * RemoteQuery is hidden underneath Material plan, take it out. - */ - if (IsA(temp_plan, Material)) - temp_plan = temp_plan->lefttree; - if (!IsA(temp_plan, RemoteQuery)) - return local_plan; - else - agg_left = (RemoteQuery *)temp_plan; - - /* - * Walk through the target list and find out whether we can push the - * aggregates and grouping to datanodes. We can do so if the target list - * contains plain aggregates (without any expression involving those) and - * expressions in group by clauses only (last one to make the query legit. - */ - foreach(temp, agg_tlist) - { - TargetEntry *tle = lfirst(temp); - Node *expr = (Node *)tle->expr; - - /* - * PGXCTODO: once we allow sort clauses to be pushed to data nodes, - * along with group by clause, this condition will need to be changed. - */ - if (!(IsA(expr, Aggref) || tle->ressortgroupref > 0)) - return local_plan; - } - - /* - * Cleared of all the charges, now take following steps - * 1. Create a remote query node reflecting the query to be pushed to the - * datanode - * 2. Modify the Agg node passed in so that it reflects the aggregation - * (collection) to be done at the coordinator based on the results sent by - * the datanodes. - */ - appendStringInfo(in_alias, "%s_%d", "group", root->rs_alias_index); - - /* Find all the relations referenced by targetlist of Agg node */ - temp_vars = pull_var_clause((Node *)agg_tlist, PVC_REJECT_PLACEHOLDERS); - findReferencedVars(temp_vars, (Plan *)agg_left, &temp_vartlist, &in_relids); - - /* - * Build partial RemoteQuery node to be used for creating the Select clause - * to be sent to the remote node. Rest of the node will be built later - */ - agg_remote = makeNode(RemoteQuery); - - /* - * Save information about the plan we are reducing. - * We may need this information later if more entries are added to it - * as part of the remote expression optimization. - */ - agg_remote->remotejoin = false; - agg_remote->inner_alias = pstrdup(in_alias->data); - agg_remote->inner_reduce_level = agg_left->reduce_level; - agg_remote->inner_relids = in_relids; - agg_remote->inner_statement = pstrdup(agg_left->sql_statement); - agg_remote->exec_nodes = agg_left->exec_nodes; - - /* Don't forget to increment the index for the next time around! */ - agg_remote->reduce_level = root->rs_alias_index++; - - /* Generate the select clause of the remote query */ - appendStringInfoString(remote_targetlist, "SELECT"); - foreach (temp, agg_tlist) - { - TargetEntry *tle = lfirst(temp); - Node *expr = (Node *)tle->expr; - - create_remote_expr(root, local_plan, remote_targetlist, expr, agg_remote); - - /* If this is not last target entry, add a comma with space */ - if (lnext(temp)) - appendStringInfoString(remote_targetlist, ","); - } - - /* Generate the from clause of the remote query */ - appendStringInfo(remote_fromlist, "FROM (%s) %s", - agg_remote->inner_statement, agg_remote->inner_alias); - - /* - * Generate group by clause for the remote query and recompute the group by - * columE.n locations - */ - if (query->groupClause) - { - int cntCols; - Assert(IsA(local_plan, Agg)); - - /* - * Recompute the column ids of the grouping columns, - * the group column indexes computed earlier point in the - * targetlists of the scan plans under this node. But now the grouping - * column indexes will be pointing in the targetlist of the new - * RemoteQuery, hence those need to be recomputed. - */ - pgxc_locate_grouping_columns(root, agg_tlist, agg_plan->grpColIdx); - - appendStringInfoString(groupby_clause, "GROUP BY "); - for (cntCols = 0; cntCols < agg_plan->numCols; cntCols++) - { - appendStringInfo(groupby_clause, "%d", - agg_plan->grpColIdx[cntCols]); - if (cntCols < agg_plan->numCols - 1) - appendStringInfoString(groupby_clause, ", "); - } - } - - /* Generate the remote sql statement from the pieces */ - appendStringInfo(remote_sql_stmt, "%s %s %s", remote_targetlist->data, - remote_fromlist->data, groupby_clause->data); - - /* - * Set the base_tlist for the RemoteQuery node being created, it's used to - * create the tuple descriptor for the result from RemoteQuery and rewrite - * the Aggregates targetlist accept the results of the RemoteQuery. - */ - base_tlist = add_to_flat_tlist(NIL, get_tlist_exprs(agg_tlist, true)); - - /* - * We need to change the return types of the aggregates. Datanodes send the - * aggregation results in the form of transition results. - */ - foreach (temp, base_tlist) - { - TargetEntry *tle = lfirst(temp); - Node *expr = (Node *)tle->expr; - Aggref *agg; - - if (IsA(expr, Aggref)) - { - agg = (Aggref *)expr; - agg->aggtype = agg->aggtrantype; - } - } - - /* - * Create a dummy RTE for the remote query being created. Append the dummy - * range table entry to the range table. Note that this modifies the master - * copy the caller passed us, otherwise e.g EXPLAIN VERBOSE will fail to - * find the rte the Vars built below refer to. Also create the tuple - * descriptor for the result of this query from the base_tlist (targetlist - * we used to generate the remote node query). - */ - dummy_rte = makeNode(RangeTblEntry); - dummy_rte->reltupdesc = ExecTypeFromTL(base_tlist, false); - dummy_rte->rtekind = RTE_RELATION; - - /* Use a dummy relname... */ - dummy_rte->relname = "__FOREIGN_QUERY__"; - dummy_rte->eref = makeAlias("__FOREIGN_QUERY__", NIL); - - /* Rest will be zeroed out in makeNode() */ - root->parse->rtable = lappend(root->parse->rtable, dummy_rte); - dummy_rtindex = list_length(root->parse->rtable); - - /* - * Change the aggref nodes in the local Agg plan to accept the transition - * results from the remote query output. Do this after we have created base - * list, otherwise we might introduce these changes in the base list. - * Do this after the RTE for the remote query is added to the root. - */ - forboth (temp, agg_tlist, temp_remote, base_tlist) - { - TargetEntry *tle = lfirst(temp); - Node *expr = (Node *)tle->expr; - Aggref *agg; - TargetEntry *tle_remote = lfirst(temp_remote); - Node *expr_remote = (Node *)tle_remote->expr; - - if (IsA(expr, Aggref)) - { - Assert(IsA(expr_remote, Aggref)); - - /* - * Replace the args of the local Aggref with Aggref node to be - * included in RemoteQuery node, so that set_plan_refs can convert - * the args into VAR pointing to the appropriate result in the tuple - * coming from RemoteQuery node. - * PGXCTODO: should we push this change in targetlists of plans - * above? - */ - agg = (Aggref *)expr; - agg->args = list_make1(makeTargetEntry(copyObject(expr_remote), 1, NULL, false)); - } - } - - /* Build rest of the RemoteQuery node and the plan there */ - agg_remote_plan = &agg_remote->scan.plan; - - /* The join targetlist becomes this node's tlist */ - agg_remote_plan->targetlist = base_tlist; - agg_remote_plan->lefttree = NULL; - agg_remote_plan->righttree = NULL; - agg_remote->scan.scanrelid = dummy_rtindex; - agg_remote->sql_statement = remote_sql_stmt->data; - - /* set_plan_refs needs this later */ - agg_remote->base_tlist = base_tlist; - agg_remote->relname = "__FOREIGN_QUERY__"; - agg_remote->partitioned_replicated = agg_left->partitioned_replicated; - - /* - * Only quals that can be pushed to the remote side the ones in the having - * clause. Till we work out how to handle having quals in XC, we don't have - * any quals here. - * PGXCTODO: the RemoteQuery node that was earlier the lefttree of Agg - * node, may have local quals. In such case, we have to aggregate and group - * at coordinator and can not push the grouping clause to the datanodes. Is - * there a case in XC, where we can have local quals? - * We actually need not worry about costs since this is the final plan. - */ - agg_remote_plan->startup_cost = agg_left->scan.plan.startup_cost; - agg_remote_plan->total_cost = agg_left->scan.plan.total_cost; - agg_remote_plan->plan_rows = agg_left->scan.plan.plan_rows; - agg_remote_plan->plan_width = agg_left->scan.plan.plan_width; - - /* - * Modify the passed in Agg plan according to the remote query we built. - * Materialization is always needed for RemoteQuery in case we need to restart - * the scan. - */ - agg_plan->plan.lefttree = (Plan *) make_material(agg_remote_plan); - - /* Indicate that we should apply collection function directly */ - agg_plan->skip_trans = true; - - return (Plan *)agg_plan; -} - -/* - * create_remotegroup_plan - * Given a Group plan, try to push as much of the query to the datanodes and - * build a Group plan to combiner the results across the datanodes. The Sort - * node under the Group plan is pushed down to RemoteQuery plan, since the - * combiner knows how to merge the results across datanodes in sorted manner. - * Hence there is no separate Sort node. - * - * This optimization is applied under following conditions - * 1. The scan plans under the Group->Sort node is RemoteQuery - * 2. There is not separate Sort, distinct, having clause in the query. - * - * PGXCTODO: we should lift up as many of these restrictions as possible or give - * reasons why those restrictions are needed. - */ -Plan * -create_remotegroup_plan(PlannerInfo *root, Plan *local_plan) -{ - Group *group_plan; - Query *query = root->parse; Sort *sort_plan; RemoteQuery *remote_scan; /* remote query in the passed in plan */ RemoteQuery *remote_group; /* remote query after optimization */ Plan *remote_group_plan; /* plan portion of remote_group */ Plan *temp_plan; - List *local_tlist; /* target list of the local plan */ List *temp_vars; /* temporarily hold the VARs */ List *temp_vartlist; /* temporarity hold tlist of VARs */ ListCell *temp; @@ -5384,59 +5096,110 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan) StringInfo orderby_clause = makeStringInfo(); /* remote query ORDER BY */ StringInfo remote_fromlist = makeStringInfo(); /* remote query FROM */ StringInfo in_alias = makeStringInfo(); - Relids in_relids; + Relids in_relids; /* the list of Relids referenced by lefttree */ Index dummy_rtindex; List *base_tlist; RangeTblEntry *dummy_rte; - int cntCols; + int numGroupCols; + AttrNumber *grpColIdx; - if (query->havingQual || + /* + * We don't push aggregation and grouping to datanodes, in case there are + * windowing aggregates, distinct, having clause or sort clauses. + */ + if (query->hasWindowFuncs || query->distinctClause || query->sortClause || - query->hasWindowFuncs) + query->havingQual) return local_plan; - /* For now only for Group plans are treated */ - Assert(IsA(local_plan, Group)); - group_plan = (Group *)local_plan; - remote_scan = NULL; - temp_plan = local_plan->lefttree; + /* + * PGXCTODO: we don't support the parameterised queries yet. So, for the + * time being we don't apply the optimizations for parameterised queries + */ + if (root->glob->boundParams) + return local_plan; + /* for now only Agg/Group plans */ + if (local_plan && IsA(local_plan, Agg)) + { + numGroupCols = ((Agg *)local_plan)->numCols; + grpColIdx = ((Agg *)local_plan)->grpColIdx; + } + else if (local_plan && IsA(local_plan, Group)) + { + numGroupCols = ((Group *)local_plan)->numCols; + grpColIdx = ((Group *)local_plan)->grpColIdx; + } + else + return local_plan; /* - * We expect plan tree as Group->Sort->{Result}?->{Material}?->RemoteQuery, + * We expect plan tree as Group/Agg->Sort->Result->Material->RemoteQuery, + * Result, Material nodes are optional. Sort is compulsory for Group but not + * for Agg. * anything else is not handled right now. */ - if (IsA(temp_plan, Sort)) + temp_plan = local_plan->lefttree; + remote_scan = NULL; + sort_plan = NULL; + if (temp_plan && IsA(temp_plan, Sort)) { sort_plan = (Sort *)temp_plan; temp_plan = temp_plan->lefttree; } - if (IsA(temp_plan, Result)) + if (temp_plan && IsA(temp_plan, Result)) temp_plan = temp_plan->lefttree; - if (IsA(temp_plan, Material)) + if (temp_plan && IsA(temp_plan, Material)) temp_plan = temp_plan->lefttree; - if (IsA(temp_plan, RemoteQuery)) + if (temp_plan && IsA(temp_plan, RemoteQuery)) remote_scan = (RemoteQuery *)temp_plan; - if (!remote_scan || !sort_plan) + if (!remote_scan) + return local_plan; + /* + * for Group plan we expect Sort under the Group, which is always the case, + * the condition below is really for some possible non-existent case + */ + if (IsA(local_plan, Group) && !sort_plan) return local_plan; - Assert(IsA(remote_scan, RemoteQuery)); - Assert(IsA(sort_plan, Sort)); /* - * grouping_planner will add Sort node before Group node to sort the rows + * Grouping_planner may add Sort node to sort the rows * based on the columns in GROUP BY clause. Hence the columns in Sort and * those in Group node in should be same. The columns are usually in the * same order in both nodes, hence check the equality in order. If this - * condition fails, we can not handle this GROUP plan for now. + * condition fails, we can not handle this plan for now. */ - if (sort_plan->numCols != group_plan->numCols) - return local_plan; - for (cntCols = 0; cntCols < group_plan->numCols; cntCols++) + if (sort_plan) { - if (sort_plan->sortColIdx[cntCols] != group_plan->grpColIdx[cntCols]) + int cntCols; + if (sort_plan->numCols != numGroupCols) return local_plan; + for (cntCols = 0; cntCols < numGroupCols; cntCols++) + { + if (sort_plan->sortColIdx[cntCols] != grpColIdx[cntCols]) + return local_plan; + } + } + + /* find all the relations referenced by targetlist of Grouping node */ + temp_vars = pull_var_clause((Node *)local_plan->targetlist, + PVC_REJECT_PLACEHOLDERS); + findReferencedVars(temp_vars, (Plan *)remote_scan, &temp_vartlist, &in_relids); + + /* + * process the targetlist of the grouping plan, also construct the + * targetlist of the query to be shipped to the remote side + */ + base_tlist = pgxc_process_grouping_targetlist(root, &(local_plan->targetlist)); + if (!base_tlist) + { + /* + * for some reason we can not construct a targetlist shippable to the + * datanode. Resort to the plan created by grouping_planner() + */ + return local_plan; } /* @@ -5444,16 +5207,11 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan) * datanode. * 1. Create a remote query node reflecting the query to be pushed to the * datanode. - * 2. Modify the Group node passed in, to accept the results sent by the - * datanodes and group them. + * 2. Modify the Grouping node passed in, to accept the results sent by the + * Datanodes, then group and aggregate them, if needed. */ - local_tlist = local_plan->targetlist; appendStringInfo(in_alias, "%s_%d", "group", root->rs_alias_index); - /* Find all the relations referenced by targetlist of Group node */ - temp_vars = pull_var_clause((Node *)local_tlist, PVC_REJECT_PLACEHOLDERS); - findReferencedVars(temp_vars, (Plan *)remote_scan, &temp_vartlist, &in_relids); - /* * Build partial RemoteQuery node to be used for creating the Select clause * to be sent to the remote node. Rest of the node will be built later @@ -5471,13 +5229,12 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan) remote_group->inner_relids = in_relids; remote_group->inner_statement = pstrdup(remote_scan->sql_statement); remote_group->exec_nodes = remote_scan->exec_nodes; - /* Don't forget to increment the index for the next time around! */ remote_group->reduce_level = root->rs_alias_index++; /* Generate the select clause of the remote query */ appendStringInfoString(remote_targetlist, "SELECT"); - foreach (temp, local_tlist) + foreach (temp, base_tlist) { TargetEntry *tle = lfirst(temp); Node *expr = (Node *)tle->expr; @@ -5503,32 +5260,47 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan) */ if (query->groupClause) { - SimpleSort *remote_sort = makeNode(SimpleSort); - char *sep = ""; + int cntCols; + char *sep; /* - * Reuse the arrays allocated in sort_plan to create SimpleSort - * structure. sort_plan is useless henceforth. + * recompute the column ids of the grouping columns, + * the group column indexes computed earlier point in the + * targetlists of the scan plans under this node. But now the grouping + * column indexes will be pointing in the targetlist of the new + * RemoteQuery, hence those need to be recomputed */ - remote_sort->numCols = group_plan->numCols; - remote_sort->sortColIdx = sort_plan->sortColIdx; - remote_sort->sortOperators = sort_plan->sortOperators; - remote_sort->nullsFirst = sort_plan->nullsFirst; - - pgxc_locate_grouping_columns(root, local_tlist, group_plan->grpColIdx); + pgxc_locate_grouping_columns(root, base_tlist, grpColIdx); appendStringInfoString(groupby_clause, "GROUP BY "); - appendStringInfoString(orderby_clause, "ORDER BY "); - for (cntCols = 0; cntCols < group_plan->numCols; cntCols++) + sep = ""; + for (cntCols = 0; cntCols < numGroupCols; cntCols++) { - appendStringInfo(groupby_clause, "%s%d", sep, - group_plan->grpColIdx[cntCols]); - remote_sort->sortColIdx[cntCols] = group_plan->grpColIdx[cntCols]; - appendStringInfo(orderby_clause, "%s%d", sep, - remote_sort->sortColIdx[cntCols]); + appendStringInfo(groupby_clause, "%s%d", sep, grpColIdx[cntCols]); sep = ", "; } - remote_group->sort = remote_sort; + if (sort_plan) + { + SimpleSort *remote_sort = makeNode(SimpleSort); + /* + * reuse the arrays allocated in sort_plan to create SimpleSort + * structure. sort_plan is useless henceforth. + */ + remote_sort->numCols = sort_plan->numCols; + remote_sort->sortColIdx = sort_plan->sortColIdx; + remote_sort->sortOperators = sort_plan->sortOperators; + remote_sort->nullsFirst = sort_plan->nullsFirst; + appendStringInfoString(orderby_clause, "ORDER BY "); + sep = ""; + for (cntCols = 0; cntCols < remote_sort->numCols; cntCols++) + { + remote_sort->sortColIdx[cntCols] = grpColIdx[cntCols]; + appendStringInfo(orderby_clause, "%s%d", sep, + remote_sort->sortColIdx[cntCols]); + sep = ", "; + } + remote_group->sort = remote_sort; + } } /* Generate the remote sql statement from the pieces */ @@ -5537,20 +5309,13 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan) orderby_clause->data); /* - * Set the base_tlist for the RemoteQuery node being created, it's used to - * create the tuple descriptor for the result from RemoteQuery and rewrite - * the Aggregates targetlist accept the results of the RemoteQuery. - */ - base_tlist = add_to_flat_tlist(NIL, get_tlist_exprs(local_tlist, true)); - - /* * Create a dummy RTE for the remote query being created. Append the dummy * range table entry to the range table. Note that this modifies the master * copy the caller passed us, otherwise e.g EXPLAIN VERBOSE will fail to - * find the rte the Vars built below refer to. + * find the rte the Vars built below refer to. Also create the tuple + * descriptor for the result of this query from the base_tlist (targetlist + * we used to generate the remote node query). */ - - /* Cook up the reltupdesc using this base_tlist */ dummy_rte = makeNode(RangeTblEntry); dummy_rte->reltupdesc = ExecTypeFromTL(base_tlist, false); dummy_rte->rtekind = RTE_RELATION; @@ -5577,6 +5342,7 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan) remote_group->base_tlist = base_tlist; remote_group->relname = "__FOREIGN_QUERY__"; remote_group->partitioned_replicated = remote_scan->partitioned_replicated; + remote_group->read_only = query->commandType == CMD_SELECT; /* * Only quals that can be pushed to the remote side are the ones in the having @@ -5587,21 +5353,24 @@ create_remotegroup_plan(PlannerInfo *root, Plan *local_plan) * node, may have local quals. In such case, we have to aggregate and group * at coordinator and can not push the grouping clause to the datanodes. Is * there a case in XC, where we can have local quals? + * we actually need not worry about costs since this is the final plan */ - - /* We actually do not need to worry about costs since this is the final plan */ remote_group_plan->startup_cost = remote_scan->scan.plan.startup_cost; remote_group_plan->total_cost = remote_scan->scan.plan.total_cost; remote_group_plan->plan_rows = remote_scan->scan.plan.plan_rows; remote_group_plan->plan_width = remote_scan->scan.plan.plan_width; /* - * Modify the passed in Group plan according to the remote query we built. - * Materialization is always need for RemoteQuery in case we need to restart + * Modify the passed in grouping plan according to the remote query we built + * Materialization is always needed for RemoteQuery in case we need to restart * the scan. */ - group_plan->plan.lefttree = (Plan *) make_material(remote_group_plan); - return (Plan *)group_plan; + local_plan->lefttree = (Plan *) make_material(remote_group_plan); + /* indicate that we should apply collection function directly */ + if (IsA(local_plan, Agg)) + ((Agg *)local_plan)->skip_trans = true; + + return local_plan; } /* @@ -5638,4 +5407,127 @@ pgxc_locate_grouping_columns(PlannerInfo *root, List *tlist, groupColIdx[keyno++] = te->resno; } } + +/* + * pgxc_process_grouping_targetlist + * The function scans the targetlist to check if the we can push anything + * from the targetlist to the datanode. Following rules govern the choice + * 1. Either all of the aggregates are pushed to the datanode or none is pushed + * 2. If there are no aggregates, the targetlist is good to be shipped as is + * 3. If aggregates are involved in expressions, we push the aggregates to the + * datanodes but not the involving expressions. + * + * The function constructs the targetlist for the query to be pushed to the + * datanode. It modifies the local targetlist to point to the expressions in + * remote targetlist wherever necessary (e.g. aggregates) + * + * PGXCTODO: we should be careful while pushing the function expressions, it's + * better to push functions like strlen() which can be evaluated at the + * datanode, but we should avoid pushing functions which can only be evaluated + * at coordinator. + */ +static List * +pgxc_process_grouping_targetlist(PlannerInfo *root, List **local_tlist) +{ + bool shippable_remote_tlist = true; + List *remote_tlist = NIL; + int next_resno = 1; /* resno start from 1 */ + List *orig_local_tlist = NIL;/* Copy original local_tlist, in case it changes */ + ListCell *temp; + Query *query = root->parse; + + /* + * Walk through the target list and find out whether we can push the + * aggregates and grouping to datanodes. We can do so if the target list + * contains plain aggregates (without any expression involving those) and + * expressions in group by clauses only (last one to make the query legit. + */ + foreach(temp, *local_tlist) + { + TargetEntry *local_tle = lfirst(temp); + TargetEntry *remote_tle; + Node *expr = (Node *)local_tle->expr; + + if (IsA(expr, Aggref)) + { + Aggref *aggref = (Aggref *)expr; + if (aggref->aggorder || aggref->aggdistinct || aggref->agglevelsup) + { + shippable_remote_tlist = false; + break; + } + } + else if (query->hasAggs && checkExprHasAggs(expr)) + { + /* + * Targetlist expressions which have aggregates embedded inside + * are not handled right now. + * PGXCTODO: We should be able to extract those aggregates out. + * Add those to remote targetlist and modify the local + * targetlist accordingly. Thus we get those aggregates grouped + * and "transitioned" at the datanode. + */ + shippable_remote_tlist = false; + break; + } + + remote_tle = makeTargetEntry(copyObject(expr), + next_resno++, + NULL, + false); + /* Copy GROUP BY/SORT BY reference for the locating group by columns */ + remote_tle->ressortgroupref = local_tle->ressortgroupref; + remote_tlist = lappend(remote_tlist, remote_tle); + + /* + * Replace the args of the local Aggref with Aggref node to be + * included in RemoteQuery node, so that set_plan_refs can convert + * the args into VAR pointing to the appropriate result in the tuple + * coming from RemoteQuery node + * PGXCTODO: should we push this change in targetlists of plans + * above? + */ + if (IsA(expr, Aggref)) + { + Aggref *local_aggref = (Aggref *)expr; + Aggref *remote_aggref = (Aggref *)remote_tle->expr; + Assert(IsA(remote_tle->expr, Aggref)); + remote_aggref->aggtype = remote_aggref->aggtrantype; + /* + * We are about to change the local_tlist, check if we have already + * copied original local_tlist, if not take a copy + */ + if (!orig_local_tlist) + orig_local_tlist = copyObject(*local_tlist); + /* Is copyObject() needed here? probably yes */ + local_aggref->args = list_make1(makeTargetEntry(copyObject(remote_tle->expr), + 1, NULL, + false)); + } + } + + if (!shippable_remote_tlist) + { + /* + * If local_tlist has changed but we didn't find anything shippable to + * datanode, we need to restore the local_tlist to original state, + */ + if (orig_local_tlist) + *local_tlist = orig_local_tlist; + if (remote_tlist) + list_free_deep(remote_tlist); + remote_tlist = NIL; + } + else if (orig_local_tlist) + { + /* + * If we have changed the targetlist passed, we need to pass back the + * changed targetlist. Free the copy that has been created. + */ + list_free_deep(orig_local_tlist); + } + + return remote_tlist; +} + #endif diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index e64e938648..5250905b59 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1334,16 +1334,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) numGroups, agg_counts.numAggs, result_plan); -#ifdef PGXC - /* - * Grouping will certainly not increase the number of rows - * coordinator fetches from datanode, in fact it's expected to - * reduce the number drastically. Hence, try pushing GROUP BY - * clauses and aggregates to the datanode, thus saving bandwidth. - */ - if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) - result_plan = create_remoteagg_plan(root, result_plan); -#endif /* PGXC */ /* Hashed aggregation produces randomly-ordered results */ current_pathkeys = NIL; } @@ -1415,16 +1405,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) extract_grouping_ops(parse->groupClause), dNumGroups, result_plan); -#ifdef PGXC - /* - * Grouping will certainly not increase the number of rows - * coordinator fetches from datanode, in fact it's expected to - * reduce the number drastically. Hence, try pushing GROUP BY - * clauses and aggregates to the datanode, thus saving bandwidth. - */ - if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) - result_plan = create_remotegroup_plan(root, result_plan); -#endif /* PGXC */ } else if (root->hasHavingQual) { @@ -1445,6 +1425,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) parse->havingQual, NULL); } +#ifdef PGXC + /* + * Grouping will certainly not increase the number of rows + * coordinator fetches from datanode, in fact it's expected to + * reduce the number drastically. Hence, try pushing GROUP BY + * clauses and aggregates to the datanode, thus saving bandwidth. + */ + if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) + result_plan = create_remotegrouping_plan(root, result_plan); +#endif /* PGXC */ + } /* end of non-minmax-aggregate case */ /* diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 8005d47c64..e1662eac8d 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -137,8 +137,7 @@ extern Var *search_tlist_for_var(Var *var, List *jtlist); extern Plan *create_remoteinsert_plan(PlannerInfo *root, Plan *topplan); extern Plan *create_remoteupdate_plan(PlannerInfo *root, Plan *topplan); extern Plan *create_remotedelete_plan(PlannerInfo *root, Plan *topplan); -extern Plan *create_remotegroup_plan(PlannerInfo *root, Plan *local_plan); -extern Plan *create_remoteagg_plan(PlannerInfo *root, Plan *agg_plan); +extern Plan *create_remotegrouping_plan(PlannerInfo *root, Plan *local_plan); #endif #endif /* PLANMAIN_H */ diff --git a/src/test/regress/expected/xc_groupby.out b/src/test/regress/expected/xc_groupby.out index e403e37d47..31e9a8cb2e 100644 --- a/src/test/regress/expected/xc_groupby.out +++ b/src/test/regress/expected/xc_groupby.out @@ -1,3 +1,10 @@ +-- this file contains tests for GROUP BY with combinations of following +-- 1. enable_hashagg = on/off (to force the grouping by sorting) +-- 2. distributed or replicated tables across the datanodes +-- If a testcase is added to any of the combinations, please check if it's +-- applicable in other combinations as well. +-- Combination 1: enable_hashagg on and distributed tables +set enable_hashagg to on; -- create required tables and fill them with data create table tab1 (val int, val2 int); create table tab2 (val int, val2 int); @@ -11,6 +18,17 @@ select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 g 3 | 11 | 3.6666666666666667 | 3.66666666666667 | 3 (3 rows) +explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + HashAggregate (cost=1.03..1.06 rows=1 width=8) + Output: count(*), sum(val), avg(val), ((sum(val))::double precision / (count(*))::double precision), val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: val, val2 +(6 rows) + -- joins and group by select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; count | sum | avg | ?column? | val2 | val2 @@ -34,14 +52,14 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val Sort Key: tab1.val2 -> Materialize (cost=0.00..1.01 rows=1 width=8) Output: tab1.val, tab1.val2 - -> Data Node Scan (Node Count [2]) "SELECT val, val2 FROM public.tab1 tab1" on tab1 (cost=0.00..1.01 rows=1000 width=8) + -> Data Node Scan (Node Count [2]) on tab1 (cost=0.00..1.01 rows=1000 width=8) Output: tab1.val, tab1.val2 -> Sort (cost=1.02..1.03 rows=1 width=8) Output: tab2.val, tab2.val2 Sort Key: tab2.val2 -> Materialize (cost=0.00..1.01 rows=1 width=8) Output: tab2.val, tab2.val2 - -> Data Node Scan (Node Count [2]) "SELECT val, val2 FROM public.tab2 tab2" on tab2 (cost=0.00..1.01 rows=1000 width=8) + -> Data Node Scan (Node Count [2]) on tab2 (cost=0.00..1.01 rows=1000 width=8) Output: tab2.val, tab2.val2 (19 rows) @@ -57,17 +75,16 @@ explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group QUERY PLAN ---------------------------------------------------------------------------------------- HashAggregate (cost=1.05..1.06 rows=1 width=12) - Output: sum((sum(tab1.val))), ((tab1.val2 % 2)) + Output: sum((pg_catalog.sum((sum(tab1.val))))), ((tab1.val2 % 2)) -> HashAggregate (cost=1.02..1.03 rows=1 width=8) - Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2 - -> Materialize (cost=0.00..1.01 rows=1 width=8) - Output: tab1.val, tab1.val2 + Output: pg_catalog.sum((sum(tab1.val))), ((tab1.val2 % 2)), tab1.val2 + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(tab1.val)), ((tab1.val2 % 2)), tab1.val2 -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) - Output: tab1.val, tab1.val2 + Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2 (8 rows) -- group by without aggregate -set enable_hashagg to off; select val2 from tab1 group by val2; val2 ------ @@ -79,7 +96,7 @@ select val2 from tab1 group by val2; explain verbose select val2 from tab1 group by val2; QUERY PLAN ---------------------------------------------------------------------------------- - Group (cost=1.02..1.03 rows=1 width=4) + HashAggregate (cost=1.02..1.03 rows=1 width=4) Output: tab1.val2 -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: tab1.val2 @@ -90,18 +107,18 @@ explain verbose select val2 from tab1 group by val2; select val + val2 from tab1 group by val + val2; ?column? ---------- - 2 - 3 4 7 + 3 8 9 + 2 (6 rows) explain verbose select val + val2 from tab1 group by val + val2; QUERY PLAN ---------------------------------------------------------------------------------- - Group (cost=1.03..1.04 rows=1 width=8) + HashAggregate (cost=1.02..1.03 rows=1 width=8) Output: ((tab1.val + tab1.val2)) -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: ((tab1.val + tab1.val2)) @@ -112,12 +129,12 @@ explain verbose select val + val2 from tab1 group by val + val2; select val + val2, val, val2 from tab1 group by val, val2; ?column? | val | val2 ----------+-----+------ - 2 | 1 | 1 + 7 | 4 | 3 + 4 | 3 | 1 4 | 1 | 3 - 3 | 2 | 1 4 | 2 | 2 - 4 | 3 | 1 - 7 | 4 | 3 + 3 | 2 | 1 + 2 | 1 | 1 8 | 6 | 2 9 | 6 | 3 (8 rows) @@ -125,7 +142,7 @@ select val + val2, val, val2 from tab1 group by val, val2; explain verbose select val + val2, val, val2 from tab1 group by val, val2; QUERY PLAN ---------------------------------------------------------------------------------- - Group (cost=1.02..1.04 rows=1 width=8) + HashAggregate (cost=1.02..1.03 rows=1 width=8) Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2 -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2 @@ -136,18 +153,18 @@ explain verbose select val + val2, val, val2 from tab1 group by val, val2; select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; ?column? | val | val2 ----------+-----+------ - 2 | 1 | 1 - 6 | 2 | 4 5 | 3 | 2 - 7 | 3 | 4 5 | 4 | 1 6 | 4 | 2 + 6 | 2 | 4 + 2 | 1 | 1 + 7 | 3 | 4 (6 rows) explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; QUERY PLAN ------------------------------------------------------------------------------- - Group (cost=0.01..0.02 rows=1 width=0) + HashAggregate (cost=0.00..0.01 rows=1 width=0) Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2 -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2 @@ -158,16 +175,16 @@ explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; ?column? ---------- - 2 - 5 6 + 2 7 + 5 (4 rows) explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; QUERY PLAN ------------------------------------------------------------------------------- - Group (cost=0.01..0.02 rows=1 width=0) + HashAggregate (cost=0.00..0.01 rows=1 width=0) Output: ((tab1.val + tab2.val2)) -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: ((tab1.val + tab2.val2)) @@ -175,7 +192,6 @@ explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab Output: (tab1.val + tab2.val2) (6 rows) -reset enable_hashagg; -- group by with aggregates in expression select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; ?column? | val2 @@ -206,20 +222,366 @@ select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; (3 rows) explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; - QUERY PLAN ----------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------- HashAggregate (cost=1.02..1.04 rows=1 width=8) - Output: sum(val), avg(val), ((2 * val2)) - -> Result (cost=0.00..1.02 rows=1 width=8) - Output: val, val2, (2 * val2) - -> Materialize (cost=0.00..1.01 rows=1 width=8) - Output: val, val2 - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) - Output: val, val2 -(8 rows) + Output: pg_catalog.sum((sum(tab1.val))), pg_catalog.avg((avg(tab1.val))), ((2 * tab1.val2)) + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(tab1.val)), (avg(tab1.val)), ((2 * tab1.val2)) + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: sum(tab1.val), avg(tab1.val), (2 * tab1.val2) +(6 rows) drop table tab1; drop table tab2; +-- some tests involving nulls, characters, float type etc. +create table def(a int, b varchar(25)); +insert into def VALUES (NULL, NULL); +insert into def VALUES (1, NULL); +insert into def VALUES (NULL, 'One'); +insert into def VALUES (2, 'Two'); +insert into def VALUES (2, 'Two'); +insert into def VALUES (3, 'Three'); +insert into def VALUES (4, 'Three'); +insert into def VALUES (5, 'Three'); +insert into def VALUES (6, 'Two'); +insert into def VALUES (7, NULL); +insert into def VALUES (8, 'Two'); +insert into def VALUES (9, 'Three'); +insert into def VALUES (10, 'Three'); +select a,count(a) from def group by a order by a; + a | count +----+------- + 1 | 1 + 2 | 2 + 3 | 1 + 4 | 1 + 5 | 1 + 6 | 1 + 7 | 1 + 8 | 1 + 9 | 1 + 10 | 1 + | 0 +(11 rows) + +explain verbose select a,count(a) from def group by a order by a; + QUERY PLAN +---------------------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=4) + Output: a, count(a) + -> Sort (cost=1.02..1.03 rows=1 width=4) + Output: a + Sort Key: def.a + -> Result (cost=0.00..1.01 rows=1 width=4) + Output: a + -> Materialize (cost=0.00..1.01 rows=1 width=4) + Output: a, b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: a, b +(11 rows) + +select avg(a) from def group by a; + avg +------------------------ + + 6.0000000000000000 + 8.0000000000000000 + 5.0000000000000000 + 2.0000000000000000 + 1.00000000000000000000 + 9.0000000000000000 + 3.0000000000000000 + 10.0000000000000000 + 7.0000000000000000 + 4.0000000000000000 +(11 rows) + +select avg(a) from def group by a; + avg +------------------------ + + 6.0000000000000000 + 8.0000000000000000 + 5.0000000000000000 + 1.00000000000000000000 + 9.0000000000000000 + 2.0000000000000000 + 10.0000000000000000 + 7.0000000000000000 + 3.0000000000000000 + 4.0000000000000000 +(11 rows) + +explain verbose select avg(a) from def group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: pg_catalog.avg((avg(def.a))), def.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(def.a)), def.a + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: avg(def.a), def.a +(6 rows) + +select avg(a) from def group by b; + avg +-------------------- + 4.0000000000000000 + + 4.5000000000000000 + 6.2000000000000000 +(4 rows) + +explain verbose select avg(a) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=33) + Output: pg_catalog.avg((avg(def.a))), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(def.a)), def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33) + Output: avg(def.a), def.b +(6 rows) + +select sum(a) from def group by b; + sum +----- + 8 + + 18 + 31 +(4 rows) + +explain verbose select sum(a) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=33) + Output: pg_catalog.sum((sum(def.a))), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(def.a)), def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33) + Output: sum(def.a), def.b +(6 rows) + +select count(*) from def group by b; + count +------- + 3 + 1 + 4 + 5 +(4 rows) + +explain verbose select count(*) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=29) + Output: pg_catalog.count(*), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (count(*)), def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + Output: count(*), def.b +(6 rows) + +select count(*) from def where a is not null group by a; + count +------- + 1 + 1 + 1 + 1 + 1 + 1 + 2 + 1 + 1 + 1 +(10 rows) + +explain verbose select count(*) from def where a is not null group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: pg_catalog.count(*), def.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (count(*)), def.a + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: count(*), def.a +(6 rows) + +select b from def group by b; + b +------- + + One + Two + Three +(4 rows) + +explain verbose select b from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=29) + Output: def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + Output: def.b +(6 rows) + +select b,count(b) from def group by b; + b | count +-------+------- + | 0 + One | 1 + Two | 4 + Three | 5 +(4 rows) + +explain verbose select b,count(b) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=29) + Output: def.b, count((count(def.b))) + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: def.b, (count(def.b)) + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + Output: def.b, count(def.b) +(6 rows) + +select count(*) from def where b is null group by b; + count +------- + 3 +(1 row) + +explain verbose select count(*) from def where b is null group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=29) + Output: pg_catalog.count(*), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (count(*)), def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + Output: count(*), def.b +(6 rows) + +create table g(a int, b float, c numeric); +insert into g values(1,2.1,3.2); +insert into g values(1,2.1,3.2); +insert into g values(2,2.3,5.2); +select sum(a) from g group by a; + sum +----- + 2 + 2 +(2 rows) + +explain verbose select sum(a) from g group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: pg_catalog.sum((sum(g.a))), g.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(g.a)), g.a + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: sum(g.a), g.a +(6 rows) + +select sum(b) from g group by b; + sum +----- + 2.3 + 4.2 +(2 rows) + +explain verbose select sum(b) from g group by b; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=8) + Output: sum((sum(g.b))), g.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(g.b)), g.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: sum(g.b), g.b +(6 rows) + +select sum(c) from g group by b; + sum +----- + 5.2 + 6.4 +(2 rows) + +explain verbose select sum(c) from g group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=40) + Output: sum((sum(g.c))), g.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(g.c)), g.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40) + Output: sum(g.c), g.b +(6 rows) + +select avg(a) from g group by b; + avg +------------------------ + 2.0000000000000000 + 1.00000000000000000000 +(2 rows) + +explain verbose select avg(a) from g group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=12) + Output: pg_catalog.avg((avg(g.a))), g.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(g.a)), g.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=12) + Output: avg(g.a), g.b +(6 rows) + +select avg(b) from g group by c; + avg +----- + 2.3 + 2.1 +(2 rows) + +explain verbose select avg(b) from g group by c; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=40) + Output: pg_catalog.avg((avg(g.b))), g.c + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(g.b)), g.c + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40) + Output: avg(g.b), g.c +(6 rows) + +select avg(c) from g group by c; + avg +-------------------- + 5.2000000000000000 + 3.2000000000000000 +(2 rows) + +explain verbose select avg(c) from g group by c; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=32) + Output: pg_catalog.avg((avg(g.c))), g.c + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(g.c)), g.c + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=32) + Output: avg(g.c), g.c +(6 rows) + +drop table def; +drop table g; +-- Combination 2, enable_hashagg on and replicated tables. -- repeat the same tests for replicated tables -- create required tables and fill them with data create table tab1 (val int, val2 int) distribute by replication; @@ -268,14 +630,14 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val Sort Key: tab1.val2 -> Materialize (cost=0.00..1.01 rows=1 width=8) Output: tab1.val, tab1.val2 - -> Data Node Scan (Node Count [1]) "SELECT val, val2 FROM public.tab1 tab1" on tab1 (cost=0.00..1.01 rows=1000 width=8) + -> Data Node Scan (Node Count [1]) on tab1 (cost=0.00..1.01 rows=1000 width=8) Output: tab1.val, tab1.val2 -> Sort (cost=1.02..1.03 rows=1 width=8) Output: tab2.val, tab2.val2 Sort Key: tab2.val2 -> Materialize (cost=0.00..1.01 rows=1 width=8) Output: tab2.val, tab2.val2 - -> Data Node Scan (Node Count [1]) "SELECT val, val2 FROM public.tab2 tab2" on tab2 (cost=0.00..1.01 rows=1000 width=8) + -> Data Node Scan (Node Count [1]) on tab2 (cost=0.00..1.01 rows=1000 width=8) Output: tab2.val, tab2.val2 (19 rows) @@ -291,17 +653,16 @@ explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group QUERY PLAN ---------------------------------------------------------------------------------------- HashAggregate (cost=1.05..1.06 rows=1 width=12) - Output: sum((sum(tab1.val))), ((tab1.val2 % 2)) + Output: sum((pg_catalog.sum((sum(tab1.val))))), ((tab1.val2 % 2)) -> HashAggregate (cost=1.02..1.03 rows=1 width=8) - Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2 - -> Materialize (cost=0.00..1.01 rows=1 width=8) - Output: tab1.val, tab1.val2 + Output: pg_catalog.sum((sum(tab1.val))), ((tab1.val2 % 2)), tab1.val2 + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(tab1.val)), ((tab1.val2 % 2)), tab1.val2 -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) - Output: tab1.val, tab1.val2 + Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2 (8 rows) -- group by without aggregate -set enable_hashagg to off; select val2 from tab1 group by val2; val2 ------ @@ -313,7 +674,7 @@ select val2 from tab1 group by val2; explain verbose select val2 from tab1 group by val2; QUERY PLAN ---------------------------------------------------------------------------------- - Group (cost=1.02..1.03 rows=1 width=4) + HashAggregate (cost=1.02..1.03 rows=1 width=4) Output: tab1.val2 -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: tab1.val2 @@ -324,18 +685,18 @@ explain verbose select val2 from tab1 group by val2; select val + val2 from tab1 group by val + val2; ?column? ---------- - 2 - 3 4 + 3 7 8 9 + 2 (6 rows) explain verbose select val + val2 from tab1 group by val + val2; QUERY PLAN ---------------------------------------------------------------------------------- - Group (cost=1.03..1.04 rows=1 width=8) + HashAggregate (cost=1.02..1.03 rows=1 width=8) Output: ((tab1.val + tab1.val2)) -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: ((tab1.val + tab1.val2)) @@ -346,12 +707,12 @@ explain verbose select val + val2 from tab1 group by val + val2; select val + val2, val, val2 from tab1 group by val, val2; ?column? | val | val2 ----------+-----+------ - 2 | 1 | 1 + 7 | 4 | 3 + 4 | 3 | 1 + 4 | 2 | 2 4 | 1 | 3 3 | 2 | 1 - 4 | 2 | 2 - 4 | 3 | 1 - 7 | 4 | 3 + 2 | 1 | 1 8 | 6 | 2 9 | 6 | 3 (8 rows) @@ -359,7 +720,7 @@ select val + val2, val, val2 from tab1 group by val, val2; explain verbose select val + val2, val, val2 from tab1 group by val, val2; QUERY PLAN ---------------------------------------------------------------------------------- - Group (cost=1.02..1.04 rows=1 width=8) + HashAggregate (cost=1.02..1.03 rows=1 width=8) Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2 -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2 @@ -370,18 +731,18 @@ explain verbose select val + val2, val, val2 from tab1 group by val, val2; select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; ?column? | val | val2 ----------+-----+------ - 2 | 1 | 1 - 6 | 2 | 4 5 | 3 | 2 - 7 | 3 | 4 5 | 4 | 1 6 | 4 | 2 + 2 | 1 | 1 + 6 | 2 | 4 + 7 | 3 | 4 (6 rows) explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; QUERY PLAN ------------------------------------------------------------------------------- - Group (cost=0.01..0.02 rows=1 width=0) + HashAggregate (cost=0.00..0.01 rows=1 width=0) Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2 -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2 @@ -392,16 +753,16 @@ explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; ?column? ---------- - 2 - 5 6 + 2 7 + 5 (4 rows) explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; QUERY PLAN ------------------------------------------------------------------------------- - Group (cost=0.01..0.02 rows=1 width=0) + HashAggregate (cost=0.00..0.01 rows=1 width=0) Output: ((tab1.val + tab2.val2)) -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: ((tab1.val + tab2.val2)) @@ -409,7 +770,6 @@ explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab Output: (tab1.val + tab2.val2) (6 rows) -reset enable_hashagg; -- group by with aggregates in expression select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; ?column? | val2 @@ -440,22 +800,20 @@ select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; (3 rows) explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; - QUERY PLAN ----------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------- HashAggregate (cost=1.02..1.04 rows=1 width=8) - Output: sum(val), avg(val), ((2 * val2)) - -> Result (cost=0.00..1.02 rows=1 width=8) - Output: val, val2, (2 * val2) - -> Materialize (cost=0.00..1.01 rows=1 width=8) - Output: val, val2 - -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) - Output: val, val2 -(8 rows) + Output: pg_catalog.sum((sum(tab1.val))), pg_catalog.avg((avg(tab1.val))), ((2 * tab1.val2)) + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(tab1.val)), (avg(tab1.val)), ((2 * tab1.val2)) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) + Output: sum(tab1.val), avg(tab1.val), (2 * tab1.val2) +(6 rows) drop table tab1; drop table tab2; -- some tests involving nulls, characters, float type etc. -create table def(a int, b varchar(25)); +create table def(a int, b varchar(25)) distribute by replication; insert into def VALUES (NULL, NULL); insert into def VALUES (1, NULL); insert into def VALUES (NULL, 'One'); @@ -497,7 +855,7 @@ explain verbose select a,count(a) from def group by a order by a; Output: a -> Materialize (cost=0.00..1.01 rows=1 width=4) Output: a, b - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) Output: a, b (11 rows) @@ -508,15 +866,26 @@ select avg(a) from def group by a; 6.0000000000000000 8.0000000000000000 5.0000000000000000 - 2.0000000000000000 1.00000000000000000000 9.0000000000000000 + 2.0000000000000000 3.0000000000000000 10.0000000000000000 7.0000000000000000 4.0000000000000000 (11 rows) +explain verbose select avg(a) from def group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: pg_catalog.avg((avg(def.a))), def.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(def.a)), def.a + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) + Output: avg(def.a), def.a +(6 rows) + select avg(a) from def group by a; avg ------------------------ @@ -527,9 +896,9 @@ select avg(a) from def group by a; 1.00000000000000000000 9.0000000000000000 2.0000000000000000 + 3.0000000000000000 10.0000000000000000 7.0000000000000000 - 3.0000000000000000 4.0000000000000000 (11 rows) @@ -540,7 +909,7 @@ explain verbose select avg(a) from def group by a; Output: pg_catalog.avg((avg(def.a))), def.a -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (avg(def.a)), def.a - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) Output: avg(def.a), def.a (6 rows) @@ -560,7 +929,7 @@ explain verbose select avg(a) from def group by b; Output: pg_catalog.avg((avg(def.a))), def.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (avg(def.a)), def.b - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=33) Output: avg(def.a), def.b (6 rows) @@ -580,7 +949,7 @@ explain verbose select sum(a) from def group by b; Output: pg_catalog.sum((sum(def.a))), def.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (sum(def.a)), def.b - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=33) Output: sum(def.a), def.b (6 rows) @@ -600,7 +969,7 @@ explain verbose select count(*) from def group by b; Output: pg_catalog.count(*), def.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (count(*)), def.b - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=29) Output: count(*), def.b (6 rows) @@ -626,7 +995,7 @@ explain verbose select count(*) from def where a is not null group by a; Output: pg_catalog.count(*), def.a -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (count(*)), def.a - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) Output: count(*), def.a (6 rows) @@ -646,7 +1015,7 @@ explain verbose select b from def group by b; Output: def.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: def.b - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=29) Output: def.b (6 rows) @@ -666,7 +1035,7 @@ explain verbose select b,count(b) from def group by b; Output: def.b, count((count(def.b))) -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: def.b, (count(def.b)) - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=29) Output: def.b, count(def.b) (6 rows) @@ -683,11 +1052,11 @@ explain verbose select count(*) from def where b is null group by b; Output: pg_catalog.count(*), def.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (count(*)), def.b - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=29) Output: count(*), def.b (6 rows) -create table g(a int, b float, c numeric); +create table g(a int, b float, c numeric) distribute by replication; insert into g values(1,2.1,3.2); insert into g values(1,2.1,3.2); insert into g values(2,2.3,5.2); @@ -705,7 +1074,7 @@ explain verbose select sum(a) from g group by a; Output: pg_catalog.sum((sum(g.a))), g.a -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (sum(g.a)), g.a - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) Output: sum(g.a), g.a (6 rows) @@ -723,7 +1092,7 @@ explain verbose select sum(b) from g group by b; Output: sum((sum(g.b))), g.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (sum(g.b)), g.b - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) Output: sum(g.b), g.b (6 rows) @@ -741,7 +1110,7 @@ explain verbose select sum(c) from g group by b; Output: sum((sum(g.c))), g.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (sum(g.c)), g.b - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=40) Output: sum(g.c), g.b (6 rows) @@ -759,7 +1128,7 @@ explain verbose select avg(a) from g group by b; Output: pg_catalog.avg((avg(g.a))), g.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (avg(g.a)), g.b - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=12) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=12) Output: avg(g.a), g.b (6 rows) @@ -777,7 +1146,7 @@ explain verbose select avg(b) from g group by c; Output: pg_catalog.avg((avg(g.b))), g.c -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (avg(g.b)), g.c - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=40) Output: avg(g.b), g.c (6 rows) @@ -795,14 +1164,264 @@ explain verbose select avg(c) from g group by c; Output: pg_catalog.avg((avg(g.c))), g.c -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (avg(g.c)), g.c - -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=32) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=32) Output: avg(g.c), g.c (6 rows) drop table def; drop table g; --- same test with replicated tables -create table def(a int, b varchar(25)) distribute by replication; +reset enable_hashagg; +-- Combination 3 enable_hashagg off and distributed tables +set enable_hashagg to off; +-- create required tables and fill them with data +create table tab1 (val int, val2 int); +create table tab2 (val int, val2 int); +insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3); +insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2); +select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; + count | sum | avg | ?column? | val2 +-------+-----+--------------------+------------------+------ + 3 | 6 | 2.0000000000000000 | 2 | 1 + 2 | 8 | 4.0000000000000000 | 4 | 2 + 3 | 11 | 3.6666666666666667 | 3.66666666666667 | 3 +(3 rows) + +explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.07 rows=1 width=8) + Output: count(*), sum(val), avg(val), ((sum(val))::double precision / (count(*))::double precision), val2 + -> Sort (cost=1.02..1.03 rows=1 width=8) + Output: val, val2 + Sort Key: tab1.val2 + -> Result (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: val, val2 +(11 rows) + +-- joins and group by +select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; + count | sum | avg | ?column? | val2 | val2 +-------+-----+---------------------+------------------+------+------ + 9 | 78 | 8.6666666666666667 | 8.66666666666667 | 1 | 1 + 6 | 96 | 16.0000000000000000 | 16 | 2 | 2 + 3 | | | | 3 | + 3 | | | | | 4 +(4 rows) + +explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + GroupAggregate (cost=2.08..2.14 rows=1 width=16) + Output: count(*), sum((tab1.val * tab2.val)), avg((tab1.val * tab2.val)), ((sum((tab1.val * tab2.val)))::double precision / (count(*))::double precision), tab1.val2, tab2.val2 + -> Sort (cost=2.08..2.09 rows=1 width=16) + Output: tab1.val, tab2.val, tab1.val2, tab2.val2 + Sort Key: tab1.val2, tab2.val2 + -> Merge Full Join (cost=2.05..2.07 rows=1 width=16) + Output: tab1.val, tab2.val, tab1.val2, tab2.val2 + Merge Cond: (tab1.val2 = tab2.val2) + -> Sort (cost=1.02..1.03 rows=1 width=8) + Output: tab1.val, tab1.val2 + Sort Key: tab1.val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: tab1.val, tab1.val2 + -> Data Node Scan (Node Count [2]) on tab1 (cost=0.00..1.01 rows=1000 width=8) + Output: tab1.val, tab1.val2 + -> Sort (cost=1.02..1.03 rows=1 width=8) + Output: tab2.val, tab2.val2 + Sort Key: tab2.val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: tab2.val, tab2.val2 + -> Data Node Scan (Node Count [2]) on tab2 (cost=0.00..1.01 rows=1000 width=8) + Output: tab2.val, tab2.val2 +(22 rows) + +-- aggregates over aggregates +select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; + sum +----- + 8 + 17 +(2 rows) + +explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + GroupAggregate (cost=1.07..1.09 rows=1 width=12) + Output: sum(q1.y), q1.x + -> Sort (cost=1.07..1.07 rows=1 width=12) + Output: q1.y, q1.x + Sort Key: q1.x + -> Subquery Scan on q1 (cost=1.02..1.06 rows=1 width=12) + Output: q1.y, q1.x + -> GroupAggregate (cost=1.02..1.05 rows=1 width=8) + Output: pg_catalog.sum((sum(tab1.val))), ((tab1.val2 % 2)), tab1.val2 + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(tab1.val)), ((tab1.val2 % 2)), tab1.val2 + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2 +(13 rows) + +-- group by without aggregate +select val2 from tab1 group by val2; + val2 +------ + 1 + 2 + 3 +(3 rows) + +explain verbose select val2 from tab1 group by val2; + QUERY PLAN +---------------------------------------------------------------------------------- + Group (cost=1.02..1.03 rows=1 width=4) + Output: tab1.val2 + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: tab1.val2 + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: tab1.val2 +(6 rows) + +select val + val2 from tab1 group by val + val2; + ?column? +---------- + 2 + 3 + 4 + 7 + 8 + 9 +(6 rows) + +explain verbose select val + val2 from tab1 group by val + val2; + QUERY PLAN +---------------------------------------------------------------------------------- + Group (cost=1.03..1.04 rows=1 width=8) + Output: ((tab1.val + tab1.val2)) + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: ((tab1.val + tab1.val2)) + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: (tab1.val + tab1.val2) +(6 rows) + +select val + val2, val, val2 from tab1 group by val, val2; + ?column? | val | val2 +----------+-----+------ + 2 | 1 | 1 + 4 | 1 | 3 + 3 | 2 | 1 + 4 | 2 | 2 + 4 | 3 | 1 + 7 | 4 | 3 + 8 | 6 | 2 + 9 | 6 | 3 +(8 rows) + +explain verbose select val + val2, val, val2 from tab1 group by val, val2; + QUERY PLAN +---------------------------------------------------------------------------------- + Group (cost=1.02..1.04 rows=1 width=8) + Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2 + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2 + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: (tab1.val + tab1.val2), tab1.val, tab1.val2 +(6 rows) + +select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; + ?column? | val | val2 +----------+-----+------ + 2 | 1 | 1 + 6 | 2 | 4 + 5 | 3 | 2 + 7 | 3 | 4 + 5 | 4 | 1 + 6 | 4 | 2 +(6 rows) + +explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; + QUERY PLAN +------------------------------------------------------------------------------- + Group (cost=0.01..0.02 rows=1 width=0) + Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2 + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2 + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1 width=4) + Output: (tab1.val + tab2.val2), tab1.val, tab2.val2 +(6 rows) + +select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; + ?column? +---------- + 2 + 5 + 6 + 7 +(4 rows) + +explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; + QUERY PLAN +------------------------------------------------------------------------------- + Group (cost=0.01..0.02 rows=1 width=0) + Output: ((tab1.val + tab2.val2)) + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: ((tab1.val + tab2.val2)) + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1 width=4) + Output: (tab1.val + tab2.val2) +(6 rows) + +-- group by with aggregates in expression +select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; + ?column? | val2 +---------------------+------ + 11.0000000000000000 | 1 + 14.0000000000000000 | 2 + 17.6666666666666667 | 3 +(3 rows) + +explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; + QUERY PLAN +---------------------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.06 rows=1 width=8) + Output: (((count(*) + sum(val)))::numeric + avg(val)), val2 + -> Sort (cost=1.02..1.03 rows=1 width=8) + Output: val, val2 + Sort Key: tab1.val2 + -> Result (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: val, val2 +(11 rows) + +-- group by with expressions in group by clause +select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; + sum | avg | ?column? +-----+--------------------+---------- + 6 | 2.0000000000000000 | 2 + 8 | 4.0000000000000000 | 4 + 11 | 3.6666666666666667 | 6 +(3 rows) + +explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; + QUERY PLAN +----------------------------------------------------------------------------------------------- + GroupAggregate (cost=1.03..1.06 rows=1 width=8) + Output: pg_catalog.sum((sum(tab1.val))), pg_catalog.avg((avg(tab1.val))), ((2 * tab1.val2)) + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(tab1.val)), (avg(tab1.val)), ((2 * tab1.val2)) + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: sum(tab1.val), avg(tab1.val), (2 * tab1.val2) +(6 rows) + +drop table tab1; +drop table tab2; +-- some tests involving nulls, characters, float type etc. +create table def(a int, b varchar(25)); insert into def VALUES (NULL, NULL); insert into def VALUES (1, NULL); insert into def VALUES (NULL, 'One'); @@ -844,30 +1463,626 @@ explain verbose select a,count(a) from def group by a order by a; Output: a -> Materialize (cost=0.00..1.01 rows=1 width=4) Output: a, b - -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) Output: a, b (11 rows) select avg(a) from def group by a; avg ------------------------ - + 1.00000000000000000000 + 2.0000000000000000 + 3.0000000000000000 + 4.0000000000000000 + 5.0000000000000000 6.0000000000000000 + 7.0000000000000000 8.0000000000000000 - 5.0000000000000000 - 1.00000000000000000000 9.0000000000000000 + 10.0000000000000000 + +(11 rows) + +select avg(a) from def group by a; + avg +------------------------ + 1.00000000000000000000 2.0000000000000000 3.0000000000000000 - 10.0000000000000000 + 4.0000000000000000 + 5.0000000000000000 + 6.0000000000000000 7.0000000000000000 + 8.0000000000000000 + 9.0000000000000000 + 10.0000000000000000 + +(11 rows) + +explain verbose select avg(a) from def group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=4) + Output: pg_catalog.avg((avg(def.a))), def.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(def.a)), def.a + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: avg(def.a), def.a +(6 rows) + +select avg(a) from def group by b; + avg +-------------------- + + 6.2000000000000000 + 4.5000000000000000 + 4.0000000000000000 +(4 rows) + +explain verbose select avg(a) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=33) + Output: pg_catalog.avg((avg(def.a))), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(def.a)), def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33) + Output: avg(def.a), def.b +(6 rows) + +select sum(a) from def group by b; + sum +----- + + 31 + 18 + 8 +(4 rows) + +explain verbose select sum(a) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=33) + Output: pg_catalog.sum((sum(def.a))), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(def.a)), def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33) + Output: sum(def.a), def.b +(6 rows) + +select count(*) from def group by b; + count +------- + 1 + 5 + 4 + 3 +(4 rows) + +explain verbose select count(*) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=29) + Output: pg_catalog.count(*), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (count(*)), def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + Output: count(*), def.b +(6 rows) + +select count(*) from def where a is not null group by a; + count +------- + 1 + 2 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 +(10 rows) + +explain verbose select count(*) from def where a is not null group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=4) + Output: pg_catalog.count(*), def.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (count(*)), def.a + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: count(*), def.a +(6 rows) + +select b from def group by b; + b +------- + One + Three + Two + +(4 rows) + +explain verbose select b from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + Group (cost=1.02..1.03 rows=1 width=29) + Output: def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + Output: def.b +(6 rows) + +select b,count(b) from def group by b; + b | count +-------+------- + One | 1 + Three | 5 + Two | 4 + | 0 +(4 rows) + +explain verbose select b,count(b) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=29) + Output: def.b, count((count(def.b))) + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: def.b, (count(def.b)) + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + Output: def.b, count(def.b) +(6 rows) + +select count(*) from def where b is null group by b; + count +------- + 3 +(1 row) + +explain verbose select count(*) from def where b is null group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=29) + Output: pg_catalog.count(*), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (count(*)), def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + Output: count(*), def.b +(6 rows) + +create table g(a int, b float, c numeric); +insert into g values(1,2.1,3.2); +insert into g values(1,2.1,3.2); +insert into g values(2,2.3,5.2); +select sum(a) from g group by a; + sum +----- + 2 + 2 +(2 rows) + +explain verbose select sum(a) from g group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=4) + Output: pg_catalog.sum((sum(g.a))), g.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(g.a)), g.a + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: sum(g.a), g.a +(6 rows) + +select sum(b) from g group by b; + sum +----- + 4.2 + 2.3 +(2 rows) + +explain verbose select sum(b) from g group by b; + QUERY PLAN +---------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=8) + Output: sum((sum(g.b))), g.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(g.b)), g.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: sum(g.b), g.b +(6 rows) + +select sum(c) from g group by b; + sum +----- + 6.4 + 5.2 +(2 rows) + +explain verbose select sum(c) from g group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=40) + Output: sum((sum(g.c))), g.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(g.c)), g.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40) + Output: sum(g.c), g.b +(6 rows) + +select avg(a) from g group by b; + avg +------------------------ + 1.00000000000000000000 + 2.0000000000000000 +(2 rows) + +explain verbose select avg(a) from g group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=12) + Output: pg_catalog.avg((avg(g.a))), g.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(g.a)), g.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=12) + Output: avg(g.a), g.b +(6 rows) + +select avg(b) from g group by c; + avg +----- + 2.1 + 2.3 +(2 rows) + +explain verbose select avg(b) from g group by c; + QUERY PLAN +----------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=40) + Output: pg_catalog.avg((avg(g.b))), g.c + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(g.b)), g.c + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40) + Output: avg(g.b), g.c +(6 rows) + +select avg(c) from g group by c; + avg +-------------------- + 3.2000000000000000 + 5.2000000000000000 +(2 rows) + +explain verbose select avg(c) from g group by c; + QUERY PLAN +----------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=32) + Output: pg_catalog.avg((avg(g.c))), g.c + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(g.c)), g.c + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=32) + Output: avg(g.c), g.c +(6 rows) + +drop table def; +drop table g; +-- Combination 4 enable_hashagg off and replicated tables. +-- repeat the same tests for replicated tables +-- create required tables and fill them with data +create table tab1 (val int, val2 int) distribute by replication; +create table tab2 (val int, val2 int) distribute by replication; +insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3); +insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2); +select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; + count | sum | avg | ?column? | val2 +-------+-----+--------------------+------------------+------ + 3 | 6 | 2.0000000000000000 | 2 | 1 + 2 | 8 | 4.0000000000000000 | 4 | 2 + 3 | 11 | 3.6666666666666667 | 3.66666666666667 | 3 +(3 rows) + +explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.07 rows=1 width=8) + Output: count(*), sum(val), avg(val), ((sum(val))::double precision / (count(*))::double precision), val2 + -> Sort (cost=1.02..1.03 rows=1 width=8) + Output: val, val2 + Sort Key: tab1.val2 + -> Result (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) + Output: val, val2 +(11 rows) + +-- joins and group by +select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; + count | sum | avg | ?column? | val2 | val2 +-------+-----+---------------------+------------------+------+------ + 9 | 78 | 8.6666666666666667 | 8.66666666666667 | 1 | 1 + 6 | 96 | 16.0000000000000000 | 16 | 2 | 2 + 3 | | | | 3 | + 3 | | | | | 4 +(4 rows) + +explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + GroupAggregate (cost=2.08..2.14 rows=1 width=16) + Output: count(*), sum((tab1.val * tab2.val)), avg((tab1.val * tab2.val)), ((sum((tab1.val * tab2.val)))::double precision / (count(*))::double precision), tab1.val2, tab2.val2 + -> Sort (cost=2.08..2.09 rows=1 width=16) + Output: tab1.val, tab2.val, tab1.val2, tab2.val2 + Sort Key: tab1.val2, tab2.val2 + -> Merge Full Join (cost=2.05..2.07 rows=1 width=16) + Output: tab1.val, tab2.val, tab1.val2, tab2.val2 + Merge Cond: (tab1.val2 = tab2.val2) + -> Sort (cost=1.02..1.03 rows=1 width=8) + Output: tab1.val, tab1.val2 + Sort Key: tab1.val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: tab1.val, tab1.val2 + -> Data Node Scan (Node Count [1]) on tab1 (cost=0.00..1.01 rows=1000 width=8) + Output: tab1.val, tab1.val2 + -> Sort (cost=1.02..1.03 rows=1 width=8) + Output: tab2.val, tab2.val2 + Sort Key: tab2.val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: tab2.val, tab2.val2 + -> Data Node Scan (Node Count [1]) on tab2 (cost=0.00..1.01 rows=1000 width=8) + Output: tab2.val, tab2.val2 +(22 rows) + +-- aggregates over aggregates +select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; + sum +----- + 8 + 17 +(2 rows) + +explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; + QUERY PLAN +---------------------------------------------------------------------------------------------------- + GroupAggregate (cost=1.07..1.09 rows=1 width=12) + Output: sum(q1.y), q1.x + -> Sort (cost=1.07..1.07 rows=1 width=12) + Output: q1.y, q1.x + Sort Key: q1.x + -> Subquery Scan on q1 (cost=1.02..1.06 rows=1 width=12) + Output: q1.y, q1.x + -> GroupAggregate (cost=1.02..1.05 rows=1 width=8) + Output: pg_catalog.sum((sum(tab1.val))), ((tab1.val2 % 2)), tab1.val2 + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(tab1.val)), ((tab1.val2 % 2)), tab1.val2 + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) + Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2 +(13 rows) + +-- group by without aggregate +select val2 from tab1 group by val2; + val2 +------ + 1 + 2 + 3 +(3 rows) + +explain verbose select val2 from tab1 group by val2; + QUERY PLAN +---------------------------------------------------------------------------------- + Group (cost=1.02..1.03 rows=1 width=4) + Output: tab1.val2 + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: tab1.val2 + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) + Output: tab1.val2 +(6 rows) + +select val + val2 from tab1 group by val + val2; + ?column? +---------- + 2 + 3 + 4 + 7 + 8 + 9 +(6 rows) + +explain verbose select val + val2 from tab1 group by val + val2; + QUERY PLAN +---------------------------------------------------------------------------------- + Group (cost=1.03..1.04 rows=1 width=8) + Output: ((tab1.val + tab1.val2)) + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: ((tab1.val + tab1.val2)) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) + Output: (tab1.val + tab1.val2) +(6 rows) + +select val + val2, val, val2 from tab1 group by val, val2; + ?column? | val | val2 +----------+-----+------ + 2 | 1 | 1 + 4 | 1 | 3 + 3 | 2 | 1 + 4 | 2 | 2 + 4 | 3 | 1 + 7 | 4 | 3 + 8 | 6 | 2 + 9 | 6 | 3 +(8 rows) + +explain verbose select val + val2, val, val2 from tab1 group by val, val2; + QUERY PLAN +---------------------------------------------------------------------------------- + Group (cost=1.02..1.04 rows=1 width=8) + Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2 + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: ((tab1.val + tab1.val2)), tab1.val, tab1.val2 + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) + Output: (tab1.val + tab1.val2), tab1.val, tab1.val2 +(6 rows) + +select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; + ?column? | val | val2 +----------+-----+------ + 2 | 1 | 1 + 6 | 2 | 4 + 5 | 3 | 2 + 7 | 3 | 4 + 5 | 4 | 1 + 6 | 4 | 2 +(6 rows) + +explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; + QUERY PLAN +------------------------------------------------------------------------------- + Group (cost=0.01..0.02 rows=1 width=0) + Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2 + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: ((tab1.val + tab2.val2)), tab1.val, tab2.val2 + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1 width=4) + Output: (tab1.val + tab2.val2), tab1.val, tab2.val2 +(6 rows) + +select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; + ?column? +---------- + 2 + 5 + 6 + 7 +(4 rows) + +explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; + QUERY PLAN +------------------------------------------------------------------------------- + Group (cost=0.01..0.02 rows=1 width=0) + Output: ((tab1.val + tab2.val2)) + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: ((tab1.val + tab2.val2)) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1 width=4) + Output: (tab1.val + tab2.val2) +(6 rows) + +-- group by with aggregates in expression +select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; + ?column? | val2 +---------------------+------ + 11.0000000000000000 | 1 + 14.0000000000000000 | 2 + 17.6666666666666667 | 3 +(3 rows) + +explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; + QUERY PLAN +---------------------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.06 rows=1 width=8) + Output: (((count(*) + sum(val)))::numeric + avg(val)), val2 + -> Sort (cost=1.02..1.03 rows=1 width=8) + Output: val, val2 + Sort Key: tab1.val2 + -> Result (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) + Output: val, val2 +(11 rows) + +-- group by with expressions in group by clause +select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; + sum | avg | ?column? +-----+--------------------+---------- + 6 | 2.0000000000000000 | 2 + 8 | 4.0000000000000000 | 4 + 11 | 3.6666666666666667 | 6 +(3 rows) + +explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; + QUERY PLAN +----------------------------------------------------------------------------------------------- + GroupAggregate (cost=1.03..1.06 rows=1 width=8) + Output: pg_catalog.sum((sum(tab1.val))), pg_catalog.avg((avg(tab1.val))), ((2 * tab1.val2)) + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(tab1.val)), (avg(tab1.val)), ((2 * tab1.val2)) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) + Output: sum(tab1.val), avg(tab1.val), (2 * tab1.val2) +(6 rows) + +drop table tab1; +drop table tab2; +-- some tests involving nulls, characters, float type etc. +create table def(a int, b varchar(25)) distribute by replication; +insert into def VALUES (NULL, NULL); +insert into def VALUES (1, NULL); +insert into def VALUES (NULL, 'One'); +insert into def VALUES (2, 'Two'); +insert into def VALUES (2, 'Two'); +insert into def VALUES (3, 'Three'); +insert into def VALUES (4, 'Three'); +insert into def VALUES (5, 'Three'); +insert into def VALUES (6, 'Two'); +insert into def VALUES (7, NULL); +insert into def VALUES (8, 'Two'); +insert into def VALUES (9, 'Three'); +insert into def VALUES (10, 'Three'); +select a,count(a) from def group by a order by a; + a | count +----+------- + 1 | 1 + 2 | 2 + 3 | 1 + 4 | 1 + 5 | 1 + 6 | 1 + 7 | 1 + 8 | 1 + 9 | 1 + 10 | 1 + | 0 +(11 rows) + +explain verbose select a,count(a) from def group by a order by a; + QUERY PLAN +---------------------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=4) + Output: a, count(a) + -> Sort (cost=1.02..1.03 rows=1 width=4) + Output: a + Sort Key: def.a + -> Result (cost=0.00..1.01 rows=1 width=4) + Output: a + -> Materialize (cost=0.00..1.01 rows=1 width=4) + Output: a, b + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) + Output: a, b +(11 rows) + +select avg(a) from def group by a; + avg +------------------------ + 1.00000000000000000000 + 2.0000000000000000 + 3.0000000000000000 4.0000000000000000 + 5.0000000000000000 + 6.0000000000000000 + 7.0000000000000000 + 8.0000000000000000 + 9.0000000000000000 + 10.0000000000000000 + (11 rows) explain verbose select avg(a) from def group by a; QUERY PLAN ---------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=4) + GroupAggregate (cost=1.02..1.05 rows=1 width=4) Output: pg_catalog.avg((avg(def.a))), def.a -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (avg(def.a)), def.a @@ -878,23 +2093,23 @@ explain verbose select avg(a) from def group by a; select avg(a) from def group by a; avg ------------------------ - - 6.0000000000000000 - 8.0000000000000000 - 5.0000000000000000 1.00000000000000000000 - 9.0000000000000000 2.0000000000000000 3.0000000000000000 - 10.0000000000000000 - 7.0000000000000000 4.0000000000000000 + 5.0000000000000000 + 6.0000000000000000 + 7.0000000000000000 + 8.0000000000000000 + 9.0000000000000000 + 10.0000000000000000 + (11 rows) explain verbose select avg(a) from def group by a; QUERY PLAN ---------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=4) + GroupAggregate (cost=1.02..1.05 rows=1 width=4) Output: pg_catalog.avg((avg(def.a))), def.a -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (avg(def.a)), def.a @@ -905,16 +2120,16 @@ explain verbose select avg(a) from def group by a; select avg(a) from def group by b; avg -------------------- - 4.0000000000000000 - 4.5000000000000000 6.2000000000000000 + 4.5000000000000000 + 4.0000000000000000 (4 rows) explain verbose select avg(a) from def group by b; QUERY PLAN ----------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=33) + GroupAggregate (cost=1.02..1.05 rows=1 width=33) Output: pg_catalog.avg((avg(def.a))), def.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (avg(def.a)), def.b @@ -925,16 +2140,16 @@ explain verbose select avg(a) from def group by b; select sum(a) from def group by b; sum ----- - 8 - 18 31 + 18 + 8 (4 rows) explain verbose select sum(a) from def group by b; QUERY PLAN ----------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=33) + GroupAggregate (cost=1.02..1.05 rows=1 width=33) Output: pg_catalog.sum((sum(def.a))), def.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (sum(def.a)), def.b @@ -945,16 +2160,16 @@ explain verbose select sum(a) from def group by b; select count(*) from def group by b; count ------- - 3 1 - 4 5 + 4 + 3 (4 rows) explain verbose select count(*) from def group by b; QUERY PLAN ----------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=29) + GroupAggregate (cost=1.02..1.05 rows=1 width=29) Output: pg_catalog.count(*), def.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (count(*)), def.b @@ -966,12 +2181,12 @@ select count(*) from def where a is not null group by a; count ------- 1 + 2 1 1 1 1 1 - 2 1 1 1 @@ -980,7 +2195,7 @@ select count(*) from def where a is not null group by a; explain verbose select count(*) from def where a is not null group by a; QUERY PLAN ---------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=4) + GroupAggregate (cost=1.02..1.05 rows=1 width=4) Output: pg_catalog.count(*), def.a -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (count(*)), def.a @@ -991,16 +2206,16 @@ explain verbose select count(*) from def where a is not null group by a; select b from def group by b; b ------- - One - Two Three + Two + (4 rows) explain verbose select b from def group by b; QUERY PLAN ----------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=29) + Group (cost=1.02..1.03 rows=1 width=29) Output: def.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: def.b @@ -1011,16 +2226,16 @@ explain verbose select b from def group by b; select b,count(b) from def group by b; b | count -------+------- - | 0 One | 1 - Two | 4 Three | 5 + Two | 4 + | 0 (4 rows) explain verbose select b,count(b) from def group by b; QUERY PLAN ----------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=29) + GroupAggregate (cost=1.02..1.05 rows=1 width=29) Output: def.b, count((count(def.b))) -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: def.b, (count(def.b)) @@ -1037,7 +2252,7 @@ select count(*) from def where b is null group by b; explain verbose select count(*) from def where b is null group by b; QUERY PLAN ----------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=29) + GroupAggregate (cost=1.02..1.05 rows=1 width=29) Output: pg_catalog.count(*), def.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (count(*)), def.b @@ -1059,7 +2274,7 @@ select sum(a) from g group by a; explain verbose select sum(a) from g group by a; QUERY PLAN ---------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=4) + GroupAggregate (cost=1.02..1.05 rows=1 width=4) Output: pg_catalog.sum((sum(g.a))), g.a -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (sum(g.a)), g.a @@ -1070,14 +2285,14 @@ explain verbose select sum(a) from g group by a; select sum(b) from g group by b; sum ----- - 2.3 4.2 + 2.3 (2 rows) explain verbose select sum(b) from g group by b; QUERY PLAN ---------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=8) + GroupAggregate (cost=1.02..1.05 rows=1 width=8) Output: sum((sum(g.b))), g.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (sum(g.b)), g.b @@ -1088,14 +2303,14 @@ explain verbose select sum(b) from g group by b; select sum(c) from g group by b; sum ----- - 5.2 6.4 + 5.2 (2 rows) explain verbose select sum(c) from g group by b; QUERY PLAN ----------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=40) + GroupAggregate (cost=1.02..1.05 rows=1 width=40) Output: sum((sum(g.c))), g.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (sum(g.c)), g.b @@ -1106,14 +2321,14 @@ explain verbose select sum(c) from g group by b; select avg(a) from g group by b; avg ------------------------ - 2.0000000000000000 1.00000000000000000000 + 2.0000000000000000 (2 rows) explain verbose select avg(a) from g group by b; QUERY PLAN ----------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=12) + GroupAggregate (cost=1.02..1.05 rows=1 width=12) Output: pg_catalog.avg((avg(g.a))), g.b -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (avg(g.a)), g.b @@ -1124,14 +2339,14 @@ explain verbose select avg(a) from g group by b; select avg(b) from g group by c; avg ----- - 2.3 2.1 + 2.3 (2 rows) explain verbose select avg(b) from g group by c; QUERY PLAN ----------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=40) + GroupAggregate (cost=1.02..1.05 rows=1 width=40) Output: pg_catalog.avg((avg(g.b))), g.c -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (avg(g.b)), g.c @@ -1142,14 +2357,14 @@ explain verbose select avg(b) from g group by c; select avg(c) from g group by c; avg -------------------- - 5.2000000000000000 3.2000000000000000 + 5.2000000000000000 (2 rows) explain verbose select avg(c) from g group by c; QUERY PLAN ----------------------------------------------------------------------------------- - HashAggregate (cost=1.02..1.03 rows=1 width=32) + GroupAggregate (cost=1.02..1.05 rows=1 width=32) Output: pg_catalog.avg((avg(g.c))), g.c -> Materialize (cost=0.00..0.00 rows=0 width=0) Output: (avg(g.c)), g.c @@ -1159,3 +2374,4 @@ explain verbose select avg(c) from g group by c; drop table def; drop table g; +reset enable_hashagg; diff --git a/src/test/regress/sql/xc_groupby.sql b/src/test/regress/sql/xc_groupby.sql index 1fd0f50e11..f262d7281e 100644 --- a/src/test/regress/sql/xc_groupby.sql +++ b/src/test/regress/sql/xc_groupby.sql @@ -1,9 +1,18 @@ +-- this file contains tests for GROUP BY with combinations of following +-- 1. enable_hashagg = on/off (to force the grouping by sorting) +-- 2. distributed or replicated tables across the datanodes +-- If a testcase is added to any of the combinations, please check if it's +-- applicable in other combinations as well. + +-- Combination 1: enable_hashagg on and distributed tables +set enable_hashagg to on; -- create required tables and fill them with data create table tab1 (val int, val2 int); create table tab2 (val int, val2 int); insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3); insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2); select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; +explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; -- joins and group by select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; @@ -11,7 +20,6 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; -- group by without aggregate -set enable_hashagg to off; select val2 from tab1 group by val2; explain verbose select val2 from tab1 group by val2; select val + val2 from tab1 group by val + val2; @@ -22,7 +30,6 @@ select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; -reset enable_hashagg; -- group by with aggregates in expression select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; @@ -32,6 +39,66 @@ explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; drop table tab1; drop table tab2; +-- some tests involving nulls, characters, float type etc. +create table def(a int, b varchar(25)); +insert into def VALUES (NULL, NULL); +insert into def VALUES (1, NULL); +insert into def VALUES (NULL, 'One'); +insert into def VALUES (2, 'Two'); +insert into def VALUES (2, 'Two'); +insert into def VALUES (3, 'Three'); +insert into def VALUES (4, 'Three'); +insert into def VALUES (5, 'Three'); +insert into def VALUES (6, 'Two'); +insert into def VALUES (7, NULL); +insert into def VALUES (8, 'Two'); +insert into def VALUES (9, 'Three'); +insert into def VALUES (10, 'Three'); + +select a,count(a) from def group by a order by a; +explain verbose select a,count(a) from def group by a order by a; +select avg(a) from def group by a; +select avg(a) from def group by a; +explain verbose select avg(a) from def group by a; +select avg(a) from def group by b; +explain verbose select avg(a) from def group by b; +select sum(a) from def group by b; +explain verbose select sum(a) from def group by b; +select count(*) from def group by b; +explain verbose select count(*) from def group by b; +select count(*) from def where a is not null group by a; +explain verbose select count(*) from def where a is not null group by a; + +select b from def group by b; +explain verbose select b from def group by b; +select b,count(b) from def group by b; +explain verbose select b,count(b) from def group by b; +select count(*) from def where b is null group by b; +explain verbose select count(*) from def where b is null group by b; + +create table g(a int, b float, c numeric); +insert into g values(1,2.1,3.2); +insert into g values(1,2.1,3.2); +insert into g values(2,2.3,5.2); + +select sum(a) from g group by a; +explain verbose select sum(a) from g group by a; +select sum(b) from g group by b; +explain verbose select sum(b) from g group by b; +select sum(c) from g group by b; +explain verbose select sum(c) from g group by b; + +select avg(a) from g group by b; +explain verbose select avg(a) from g group by b; +select avg(b) from g group by c; +explain verbose select avg(b) from g group by c; +select avg(c) from g group by c; +explain verbose select avg(c) from g group by c; + +drop table def; +drop table g; + +-- Combination 2, enable_hashagg on and replicated tables. -- repeat the same tests for replicated tables -- create required tables and fill them with data create table tab1 (val int, val2 int) distribute by replication; @@ -47,7 +114,6 @@ explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; -- group by without aggregate -set enable_hashagg to off; select val2 from tab1 group by val2; explain verbose select val2 from tab1 group by val2; select val + val2 from tab1 group by val + val2; @@ -58,7 +124,102 @@ select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; +-- group by with aggregates in expression +select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; +explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; +-- group by with expressions in group by clause +select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; +explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; +drop table tab1; +drop table tab2; + +-- some tests involving nulls, characters, float type etc. +create table def(a int, b varchar(25)) distribute by replication; +insert into def VALUES (NULL, NULL); +insert into def VALUES (1, NULL); +insert into def VALUES (NULL, 'One'); +insert into def VALUES (2, 'Two'); +insert into def VALUES (2, 'Two'); +insert into def VALUES (3, 'Three'); +insert into def VALUES (4, 'Three'); +insert into def VALUES (5, 'Three'); +insert into def VALUES (6, 'Two'); +insert into def VALUES (7, NULL); +insert into def VALUES (8, 'Two'); +insert into def VALUES (9, 'Three'); +insert into def VALUES (10, 'Three'); + +select a,count(a) from def group by a order by a; +explain verbose select a,count(a) from def group by a order by a; +select avg(a) from def group by a; +explain verbose select avg(a) from def group by a; +select avg(a) from def group by a; +explain verbose select avg(a) from def group by a; +select avg(a) from def group by b; +explain verbose select avg(a) from def group by b; +select sum(a) from def group by b; +explain verbose select sum(a) from def group by b; +select count(*) from def group by b; +explain verbose select count(*) from def group by b; +select count(*) from def where a is not null group by a; +explain verbose select count(*) from def where a is not null group by a; + +select b from def group by b; +explain verbose select b from def group by b; +select b,count(b) from def group by b; +explain verbose select b,count(b) from def group by b; +select count(*) from def where b is null group by b; +explain verbose select count(*) from def where b is null group by b; + +create table g(a int, b float, c numeric) distribute by replication; +insert into g values(1,2.1,3.2); +insert into g values(1,2.1,3.2); +insert into g values(2,2.3,5.2); + +select sum(a) from g group by a; +explain verbose select sum(a) from g group by a; +select sum(b) from g group by b; +explain verbose select sum(b) from g group by b; +select sum(c) from g group by b; +explain verbose select sum(c) from g group by b; + +select avg(a) from g group by b; +explain verbose select avg(a) from g group by b; +select avg(b) from g group by c; +explain verbose select avg(b) from g group by c; +select avg(c) from g group by c; +explain verbose select avg(c) from g group by c; + +drop table def; +drop table g; reset enable_hashagg; + +-- Combination 3 enable_hashagg off and distributed tables +set enable_hashagg to off; +-- create required tables and fill them with data +create table tab1 (val int, val2 int); +create table tab2 (val int, val2 int); +insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3); +insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2); +select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; +explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; +-- joins and group by +select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; +explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; +-- aggregates over aggregates +select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; +explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; +-- group by without aggregate +select val2 from tab1 group by val2; +explain verbose select val2 from tab1 group by val2; +select val + val2 from tab1 group by val + val2; +explain verbose select val + val2 from tab1 group by val + val2; +select val + val2, val, val2 from tab1 group by val, val2; +explain verbose select val + val2, val, val2 from tab1 group by val, val2; +select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; +explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; +select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; +explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; -- group by with aggregates in expression select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; @@ -127,7 +288,42 @@ explain verbose select avg(c) from g group by c; drop table def; drop table g; --- same test with replicated tables +-- Combination 4 enable_hashagg off and replicated tables. +-- repeat the same tests for replicated tables +-- create required tables and fill them with data +create table tab1 (val int, val2 int) distribute by replication; +create table tab2 (val int, val2 int) distribute by replication; +insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3); +insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2); +select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; +explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; +-- joins and group by +select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; +explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; +-- aggregates over aggregates +select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; +explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; +-- group by without aggregate +select val2 from tab1 group by val2; +explain verbose select val2 from tab1 group by val2; +select val + val2 from tab1 group by val + val2; +explain verbose select val + val2 from tab1 group by val + val2; +select val + val2, val, val2 from tab1 group by val, val2; +explain verbose select val + val2, val, val2 from tab1 group by val, val2; +select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; +explain verbose select tab1.val + tab2.val2, tab1.val, tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val, tab2.val2; +select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; +explain verbose select tab1.val + tab2.val2 from tab1, tab2 where tab1.val = tab2.val group by tab1.val + tab2.val2; +-- group by with aggregates in expression +select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; +explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; +-- group by with expressions in group by clause +select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; +explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; +drop table tab1; +drop table tab2; + +-- some tests involving nulls, characters, float type etc. create table def(a int, b varchar(25)) distribute by replication; insert into def VALUES (NULL, NULL); insert into def VALUES (1, NULL); @@ -186,3 +382,4 @@ explain verbose select avg(c) from g group by c; drop table def; drop table g; +reset enable_hashagg; |
