| author | Ashutosh Bapat | 2011-05-31 11:57:00 +0000 |
|---|---|---|
| committer | Michael P | 2011-06-06 03:02:38 +0000 |
| commit | a186a47d75e278909ec1346a2919b9e2cdbe469d (patch) | |
| tree | 9d60a80b37a11eee16da511b3ed4ddaa55bfc46d /src | |
| parent | 86fb8b6ba5bf6646ef32d03882b98ad46897dfe1 (diff) | |
We push the aggregates and GROUP BY clauses down to the datanodes
and collect the results at the coordinator when all of the following hold:
1. The query is single-step.
2. It has no HAVING clause, ORDER BY clause, windowing clause, or DISTINCT
clause.
3. The target list contains only plain aggregates (no expressions involving
aggregates) and/or grouping clause expressions.
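For example, both of the queries below appear in the regression tests added by
this patch; the first satisfies all three conditions and is pushed down, while
the second is not, because its target list computes an expression over the
aggregates:

```sql
-- pushed down: a plain aggregate plus a grouping column
select sum(a) from def group by b;

-- not pushed down: an expression computed over the aggregates
select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
```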
If these conditions are met, the following steps are taken:
1. Create a RemoteQuery node carrying the GROUP BY clause, the clauses
corresponding to the target list, the Scan nodes in the lefttree of the Agg
node, etc. (an illustrative generated statement follows this list).
2. Modify the target list of the Agg node so that its aggregate nodes take the
corresponding aggregate nodes of the RemoteQuery as inputs. This feeds the
transition results from the datanodes to the aggregates on the coordinator.
3. Remap the grouping columns of the Agg node to the corresponding columns in
the target list of the RemoteQuery node.
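As a sketch of step 1: for the pushed-down query above, the statement stored in
the RemoteQuery node would have roughly the shape below. The `group_1` alias,
the inner SELECT, and the positional GROUP BY mirror how
create_remotegrouping_plan assembles the string in this patch, but the exact
deparsed text is illustrative, not copied from a real run:

```sql
SELECT sum(a), b FROM (SELECT a, b FROM public.def def) group_1 GROUP BY 2
```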
During execution, the transition results from the datanodes are grouped
according to the GROUP BY clause, "collected" at the coordinator, and finalized
at the end for each group. The patch adds the machinery for group-wise
collection and finalization of the collected results.
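The two-phase evaluation shows up directly in the regression output added by
this patch: the datanodes return the transition result of avg(), and the
coordinator's HashAggregate collects and finalizes it per group:

```
explain verbose select avg(a) from def group by b;
                                     QUERY PLAN
-----------------------------------------------------------------------------------
 HashAggregate  (cost=1.02..1.03 rows=1 width=33)
   Output: pg_catalog.avg((avg(def.a))), def.b
   ->  Materialize  (cost=0.00..0.00 rows=0 width=0)
         Output: (avg(def.a)), def.b
         ->  Data Node Scan (Node Count [2])  (cost=0.00..1.01 rows=1000 width=33)
               Output: avg(def.a), def.b
(6 rows)
```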
This work will be refined later to lift these restrictions as much as possible.
Diffstat (limited to 'src')
| -rw-r--r-- | src/backend/executor/nodeAgg.c | 183 |
| -rw-r--r-- | src/backend/nodes/copyfuncs.c | 3 |
| -rw-r--r-- | src/backend/nodes/equalfuncs.c | 3 |
| -rw-r--r-- | src/backend/nodes/outfuncs.c | 3 |
| -rw-r--r-- | src/backend/nodes/readfuncs.c | 3 |
| -rw-r--r-- | src/backend/optimizer/plan/createplan.c | 321 |
| -rw-r--r-- | src/backend/optimizer/plan/planner.c | 7 |
| -rw-r--r-- | src/backend/parser/parse_agg.c | 30 |
| -rw-r--r-- | src/include/nodes/execnodes.h | 3 |
| -rw-r--r-- | src/include/nodes/plannodes.h | 5 |
| -rw-r--r-- | src/include/nodes/primnodes.h | 3 |
| -rw-r--r-- | src/include/optimizer/planmain.h | 1 |
| -rw-r--r-- | src/include/pgxc/planner.h | 1 |
| -rw-r--r-- | src/test/regress/expected/xc_groupby.out | 532 |
| -rw-r--r-- | src/test/regress/sql/xc_groupby.sql | 42 |
15 files changed, 1109 insertions, 31 deletions
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 437bf20b9a..ccf5079a19 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -247,6 +247,17 @@ typedef struct AggStatePerGroupData * NULL and not auto-replace it with a later input value. Only the first * non-NULL input will be auto-substituted. */ +#ifdef PGXC + /* + * PGXCTODO: we should be able to reuse the fields above, rather than having + * separate fields here, that can be done once we get rid of different + * collection and transition result types in pg_aggregate.h. Collection at + * coordinator is equivalent to the transition at non-XC PG. + */ + Datum collectValue; /* current collection value */ + bool collectValueIsNull; + bool noCollectValue; /* true if the collectValue not set yet */ +#endif /* PGXC */ } AggStatePerGroupData; /* @@ -370,6 +381,37 @@ initialize_aggregates(AggState *aggstate, * signals that we still need to do this. */ pergroupstate->noTransValue = peraggstate->initValueIsNull; + +#ifdef PGXC + /* + * (Re)set collectValue to the initial value. + * + * Note that when the initial value is pass-by-ref, we must copy it + * (into the aggcontext) since we will pfree the collectValue later. + */ + if (peraggstate->initCollectValueIsNull) + pergroupstate->collectValue = peraggstate->initCollectValue; + else + { + MemoryContext oldContext; + + oldContext = MemoryContextSwitchTo(aggstate->aggcontext); + pergroupstate->collectValue = datumCopy(peraggstate->initCollectValue, + peraggstate->collecttypeByVal, + peraggstate->collecttypeLen); + MemoryContextSwitchTo(oldContext); + } + pergroupstate->collectValueIsNull = peraggstate->initCollectValueIsNull; + + /* + * If the initial value for the transition state doesn't exist in the + * pg_aggregate table then we will let the first non-NULL value + * returned from the outer procNode become the initial value. (This is + * useful for aggregates like max() and min().) The noTransValue flag + * signals that we still need to do this. + */ + pergroupstate->noCollectValue = peraggstate->initCollectValueIsNull; +#endif /* PGXC */ } } @@ -475,6 +517,116 @@ advance_transition_function(AggState *aggstate, MemoryContextSwitchTo(oldContext); } +#ifdef PGXC +/* + * Given new input value(s), advance the collection function of an aggregate. + * + * The new values (and null flags) have been preloaded into argument positions + * 1 and up in fcinfo, so that we needn't copy them again to pass to the + * collection function. No other fields of fcinfo are assumed valid. + * + * It doesn't matter which memory context this is called in. + */ +static void +advance_collection_function(AggState *aggstate, + AggStatePerAgg peraggstate, + AggStatePerGroup pergroupstate, + FunctionCallInfoData *fcinfo) +{ + int numArguments = peraggstate->numArguments; + Datum newVal; + MemoryContext oldContext; + + /* + * numArgument has to be one, since each datanode is going to send a single + * transition value + */ + Assert(numArguments == 1); + if (peraggstate->collectfn.fn_strict) + { + int cntArgs; + /* + * For a strict collectfn, nothing happens when there's a NULL input; we + * just keep the prior transition value, transValue. + */ + for (cntArgs = 1; cntArgs <= numArguments; cntArgs++) + { + if (fcinfo->argnull[cntArgs]) + return; + } + if (pergroupstate->noCollectValue) + { + /* + * collection result has not been initialized + * We must copy the datum into result if it is pass-by-ref. We + * do not need to pfree the old result, since it's NULL. 
+ * PGXCTODO: in case the transition result type is different from + * collection result type, this code would not work, since we are + * assigning datum of one type to another. For this code to work the + * input and output of collection function needs to be binary + * compatible which is not. So, either check in AggregateCreate, + * that the input and output of collection function are binary + * coercible or set the initial values something non-null or change + * this code + */ + oldContext = MemoryContextSwitchTo(aggstate->aggcontext); + pergroupstate->collectValue = datumCopy(fcinfo->arg[1], + peraggstate->collecttypeByVal, + peraggstate->collecttypeLen); + pergroupstate->collectValueIsNull = false; + pergroupstate->noCollectValue = false; + MemoryContextSwitchTo(oldContext); + return; + } + if (pergroupstate->collectValueIsNull) + { + /* + * Don't call a strict function with NULL inputs. Note it is + * possible to get here despite the above tests, if the collectfn is + * strict *and* returned a NULL on a prior cycle. If that happens + * we will propagate the NULL all the way to the end. + */ + return; + } + } + + /* We run the collection functions in per-input-tuple memory context */ + oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory); + + /* + * OK to call the collection function + */ + InitFunctionCallInfoData(*fcinfo, &(peraggstate->collectfn), 2, (void *)aggstate, NULL); + fcinfo->arg[0] = pergroupstate->collectValue; + fcinfo->argnull[0] = pergroupstate->collectValueIsNull; + newVal = FunctionCallInvoke(fcinfo); + + /* + * If pass-by-ref datatype, must copy the new value into aggcontext and + * pfree the prior transValue. But if collectfn returned a pointer to its + * first input, we don't need to do anything. + */ + if (!peraggstate->collecttypeByVal && + DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->collectValue)) + { + if (!fcinfo->isnull) + { + MemoryContextSwitchTo(aggstate->aggcontext); + newVal = datumCopy(newVal, + peraggstate->collecttypeByVal, + peraggstate->collecttypeLen); + } + if (!pergroupstate->collectValueIsNull) + pfree(DatumGetPointer(pergroupstate->collectValue)); + } + + pergroupstate->collectValue = newVal; + pergroupstate->collectValueIsNull = fcinfo->isnull; + + MemoryContextSwitchTo(oldContext); +} +#endif /* PGXC */ + /* * Advance all the aggregates for one input tuple. The input tuple * has been stored in tmpcontext->ecxt_outertuple, so that it is accessible @@ -544,8 +696,21 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) fcinfo.argnull[i + 1] = slot->tts_isnull[i]; } +#ifdef PGXC + if (aggstate->skip_trans) + { + Assert(IS_PGXC_COORDINATOR); + /* + * we are collecting results sent by the datanodes, so advance + * collections instead of transitions + */ + advance_collection_function(aggstate, peraggstate, + pergroupstate, &fcinfo); + } + else +#endif /* PGXC */ advance_transition_function(aggstate, peraggstate, pergroupstate, - &fcinfo); + &fcinfo); } } } @@ -751,9 +916,10 @@ finalize_aggregate(AggState *aggstate, * should be consumable by final function. * As such this step is meant only to convert transition results into form * consumable by final function, the step does not actually do any - * collection. + * collection. Skipping transitionp means, that the collection + * phase is over and we need to apply final function directly. 
*/ - if (OidIsValid(peraggstate->collectfn_oid)) + if (OidIsValid(peraggstate->collectfn_oid) && !aggstate->skip_trans) { FunctionCallInfoData fcinfo; InitFunctionCallInfoData(fcinfo, &(peraggstate->collectfn), 2, @@ -790,6 +956,16 @@ finalize_aggregate(AggState *aggstate, pergroupstate->transValueIsNull = fcinfo.isnull; } } + + /* + * if we skipped the transition phase, we have the collection result in the + * collectValue, move it to transValue for finalization to work on + */ + if (aggstate->skip_trans) + { + pergroupstate->transValue = pergroupstate->collectValue; + pergroupstate->transValueIsNull = pergroupstate->collectValueIsNull; + } #endif /* PGXC */ /* @@ -1453,6 +1629,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->pergroup = NULL; aggstate->grp_firstTuple = NULL; aggstate->hashtable = NULL; + aggstate->skip_trans = node->skip_trans; /* * Create expression contexts. We need two, one for per-input-tuple diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index f8a154e1fd..2c7fee4c3c 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1170,6 +1170,9 @@ _copyAggref(Aggref *from) COPY_SCALAR_FIELD(aggfnoid); COPY_SCALAR_FIELD(aggtype); +#ifdef PGXC + COPY_SCALAR_FIELD(aggtrantype); +#endif /* PGXC */ COPY_NODE_FIELD(args); COPY_NODE_FIELD(aggorder); COPY_NODE_FIELD(aggdistinct); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index e0764f50a3..c5b46bbbbc 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -183,6 +183,9 @@ _equalAggref(Aggref *a, Aggref *b) { COMPARE_SCALAR_FIELD(aggfnoid); COMPARE_SCALAR_FIELD(aggtype); +#ifdef PGXC + COMPARE_SCALAR_FIELD(aggtrantype); +#endif /* PGXC */ COMPARE_NODE_FIELD(args); COMPARE_NODE_FIELD(aggorder); COMPARE_NODE_FIELD(aggdistinct); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 9b5581c76b..d9dd1b8def 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -871,6 +871,9 @@ _outAggref(StringInfo str, Aggref *node) WRITE_OID_FIELD(aggfnoid); WRITE_OID_FIELD(aggtype); +#ifdef PGXC + WRITE_OID_FIELD(aggtrantype); +#endif /* PGXC */ WRITE_NODE_FIELD(args); WRITE_NODE_FIELD(aggorder); WRITE_NODE_FIELD(aggdistinct); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 2d7215acbe..6dccae989c 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -466,6 +466,9 @@ _readAggref(void) READ_OID_FIELD(aggfnoid); READ_OID_FIELD(aggtype); +#ifdef PGXC + READ_OID_FIELD(aggtrantype); +#endif /* PGXC */ READ_NODE_FIELD(args); READ_NODE_FIELD(aggorder); READ_NODE_FIELD(aggdistinct); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index efd89fb8b3..585ee0b15f 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -87,6 +87,8 @@ static void create_remote_target_list(PlannerInfo *root, char *in_alias, int in_index); static Alias *generate_remote_rte_alias(RangeTblEntry *rte, int varno, char *aliasname, int reduce_level); +static void pgxc_locate_grouping_columns(PlannerInfo *root, List *tlist, + AttrNumber *grpColIdx); #endif static NestLoop *create_nestloop_plan(PlannerInfo *root, NestPath *best_path, Plan *outer_plan, Plan *inner_plan); @@ -5050,4 +5052,323 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan) return (Plan *) fstep; } + +/* + * create_remotegrouping_plan + * tries to see if the grouping and aggregates can 
be pushed down to the + * datanodes. + * Right now we can push with following restrictions + * 1. there are plain aggregates (no expressions involving aggregates) and/or + * expressions in group by clauses + * 2. No distinct or order by clauses + * 3. No windowing clause + * + * Inputs + * root - planerInfo root for this query + * agg_plan - local grouping plan produced by grouping_planner() + * + * PGXCTODO: work on reducing these restrictions as much or document the reasons + * why we need the restrictions, in these comments themselves. In case of + * replicated tables, we should be able to push the whole query to the data + * node in case there are no local clauses. + */ +Plan * +create_remotegrouping_plan(PlannerInfo *root, Plan *local_plan) +{ + Query *query = root->parse; + RemoteQuery *agg_left; + Plan *temp_plan = local_plan->lefttree; + List *agg_tlist = local_plan->targetlist; + StringInfo remote_sql_stmt = makeStringInfo(); + StringInfo remote_targetlist = makeStringInfo(); + StringInfo remote_fromlist = makeStringInfo(); + StringInfo groupby_clause = makeStringInfo(); + StringInfo in_alias = makeStringInfo(); + ListCell *temp; + ListCell *temp_remote; + RemoteQuery *agg_remote; + Plan *agg_remote_plan; + RangeTblEntry *dummy_rte; + Index dummy_rtindex; + List *base_tlist; + Agg *agg_plan = NULL; + List *temp_vars; /* temporarily hold the VARs */ + List *temp_vartlist; /* temporarity hold tlist of VARs */ + Relids in_relids; /* the list of Relids referenced by + * the Agg plan + */ + + /* for now only Agg plans */ + Assert(IsA(local_plan, Agg)); + agg_plan = (Agg *)local_plan; + + /* + * we don't push aggregation and grouping to datanodes, in case there are + * windowing aggregates, distinct, having clause or sort clauses + */ + if (query->hasWindowFuncs || + query->distinctClause || + query->sortClause || + query->havingQual) + return local_plan; + + /* + * optimize if only the tree underneath is reduced to RemoteQuery, any other + * node there indicates that the scans can not be completely pushed to the + * remote data nodes + * RemoteQuery is hidden underneath Material plan, take it out + */ + if (IsA(temp_plan, Material)) + temp_plan = temp_plan->lefttree; + if (!IsA(temp_plan, RemoteQuery)) + return local_plan; + else + agg_left = (RemoteQuery *)temp_plan; + + + /* + * walk through the target list and find out whether we can push the + * aggregates and grouping to datanodes. We can do so if the target list + * contains plain aggregates (without any expression involving those) and + * expressions in group by clauses only (last one to make the query legit. + */ + foreach(temp, agg_tlist) + { + TargetEntry *tle = lfirst(temp); + Node *expr = (Node *)tle->expr; + + /* + * PGXCTODO: once we allow sort clauses to be pushed to data nodes, + * along with group by clause, this condition will need to be changed. + */ + if (!(IsA(expr, Aggref) || tle->ressortgroupref > 0)) + return local_plan; + } + + /* + * cleared of all the charges, now take following steps + * 1. create a remote query node reflecting the query to be pushed to the + * datanode + * 2. Modify the Agg node passed in so that it reflects the aggregation + * (collection) to be done at the coordinator based on the results sent by + * the datanodes. 
+ */ + + appendStringInfo(in_alias, "%s_%d", "group", root->rs_alias_index); + + /* find all the relations referenced by targetlist of Agg node */ + temp_vars = pull_var_clause((Node *)agg_tlist, PVC_REJECT_PLACEHOLDERS); + findReferencedVars(temp_vars, (Plan *)agg_left, &temp_vartlist, &in_relids); + + /* + * build partial RemoteQuery node to be used for creating the Select clause + * to be sent to the remote node. Rest of the node will be built later + */ + agg_remote = makeNode(RemoteQuery); + /* + * Save information about the plan we are reducing. + * We may need this information later if more entries are added to it + * as part of the remote expression optimization. + */ + agg_remote->remotejoin = false; + agg_remote->inner_alias = pstrdup(in_alias->data); + agg_remote->inner_reduce_level = agg_left->reduce_level; + agg_remote->inner_relids = in_relids; + agg_remote->inner_statement = pstrdup(agg_left->sql_statement); + agg_remote->exec_nodes = agg_left->exec_nodes; + /* don't forget to increment the index for the next time around! */ + agg_remote->reduce_level = root->rs_alias_index++; + + /* generate the select clause of the remote query */ + appendStringInfoString(remote_targetlist, "SELECT"); + foreach (temp, agg_tlist) + { + TargetEntry *tle = lfirst(temp); + Node *expr = (Node *)tle->expr; + + create_remote_expr(root, local_plan, remote_targetlist, expr, agg_remote); + /* if this is not last target entry, add a comma with space */ + if (lnext(temp)) + appendStringInfoString(remote_targetlist, ","); + } + + /* generate the from clause of the remote query */ + appendStringInfo(remote_fromlist, "FROM (%s) %s", + agg_remote->inner_statement, agg_remote->inner_alias); + + /* + * generate group by clause for the remote query and recompute the group by + * column locations + */ + if (query->groupClause) + { + int cntCols; + Assert(IsA(local_plan, Agg)); + + /* + * recompute the column ids of the grouping columns, + * the group column indexes computed earlier point in the + * targetlists of the scan plans under this node. But now the grouping + * column indexes will be pointing in the targetlist of the new + * RemoteQuery, hence those need to be recomputed + */ + pgxc_locate_grouping_columns(root, agg_tlist, agg_plan->grpColIdx); + + appendStringInfoString(groupby_clause, "GROUP BY "); + for (cntCols = 0; cntCols < agg_plan->numCols; cntCols++) + { + appendStringInfo(groupby_clause, "%d", + agg_plan->grpColIdx[cntCols]); + if (cntCols < agg_plan->numCols - 1) + appendStringInfoString(groupby_clause, ", "); + } + } + + /* generate the remote sql statement from the pieces */ + appendStringInfo(remote_sql_stmt, "%s %s %s", remote_targetlist->data, + remote_fromlist->data, groupby_clause->data); + /* + * set the base_tlist for the RemoteQuery node being created, it's used to + * create the tuple descriptor for the result from RemoteQuery and rewrite + * the Aggregates targetlist accept the results of the RemoteQuery. + */ + base_tlist = add_to_flat_tlist(NIL, get_tlist_exprs(agg_tlist, true)); + /* + * we need to change the return types of the aggregates. Datanodes send the + * aggregation results in the form of transition results + */ + foreach (temp, base_tlist) + { + TargetEntry *tle = lfirst(temp); + Node *expr = (Node *)tle->expr; + Aggref *agg; + + if (IsA(expr, Aggref)) + { + agg = (Aggref *)expr; + agg->aggtype = agg->aggtrantype; + } + } + + /* + * create a dummy RTE for the remote query being created. Append the dummy + * range table entry to the range table. 
Note that this modifies the master + * copy the caller passed us, otherwise e.g EXPLAIN VERBOSE will fail to + * find the rte the Vars built below refer to. Also create the tuple + * descriptor for the result of this query from the base_tlist (targetlist + * we used to generate the remote node query). + */ + dummy_rte = makeNode(RangeTblEntry); + dummy_rte->reltupdesc = ExecTypeFromTL(base_tlist, false); + dummy_rte->rtekind = RTE_RELATION; + /* use a dummy relname... */ + dummy_rte->relname = "__FOREIGN_QUERY__"; + dummy_rte->eref = makeAlias("__FOREIGN_QUERY__", NIL); + /* rest will be zeroed out in makeNode() */ + root->parse->rtable = lappend(root->parse->rtable, dummy_rte); + dummy_rtindex = list_length(root->parse->rtable); + + /* + * change the aggref nodes in the local Agg plan to accept the transition + * results from the remote query output. Do this after we have created base + * list, otherwise we might introduce these changes in the base list + * Do this after the RTE for the remote query is added to the root. + */ + forboth (temp, agg_tlist, temp_remote, base_tlist) + { + TargetEntry *tle = lfirst(temp); + Node *expr = (Node *)tle->expr; + Aggref *agg; + TargetEntry *tle_remote = lfirst(temp_remote); + Node *expr_remote = (Node *)tle_remote->expr; + + if (IsA(expr, Aggref)) + { + Assert(IsA(expr_remote, Aggref)); + + /* + * replace the args of the local Aggref with Aggref node to be + * included in RemoteQuery node, so that set_plan_refs can convert + * the args into VAR pointing to the appropriate result in the tuple + * coming from RemoteQuery node + * PGXCTODO: should we push this change in targetlists of plans + * above? + */ + agg = (Aggref *)expr; + agg->args = list_make1(makeTargetEntry(copyObject(expr_remote), 1, NULL, false)); + } + } + + /* build rest of the RemoteQuery node and the plan there */ + agg_remote_plan = &agg_remote->scan.plan; + /* the join targetlist becomes this node's tlist */ + agg_remote_plan->targetlist = base_tlist; + agg_remote_plan->lefttree = NULL; + agg_remote_plan->righttree = NULL; + agg_remote->scan.scanrelid = dummy_rtindex; + agg_remote->sql_statement = remote_sql_stmt->data; + /* set_plan_refs needs this later */ + agg_remote->base_tlist = base_tlist; + agg_remote->relname = "__FOREIGN_QUERY__"; + agg_remote->partitioned_replicated = agg_left->partitioned_replicated; + /* + * Only quals that can be pushed to the remote side the ones in the having + * clause. Till we work out how to handle having quals in XC, we don't have + * any quals here. + * PGXCTODO: the RemoteQuery node that was earlier the lefttree of Agg + * node, may have local quals. In such case, we have to aggregate and group + * at coordinator and can not push the grouping clause to the datanodes. Is + * there a case in XC, where we can have local quals? 
+ * we actually need not worry about costs since this is the final plan + */ + agg_remote_plan->startup_cost = agg_left->scan.plan.startup_cost; + agg_remote_plan->total_cost = agg_left->scan.plan.total_cost; + agg_remote_plan->plan_rows = agg_left->scan.plan.plan_rows; + agg_remote_plan->plan_width = agg_left->scan.plan.plan_width; + + /* modify the passed in Agg plan according to the remote query we built */ + /* + * Materialization is always need for RemoteQuery in case we need to restart + * the scan + */ + agg_plan->plan.lefttree = (Plan *) make_material(agg_remote_plan); + /* indicate that we should apply collection function directly */ + agg_plan->skip_trans = true; + + return (Plan *)agg_plan; +} + +/* + * locates the grouping clauses in the given target list. This is very similar + * to locate_grouping_columns except that there is only one target list to + * search into + * PGXCTODO: can we reuse locate_grouping_columns() instead of writing this + * function. But this function is optimized to search in the same target list. + */ +static void +pgxc_locate_grouping_columns(PlannerInfo *root, List *tlist, + AttrNumber *groupColIdx) +{ + int keyno = 0; + ListCell *gl; + + /* + * No work unless grouping. + */ + if (!root->parse->groupClause) + { + Assert(groupColIdx == NULL); + return; + } + Assert(groupColIdx != NULL); + + foreach(gl, root->parse->groupClause) + { + SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); + TargetEntry *te = get_sortgroupclause_tle(grpcl, tlist); + if (!te) + elog(ERROR, "failed to locate grouping columns"); + groupColIdx[keyno++] = te->resno; + } +} #endif diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 1f3213bcf3..213da56115 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1334,6 +1334,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) numGroups, agg_counts.numAggs, result_plan); +#ifdef PGXC + /* + * can we push any clauses to the remote node? try doing that + */ + if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) + result_plan = create_remotegrouping_plan(root, result_plan); +#endif /* PGXC */ /* Hashed aggregation produces randomly-ordered results */ current_pathkeys = NIL; } diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 3c4738edc4..43a7974cee 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -75,6 +75,10 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, int save_next_resno; int min_varlevel; ListCell *lc; +#ifdef PGXC + HeapTuple aggTuple; + Form_pg_aggregate aggform; +#endif /* PGXC */ /* * Transform the plain list of Exprs into a targetlist. We don't bother @@ -191,23 +195,19 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, * on the coordinator. 
* Look up the aggregate definition and replace agg->aggtype */ - if (IS_PGXC_DATANODE) - { - HeapTuple aggTuple; - Form_pg_aggregate aggform; - aggTuple = SearchSysCache(AGGFNOID, - ObjectIdGetDatum(agg->aggfnoid), - 0, 0, 0); - if (!HeapTupleIsValid(aggTuple)) - elog(ERROR, "cache lookup failed for aggregate %u", - agg->aggfnoid); - aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); - - agg->aggtype = aggform->aggtranstype; + aggTuple = SearchSysCache(AGGFNOID, + ObjectIdGetDatum(agg->aggfnoid), + 0, 0, 0); + if (!HeapTupleIsValid(aggTuple)) + elog(ERROR, "cache lookup failed for aggregate %u", + agg->aggfnoid); + aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); + agg->aggtrantype = aggform->aggtranstype; + if (IS_PGXC_DATANODE) + agg->aggtype = agg->aggtrantype; - ReleaseSysCache(aggTuple); - } + ReleaseSysCache(aggTuple); #endif } diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 6455eeaa60..6c4fcb77b2 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1570,6 +1570,9 @@ typedef struct AggState List *hash_needed; /* list of columns needed in hash table */ bool table_filled; /* hash table filled yet? */ TupleHashIterator hashiter; /* for iterating through hash table */ +#ifdef PGXC + bool skip_trans; /* skip the transition step for aggregates */ +#endif /* PGXC */ } AggState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 79876100d5..82ecedfd1e 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -535,6 +535,11 @@ typedef struct Agg AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ long numGroups; /* estimated number of groups in input */ +#ifdef PGXC + bool skip_trans; /* apply collection directly on the data received + * from remote data nodes + */ +#endif /* PGXC */ } Agg; /* ---------------- diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 07ad2a0453..8d38c24958 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -228,6 +228,9 @@ typedef struct Aggref Expr xpr; Oid aggfnoid; /* pg_proc Oid of the aggregate */ Oid aggtype; /* type Oid of result of the aggregate */ +#ifdef PGXC + Oid aggtrantype; /* type Oid of transition results */ +#endif /* PGXC */ List *args; /* arguments and sort expressions */ List *aggorder; /* ORDER BY (list of SortGroupClause) */ List *aggdistinct; /* DISTINCT (list of SortGroupClause) */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 41b4630fe6..e595f1e003 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -82,6 +82,7 @@ extern ModifyTable *make_modifytable(CmdType operation, List *resultRelations, List *subplans, List *returningLists, List *rowMarks, int epqParam); extern bool is_projection_capable_plan(Plan *plan); +extern Plan *create_remotegrouping_plan(PlannerInfo *root, Plan *agg_plan); /* * prototypes for plan/initsplan.c diff --git a/src/include/pgxc/planner.h b/src/include/pgxc/planner.h index fe36abbb19..96a2a68634 100644 --- a/src/include/pgxc/planner.h +++ b/src/include/pgxc/planner.h @@ -203,6 +203,7 @@ extern bool StrictSelectChecking; extern PlannedStmt *pgxc_planner(Query *query, int cursorOptions, ParamListInfo boundParams); +extern Plan *pgxc_grouping_planner(PlannerInfo *root, Plan *agg_plan); extern bool IsHashDistributable(Oid col_type); extern bool is_immutable_func(Oid funcid); diff 
--git a/src/test/regress/expected/xc_groupby.out b/src/test/regress/expected/xc_groupby.out index 58f9ea7316..672084ac78 100644 --- a/src/test/regress/expected/xc_groupby.out +++ b/src/test/regress/expected/xc_groupby.out @@ -21,6 +21,30 @@ select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val* 3 | | | | | 4 (4 rows) +explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + HashAggregate (cost=2.09..2.13 rows=1 width=16) + Output: count(*), sum((tab1.val * tab2.val)), avg((tab1.val * tab2.val)), ((sum((tab1.val * tab2.val)))::double precision / (count(*))::double precision), tab1.val2, tab2.val2 + -> Merge Full Join (cost=2.05..2.07 rows=1 width=16) + Output: tab1.val, tab1.val2, tab2.val, tab2.val2 + Merge Cond: (tab1.val2 = tab2.val2) + -> Sort (cost=1.02..1.03 rows=1 width=8) + Output: tab1.val, tab1.val2 + Sort Key: tab1.val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: tab1.val, tab1.val2 + -> Data Node Scan (Node Count [2]) "SELECT val, val2 FROM public.tab1 tab1" on tab1 (cost=0.00..1.01 rows=1000 width=8) + Output: tab1.val, tab1.val2 + -> Sort (cost=1.02..1.03 rows=1 width=8) + Output: tab2.val, tab2.val2 + Sort Key: tab2.val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: tab2.val, tab2.val2 + -> Data Node Scan (Node Count [2]) "SELECT val, val2 FROM public.tab2 tab2" on tab2 (cost=0.00..1.01 rows=1000 width=8) + Output: tab2.val, tab2.val2 +(19 rows) + -- aggregates over aggregates select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; sum @@ -29,6 +53,19 @@ select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 grou 17 (2 rows) +explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; + QUERY PLAN +---------------------------------------------------------------------------------------- + HashAggregate (cost=1.05..1.06 rows=1 width=12) + Output: sum((sum(tab1.val))), ((tab1.val2 % 2)) + -> HashAggregate (cost=1.02..1.03 rows=1 width=8) + Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: tab1.val, tab1.val2 + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: tab1.val, tab1.val2 +(8 rows) + -- group by without aggregate, just like distinct? select val2 from tab1 group by val2; val2 @@ -38,6 +75,17 @@ select val2 from tab1 group by val2; 3 (3 rows) +explain verbose select val2 from tab1 group by val2; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: tab1.val2 + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: tab1.val2 + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: tab1.val2 +(6 rows) + -- group by with aggregates in expression select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; ?column? 
| val2 @@ -47,6 +95,17 @@ select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; 17.6666666666666667 | 3 (3 rows) +explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.05 rows=1 width=8) + Output: (((count(*) + sum(val)))::numeric + avg(val)), val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: val, val2 +(6 rows) + -- group by with expressions in group by clause select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; sum | avg | ?column? @@ -56,6 +115,19 @@ select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; 8 | 4.0000000000000000 | 4 (3 rows) +explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; + QUERY PLAN +---------------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.04 rows=1 width=8) + Output: sum(val), avg(val), ((2 * val2)) + -> Result (cost=0.00..1.02 rows=1 width=8) + Output: val, val2, (2 * val2) + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: val, val2 +(8 rows) + drop table tab1; drop table tab2; -- repeat the same tests for replicated tables @@ -72,6 +144,17 @@ select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 g 3 | 11 | 3.6666666666666667 | 3.66666666666667 | 3 (3 rows) +explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + HashAggregate (cost=1.03..1.06 rows=1 width=8) + Output: count(*), sum(val), avg(val), ((sum(val))::double precision / (count(*))::double precision), val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) + Output: val, val2 +(6 rows) + -- joins and group by select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; count | sum | avg | ?column? 
| val2 | val2 @@ -82,6 +165,30 @@ select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val* 3 | | | | | 4 (4 rows) +explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + HashAggregate (cost=2.09..2.13 rows=1 width=16) + Output: count(*), sum((tab1.val * tab2.val)), avg((tab1.val * tab2.val)), ((sum((tab1.val * tab2.val)))::double precision / (count(*))::double precision), tab1.val2, tab2.val2 + -> Merge Full Join (cost=2.05..2.07 rows=1 width=16) + Output: tab1.val, tab1.val2, tab2.val, tab2.val2 + Merge Cond: (tab1.val2 = tab2.val2) + -> Sort (cost=1.02..1.03 rows=1 width=8) + Output: tab1.val, tab1.val2 + Sort Key: tab1.val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: tab1.val, tab1.val2 + -> Data Node Scan (Node Count [1]) "SELECT val, val2 FROM public.tab1 tab1" on tab1 (cost=0.00..1.01 rows=1000 width=8) + Output: tab1.val, tab1.val2 + -> Sort (cost=1.02..1.03 rows=1 width=8) + Output: tab2.val, tab2.val2 + Sort Key: tab2.val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: tab2.val, tab2.val2 + -> Data Node Scan (Node Count [1]) "SELECT val, val2 FROM public.tab2 tab2" on tab2 (cost=0.00..1.01 rows=1000 width=8) + Output: tab2.val, tab2.val2 +(19 rows) + -- aggregates over aggregates select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; sum @@ -90,6 +197,19 @@ select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 grou 17 (2 rows) +explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; + QUERY PLAN +---------------------------------------------------------------------------------------- + HashAggregate (cost=1.05..1.06 rows=1 width=12) + Output: sum((sum(tab1.val))), ((tab1.val2 % 2)) + -> HashAggregate (cost=1.02..1.03 rows=1 width=8) + Output: sum(tab1.val), (tab1.val2 % 2), tab1.val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: tab1.val, tab1.val2 + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) + Output: tab1.val, tab1.val2 +(8 rows) + -- group by without aggregate, just like distinct? select val2 from tab1 group by val2; val2 @@ -99,6 +219,17 @@ select val2 from tab1 group by val2; 3 (3 rows) +explain verbose select val2 from tab1 group by val2; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: tab1.val2 + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: tab1.val2 + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) + Output: tab1.val2 +(6 rows) + -- group by with aggregates in expression select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; ?column? 
| val2 @@ -108,6 +239,17 @@ select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; 17.6666666666666667 | 3 (3 rows) +explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.05 rows=1 width=8) + Output: (((count(*) + sum(val)))::numeric + avg(val)), val2 + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) + Output: val, val2 +(6 rows) + -- group by with expressions in group by clause select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; sum | avg | ?column? @@ -117,6 +259,19 @@ select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; 8 | 4.0000000000000000 | 4 (3 rows) +explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2; + QUERY PLAN +---------------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.04 rows=1 width=8) + Output: sum(val), avg(val), ((2 * val2)) + -> Result (cost=0.00..1.02 rows=1 width=8) + Output: val, val2, (2 * val2) + -> Materialize (cost=0.00..1.01 rows=1 width=8) + Output: val, val2 + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) + Output: val, val2 +(8 rows) + drop table tab1; drop table tab2; -- some tests involving nulls, characters, float type etc. @@ -150,19 +305,35 @@ select a,count(a) from def group by a order by a; | 0 (11 rows) +explain verbose select a,count(a) from def group by a order by a; + QUERY PLAN +---------------------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=4) + Output: a, count(a) + -> Sort (cost=1.02..1.03 rows=1 width=4) + Output: a + Sort Key: def.a + -> Result (cost=0.00..1.01 rows=1 width=4) + Output: a + -> Materialize (cost=0.00..1.01 rows=1 width=4) + Output: a, b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: a, b +(11 rows) + select avg(a) from def group by a; avg ------------------------ 6.0000000000000000 - 5.0000000000000000 8.0000000000000000 + 5.0000000000000000 + 2.0000000000000000 1.00000000000000000000 9.0000000000000000 - 2.0000000000000000 - 7.0000000000000000 - 10.0000000000000000 3.0000000000000000 + 10.0000000000000000 + 7.0000000000000000 4.0000000000000000 (11 rows) @@ -171,17 +342,28 @@ select avg(a) from def group by a; ------------------------ 6.0000000000000000 - 5.0000000000000000 8.0000000000000000 + 5.0000000000000000 1.00000000000000000000 9.0000000000000000 2.0000000000000000 - 7.0000000000000000 10.0000000000000000 + 7.0000000000000000 3.0000000000000000 4.0000000000000000 (11 rows) +explain verbose select avg(a) from def group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: pg_catalog.avg((avg(def.a))), def.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(def.a)), def.a + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: avg(def.a), def.a +(6 rows) + select avg(a) from def group by b; avg -------------------- @@ -191,6 +373,17 @@ select avg(a) from def group by b; 6.2000000000000000 (4 rows) +explain verbose select avg(a) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate 
(cost=1.02..1.03 rows=1 width=33) + Output: pg_catalog.avg((avg(def.a))), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(def.a)), def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33) + Output: avg(def.a), def.b +(6 rows) + select sum(a) from def group by b; sum ----- @@ -200,6 +393,17 @@ select sum(a) from def group by b; 31 (4 rows) +explain verbose select sum(a) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=33) + Output: pg_catalog.sum((sum(def.a))), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(def.a)), def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=33) + Output: sum(def.a), def.b +(6 rows) + select count(*) from def group by b; count ------- @@ -209,6 +413,17 @@ select count(*) from def group by b; 5 (4 rows) +explain verbose select count(*) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=29) + Output: pg_catalog.count(*), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (count(*)), def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + Output: count(*), def.b +(6 rows) + select count(*) from def where a is not null group by a; count ------- @@ -224,6 +439,17 @@ select count(*) from def where a is not null group by a; 1 (10 rows) +explain verbose select count(*) from def where a is not null group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: pg_catalog.count(*), def.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (count(*)), def.a + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: count(*), def.a +(6 rows) + select b from def group by b; b ------- @@ -233,6 +459,17 @@ select b from def group by b; Three (4 rows) +explain verbose select b from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=29) + Output: def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + Output: def.b +(6 rows) + select b,count(b) from def group by b; b | count -------+------- @@ -242,12 +479,34 @@ select b,count(b) from def group by b; Three | 5 (4 rows) +explain verbose select b,count(b) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=29) + Output: def.b, count((count(def.b))) + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: def.b, (count(def.b)) + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + Output: def.b, count(def.b) +(6 rows) + select count(*) from def where b is null group by b; count ------- 3 (1 row) +explain verbose select count(*) from def where b is null group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=29) + Output: pg_catalog.count(*), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (count(*)), def.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=29) + Output: 
count(*), def.b +(6 rows) + create table g(a int, b float, c numeric); insert into g values(1,2.1,3.2); insert into g values(1,2.1,3.2); @@ -259,6 +518,17 @@ select sum(a) from g group by a; 2 (2 rows) +explain verbose select sum(a) from g group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: pg_catalog.sum((sum(g.a))), g.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(g.a)), g.a + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=4) + Output: sum(g.a), g.a +(6 rows) + select sum(b) from g group by b; sum ----- @@ -266,6 +536,17 @@ select sum(b) from g group by b; 4.2 (2 rows) +explain verbose select sum(b) from g group by b; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=8) + Output: sum((sum(g.b))), g.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(g.b)), g.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=8) + Output: sum(g.b), g.b +(6 rows) + select sum(c) from g group by b; sum ----- @@ -273,6 +554,17 @@ select sum(c) from g group by b; 6.4 (2 rows) +explain verbose select sum(c) from g group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=40) + Output: sum((sum(g.c))), g.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(g.c)), g.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40) + Output: sum(g.c), g.b +(6 rows) + select avg(a) from g group by b; avg ------------------------ @@ -280,6 +572,17 @@ select avg(a) from g group by b; 1.00000000000000000000 (2 rows) +explain verbose select avg(a) from g group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=12) + Output: pg_catalog.avg((avg(g.a))), g.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(g.a)), g.b + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=12) + Output: avg(g.a), g.b +(6 rows) + select avg(b) from g group by c; avg ----- @@ -287,6 +590,17 @@ select avg(b) from g group by c; 2.1 (2 rows) +explain verbose select avg(b) from g group by c; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=40) + Output: pg_catalog.avg((avg(g.b))), g.c + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(g.b)), g.c + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=40) + Output: avg(g.b), g.c +(6 rows) + select avg(c) from g group by c; avg -------------------- @@ -294,6 +608,17 @@ select avg(c) from g group by c; 3.2000000000000000 (2 rows) +explain verbose select avg(c) from g group by c; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=32) + Output: pg_catalog.avg((avg(g.c))), g.c + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(g.c)), g.c + -> Data Node Scan (Node Count [2]) (cost=0.00..1.01 rows=1000 width=32) + Output: avg(g.c), g.c +(6 rows) + drop table def; drop table g; -- same test with replicated tables @@ -327,38 +652,76 @@ select a,count(a) from def group by a order by a; | 0 (11 rows) +explain verbose select a,count(a) from def group by a 
order by a; + QUERY PLAN +---------------------------------------------------------------------------------------------- + GroupAggregate (cost=1.02..1.05 rows=1 width=4) + Output: a, count(a) + -> Sort (cost=1.02..1.03 rows=1 width=4) + Output: a + Sort Key: def.a + -> Result (cost=0.00..1.01 rows=1 width=4) + Output: a + -> Materialize (cost=0.00..1.01 rows=1 width=4) + Output: a, b + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) + Output: a, b +(11 rows) + select avg(a) from def group by a; avg ------------------------ 6.0000000000000000 - 5.0000000000000000 8.0000000000000000 + 5.0000000000000000 1.00000000000000000000 - 2.0000000000000000 9.0000000000000000 + 2.0000000000000000 3.0000000000000000 - 7.0000000000000000 10.0000000000000000 + 7.0000000000000000 4.0000000000000000 (11 rows) +explain verbose select avg(a) from def group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: pg_catalog.avg((avg(def.a))), def.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(def.a)), def.a + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) + Output: avg(def.a), def.a +(6 rows) + select avg(a) from def group by a; avg ------------------------ 6.0000000000000000 - 5.0000000000000000 8.0000000000000000 + 5.0000000000000000 1.00000000000000000000 - 2.0000000000000000 9.0000000000000000 + 2.0000000000000000 3.0000000000000000 - 7.0000000000000000 10.0000000000000000 + 7.0000000000000000 4.0000000000000000 (11 rows) +explain verbose select avg(a) from def group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: pg_catalog.avg((avg(def.a))), def.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(def.a)), def.a + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) + Output: avg(def.a), def.a +(6 rows) + select avg(a) from def group by b; avg -------------------- @@ -368,6 +731,17 @@ select avg(a) from def group by b; 6.2000000000000000 (4 rows) +explain verbose select avg(a) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=33) + Output: pg_catalog.avg((avg(def.a))), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(def.a)), def.b + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=33) + Output: avg(def.a), def.b +(6 rows) + select sum(a) from def group by b; sum ----- @@ -377,6 +751,17 @@ select sum(a) from def group by b; 31 (4 rows) +explain verbose select sum(a) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=33) + Output: pg_catalog.sum((sum(def.a))), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(def.a)), def.b + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=33) + Output: sum(def.a), def.b +(6 rows) + select count(*) from def group by b; count ------- @@ -386,6 +771,17 @@ select count(*) from def group by b; 5 (4 rows) +explain verbose select count(*) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=29) + Output: pg_catalog.count(*), def.b + -> Materialize 
(cost=0.00..0.00 rows=0 width=0) + Output: (count(*)), def.b + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=29) + Output: count(*), def.b +(6 rows) + select count(*) from def where a is not null group by a; count ------- @@ -394,13 +790,24 @@ select count(*) from def where a is not null group by a; 1 1 1 - 2 1 + 2 1 1 1 (10 rows) +explain verbose select count(*) from def where a is not null group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: pg_catalog.count(*), def.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (count(*)), def.a + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) + Output: count(*), def.a +(6 rows) + select b from def group by b; b ------- @@ -410,6 +817,17 @@ select b from def group by b; Three (4 rows) +explain verbose select b from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=29) + Output: def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: def.b + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=29) + Output: def.b +(6 rows) + select b,count(b) from def group by b; b | count -------+------- @@ -419,12 +837,34 @@ select b,count(b) from def group by b; Three | 5 (4 rows) +explain verbose select b,count(b) from def group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=29) + Output: def.b, count((count(def.b))) + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: def.b, (count(def.b)) + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=29) + Output: def.b, count(def.b) +(6 rows) + select count(*) from def where b is null group by b; count ------- 3 (1 row) +explain verbose select count(*) from def where b is null group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=29) + Output: pg_catalog.count(*), def.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (count(*)), def.b + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=29) + Output: count(*), def.b +(6 rows) + create table g(a int, b float, c numeric) distribute by replication; insert into g values(1,2.1,3.2); insert into g values(1,2.1,3.2); @@ -436,6 +876,17 @@ select sum(a) from g group by a; 2 (2 rows) +explain verbose select sum(a) from g group by a; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=4) + Output: pg_catalog.sum((sum(g.a))), g.a + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(g.a)), g.a + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=4) + Output: sum(g.a), g.a +(6 rows) + select sum(b) from g group by b; sum ----- @@ -443,6 +894,17 @@ select sum(b) from g group by b; 4.2 (2 rows) +explain verbose select sum(b) from g group by b; + QUERY PLAN +---------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=8) + Output: sum((sum(g.b))), g.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(g.b)), g.b + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=8) + Output: sum(g.b), g.b +(6 rows) + select sum(c) 
from g group by b; sum ----- @@ -450,6 +912,17 @@ select sum(c) from g group by b; 6.4 (2 rows) +explain verbose select sum(c) from g group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=40) + Output: sum((sum(g.c))), g.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (sum(g.c)), g.b + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=40) + Output: sum(g.c), g.b +(6 rows) + select avg(a) from g group by b; avg ------------------------ @@ -457,6 +930,17 @@ select avg(a) from g group by b; 1.00000000000000000000 (2 rows) +explain verbose select avg(a) from g group by b; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=12) + Output: pg_catalog.avg((avg(g.a))), g.b + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(g.a)), g.b + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=12) + Output: avg(g.a), g.b +(6 rows) + select avg(b) from g group by c; avg ----- @@ -464,6 +948,17 @@ select avg(b) from g group by c; 2.1 (2 rows) +explain verbose select avg(b) from g group by c; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=40) + Output: pg_catalog.avg((avg(g.b))), g.c + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(g.b)), g.c + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=40) + Output: avg(g.b), g.c +(6 rows) + select avg(c) from g group by c; avg -------------------- @@ -471,5 +966,16 @@ select avg(c) from g group by c; 3.2000000000000000 (2 rows) +explain verbose select avg(c) from g group by c; + QUERY PLAN +----------------------------------------------------------------------------------- + HashAggregate (cost=1.02..1.03 rows=1 width=32) + Output: pg_catalog.avg((avg(g.c))), g.c + -> Materialize (cost=0.00..0.00 rows=0 width=0) + Output: (avg(g.c)), g.c + -> Data Node Scan (Node Count [1]) (cost=0.00..1.01 rows=1000 width=32) + Output: avg(g.c), g.c +(6 rows) + drop table def; drop table g; diff --git a/src/test/regress/sql/xc_groupby.sql b/src/test/regress/sql/xc_groupby.sql index 56a53c0f05..66dfabc35f 100644 --- a/src/test/regress/sql/xc_groupby.sql +++ b/src/test/regress/sql/xc_groupby.sql @@ -6,14 +6,19 @@ insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2; -- joins and group by select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; +explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2; -- aggregates over aggregates select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; +explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x; -- group by without aggregate, just like distinct? 
diff --git a/src/test/regress/sql/xc_groupby.sql b/src/test/regress/sql/xc_groupby.sql
index 56a53c0f05..66dfabc35f 100644
--- a/src/test/regress/sql/xc_groupby.sql
+++ b/src/test/regress/sql/xc_groupby.sql
@@ -6,14 +6,19 @@ insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2),
 select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
 -- joins and group by
 select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
 -- aggregates over aggregates
 select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
 -- group by without aggregate, just like distinct?
 select val2 from tab1 group by val2;
+explain verbose select val2 from tab1 group by val2;
 -- group by with aggregates in expression
 select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
 -- group by with expressions in group by clause
 select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
 
 drop table tab1;
 drop table tab2;
@@ -24,16 +29,22 @@ create table tab2 (val int, val2 int) distribute by replication;
 insert into tab1 values (1, 1), (2, 1), (3, 1), (2, 2), (6, 2), (4, 3), (1, 3), (6, 3);
 insert into tab2 values (1, 1), (4, 1), (8, 1), (2, 4), (9, 4), (3, 4), (4, 2), (5, 2), (3, 2);
 select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
+explain verbose select count(*), sum(val), avg(val), sum(val)::float8/count(*), val2 from tab1 group by val2;
 -- joins and group by
 select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
+explain verbose select count(*), sum(tab1.val * tab2.val), avg(tab1.val*tab2.val), sum(tab1.val*tab2.val)::float8/count(*), tab1.val2, tab2.val2 from tab1 full outer join tab2 on tab1.val2 = tab2.val2 group by tab1.val2, tab2.val2;
 -- aggregates over aggregates
 select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
+explain verbose select sum(y) from (select sum(val) y, val2%2 x from tab1 group by val2) q1 group by x;
 -- group by without aggregate, just like distinct?
 select val2 from tab1 group by val2;
+explain verbose select val2 from tab1 group by val2;
 -- group by with aggregates in expression
 select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
+explain verbose select count(*) + sum(val) + avg(val), val2 from tab1 group by val2;
 -- group by with expressions in group by clause
 select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
+explain verbose select sum(val), avg(val), 2 * val2 from tab1 group by 2 * val2;
 
 drop table tab1;
 drop table tab2;
@@ -54,16 +65,25 @@ insert into def VALUES (9, 'Three');
 insert into def VALUES (10, 'Three');
 
 select a,count(a) from def group by a order by a;
+explain verbose select a,count(a) from def group by a order by a;
 select avg(a) from def group by a;
 select avg(a) from def group by a;
+explain verbose select avg(a) from def group by a;
 select avg(a) from def group by b;
+explain verbose select avg(a) from def group by b;
 select sum(a) from def group by b;
+explain verbose select sum(a) from def group by b;
 select count(*) from def group by b;
+explain verbose select count(*) from def group by b;
 select count(*) from def where a is not null group by a;
+explain verbose select count(*) from def where a is not null group by a;
 select b from def group by b;
+explain verbose select b from def group by b;
 select b,count(b) from def group by b;
+explain verbose select b,count(b) from def group by b;
 select count(*) from def where b is null group by b;
+explain verbose select count(*) from def where b is null group by b;
 
 create table g(a int, b float, c numeric);
 insert into g values(1,2.1,3.2);
@@ -71,12 +91,18 @@ insert into g values(1,2.1,3.2);
 insert into g values(2,2.3,5.2);
 
 select sum(a) from g group by a;
+explain verbose select sum(a) from g group by a;
 select sum(b) from g group by b;
+explain verbose select sum(b) from g group by b;
 select sum(c) from g group by b;
+explain verbose select sum(c) from g group by b;
 select avg(a) from g group by b;
+explain verbose select avg(a) from g group by b;
 select avg(b) from g group by c;
+explain verbose select avg(b) from g group by c;
 select avg(c) from g group by c;
+explain verbose select avg(c) from g group by c;
 
 drop table def;
 drop table g;
@@ -98,16 +124,26 @@ insert into def VALUES (9, 'Three');
 insert into def VALUES (10, 'Three');
 
 select a,count(a) from def group by a order by a;
+explain verbose select a,count(a) from def group by a order by a;
 select avg(a) from def group by a;
+explain verbose select avg(a) from def group by a;
 select avg(a) from def group by a;
+explain verbose select avg(a) from def group by a;
 select avg(a) from def group by b;
+explain verbose select avg(a) from def group by b;
 select sum(a) from def group by b;
+explain verbose select sum(a) from def group by b;
 select count(*) from def group by b;
+explain verbose select count(*) from def group by b;
 select count(*) from def where a is not null group by a;
+explain verbose select count(*) from def where a is not null group by a;
 select b from def group by b;
+explain verbose select b from def group by b;
 select b,count(b) from def group by b;
+explain verbose select b,count(b) from def group by b;
 select count(*) from def where b is null group by b;
+explain verbose select count(*) from def where b is null group by b;
 
 create table g(a int, b float, c numeric) distribute by replication;
 insert into g values(1,2.1,3.2);
@@ -115,12 +151,18 @@ insert into g values(1,2.1,3.2);
 insert into g values(2,2.3,5.2);
 
 select sum(a) from g group by a;
+explain verbose select sum(a) from g group by a;
 select sum(b) from g group by b;
+explain verbose select sum(b) from g group by b;
 select sum(c) from g group by b;
+explain verbose select sum(c) from g group by b;
 select avg(a) from g group by b;
+explain verbose select avg(a) from g group by b;
 select avg(b) from g group by c;
+explain verbose select avg(b) from g group by c;
 select avg(c) from g group by c;
+explain verbose select avg(c) from g group by c;
 
 drop table def;
 drop table g;
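One detail worth noting in the g-table plans: avg() cannot in general be finalized by averaging per-node averages, since groups may be split unevenly across nodes; instead the coordinator collects each group's transition state and finalizes it once, which EXPLAIN renders as pg_catalog.avg((avg(g.b))). A hand-rolled equivalent on stock PostgreSQL -- again only an illustration, with the subquery and the names per_datanode, s, and n standing in for the datanode step -- carries the (sum, count) state explicitly:

-- Illustration only: finalize avg(b) per group from shippable partial
-- state (sum, count) rather than from per-node averages.
select c, sum(s) / sum(n) as average
from (
    -- per-datanode transition state for avg(b)
    select c, sum(b) as s, count(b) as n from g group by c
) per_datanode
group by c;
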
