Support hashed aggregation with grouping sets.

This extends the Aggregate node with two new features: HashAggregate can now run multiple hashtables concurrently, and a new strategy MixedAggregate populates hashtables while doing sorted grouping. The planner will now attempt to save as many sorts as possible when planning grouping sets queries, while not exceeding work_mem for the estimated combined sizes of all hashtables used. No SQL-level changes are required. There should be no user-visible impact other than the new EXPLAIN output and possible changes to result ordering when ORDER BY was not used (which affected a few regression tests). The enable_hashagg option is respected. Author: Andrew Gierth Reviewers: Mark Dilger, Andres Freund Discussion: https://postgr.es/m/87vatszyhj.fsf@news-spur.riddles.org.uk
author: Andrew Gierth 2017-03-27 03:20:54 +0000
committer: Andrew Gierth 2017-03-27 03:20:54 +0000
commit: b5635948ab165b6070e7d05d111f966e07570d81 (patch)
tree: 9e8581fa3530ea777b14ce4900ba7cea106e3450 /src/backend/optimizer
parent: f0a6046bcb15c2fe48384ef547df2bfb5d7f0a89 (diff)
4 files changed, 849 insertions, 252 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 57229059bd..92de2b7d48 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1884,11 +1884,16 @@ cost_agg(Path *path, PlannerInfo *root,
 		total_cost = startup_cost + cpu_tuple_cost;
 		output_tuples = 1;
 	}
-	else if (aggstrategy == AGG_SORTED)
+	else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
 	{
 		/* Here we are able to deliver output on-the-fly */
 		startup_cost = input_startup_cost;
 		total_cost = input_total_cost;
+		if (aggstrategy == AGG_MIXED && !enable_hashagg)
+		{
+			startup_cost += disable_cost;
+			total_cost += disable_cost;
+		}
 		/* calcs phrased this way to match HASHED case, see note above */
 		total_cost += aggcosts->transCost.startup;
 		total_cost += aggcosts->transCost.per_tuple * input_tuples;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index c80c9992c9..aafec58281 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1783,18 +1783,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 {
 	Agg		   *plan;
 	Plan	   *subplan;
-	List	   *rollup_groupclauses = best_path->rollup_groupclauses;
-	List	   *rollup_lists = best_path->rollup_lists;
+	List	   *rollups = best_path->rollups;
 	AttrNumber *grouping_map;
 	int			maxref;
 	List	   *chain;
-	ListCell   *lc,
-			   *lc2;
+	ListCell   *lc;
 
 	/* Shouldn't get here without grouping sets */
 	Assert(root->parse->groupingSets);
-	Assert(rollup_lists != NIL);
-	Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
+	Assert(rollups != NIL);
 
 	/*
 	 * Agg can project, so no need to be terribly picky about child tlist, but
@@ -1846,72 +1843,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 	 * costs will be shown by EXPLAIN.
 	 */
 	chain = NIL;
-	if (list_length(rollup_groupclauses) > 1)
+	if (list_length(rollups) > 1)
 	{
-		forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+		ListCell   *lc2 = lnext(list_head(rollups));
+		bool		is_first_sort = ((RollupData *) linitial(rollups))->is_hashed;
+
+		for_each_cell(lc, lc2)
 		{
-			List	   *groupClause = (List *) lfirst(lc);
-			List	   *gsets = (List *) lfirst(lc2);
+			RollupData *rollup = lfirst(lc);
 			AttrNumber *new_grpColIdx;
-			Plan	   *sort_plan;
+			Plan	   *sort_plan = NULL;
 			Plan	   *agg_plan;
+			AggStrategy strat;
 
-			/* We want to iterate over all but the last rollup list elements */
-			if (lnext(lc) == NULL)
-				break;
+			new_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
+
+			if (!rollup->is_hashed && !is_first_sort)
+			{
+				sort_plan = (Plan *)
+					make_sort_from_groupcols(rollup->groupClause,
+											 new_grpColIdx,
+											 subplan);
+			}
 
-			new_grpColIdx = remap_groupColIdx(root, groupClause);
+			if (!rollup->is_hashed)
+				is_first_sort = false;
 
-			sort_plan = (Plan *)
-				make_sort_from_groupcols(groupClause,
-										 new_grpColIdx,
-										 subplan);
+			if (rollup->is_hashed)
+				strat = AGG_HASHED;
+			else if (list_length(linitial(rollup->gsets)) == 0)
+				strat = AGG_PLAIN;
+			else
+				strat = AGG_SORTED;
 
 			agg_plan = (Plan *) make_agg(NIL,
 										 NIL,
-										 AGG_SORTED,
+										 strat,
 										 AGGSPLIT_SIMPLE,
-									   list_length((List *) linitial(gsets)),
+							   list_length((List *) linitial(rollup->gsets)),
 										 new_grpColIdx,
-										 extract_grouping_ops(groupClause),
-										 gsets,
+								   extract_grouping_ops(rollup->groupClause),
+										 rollup->gsets,
 										 NIL,
-										 0,		/* numGroups not needed */
+										 rollup->numGroups,
 										 sort_plan);
 
 			/*
-			 * Nuke stuff we don't need to avoid bloating debug output.
+			 * Remove stuff we don't need to avoid bloating debug output.
 			 */
-			sort_plan->targetlist = NIL;
-			sort_plan->lefttree = NULL;
+			if (sort_plan)
+			{
+				sort_plan->targetlist = NIL;
+				sort_plan->lefttree = NULL;
+			}
 
 			chain = lappend(chain, agg_plan);
 		}
 	}
 
 	/*
-	 * Now make the final Agg node
+	 * Now make the real Agg node
 	 */
 	{
-		List	   *groupClause = (List *) llast(rollup_groupclauses);
-		List	   *gsets = (List *) llast(rollup_lists);
+		RollupData *rollup = linitial(rollups);
 		AttrNumber *top_grpColIdx;
 		int			numGroupCols;
 
-		top_grpColIdx = remap_groupColIdx(root, groupClause);
+		top_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
 
-		numGroupCols = list_length((List *) linitial(gsets));
+		numGroupCols = list_length((List *) linitial(rollup->gsets));
 
 		plan = make_agg(build_path_tlist(root, &best_path->path),
 						best_path->qual,
-						(numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
+						best_path->aggstrategy,
 						AGGSPLIT_SIMPLE,
 						numGroupCols,
 						top_grpColIdx,
-						extract_grouping_ops(groupClause),
-						gsets,
+						extract_grouping_ops(rollup->groupClause),
+						rollup->gsets,
 						chain,
-						0,		/* numGroups not needed */
+						rollup->numGroups,
 						subplan);
 
 		/* Copy cost data from Path to Plan */
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 90619509a2..fa7a5f8427 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -30,6 +30,7 @@
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "lib/bipartite_match.h"
+#include "lib/knapsack.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #ifdef OPTIMIZER_DEBUG
@@ -91,12 +92,31 @@ typedef struct
 	List	   *groupClause;	/* overrides parse->groupClause */
 } standard_qp_extra;
 
+/*
+ * Data specific to grouping sets
+ */
+
+typedef struct
+{
+	List	   *rollups;
+	List	   *hash_sets_idx;
+	double		dNumHashGroups;
+	bool		any_hashable;
+	Bitmapset  *unsortable_refs;
+	Bitmapset  *unhashable_refs;
+	List	   *unsortable_sets;
+	int		   *tleref_to_colnum_map;
+} grouping_sets_data;
+
 /* Local functions */
 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
 static void inheritance_planner(PlannerInfo *root);
 static void grouping_planner(PlannerInfo *root, bool inheritance_update,
 				 double tuple_fraction);
+static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
+static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
+						 int *tleref_to_colnum_map);
 static void preprocess_rowmarks(PlannerInfo *root);
 static double preprocess_limit(PlannerInfo *root,
 				 double tuple_fraction,
@@ -109,8 +129,7 @@ static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
 static void standard_qp_callback(PlannerInfo *root, void *extra);
 static double get_number_of_groups(PlannerInfo *root,
 					 double path_rows,
-					 List *rollup_lists,
-					 List *rollup_groupclauses);
+					 grouping_sets_data *gd);
 static Size estimate_hashagg_tablesize(Path *path,
 						   const AggClauseCosts *agg_costs,
 						   double dNumGroups);
@@ -118,8 +137,16 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
 					  PathTarget *target,
 					  const AggClauseCosts *agg_costs,
-					  List *rollup_lists,
-					  List *rollup_groupclauses);
+					  grouping_sets_data *gd);
+static void consider_groupingsets_paths(PlannerInfo *root,
+							RelOptInfo *grouped_rel,
+							Path *path,
+							bool is_sorted,
+							bool can_hash,
+							PathTarget *target,
+							grouping_sets_data *gd,
+							const AggClauseCosts *agg_costs,
+							double dNumGroups);
 static RelOptInfo *create_window_paths(PlannerInfo *root,
 					RelOptInfo *input_rel,
 					PathTarget *input_target,
@@ -1540,8 +1567,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		AggClauseCosts agg_costs;
 		WindowFuncLists *wflists = NULL;
 		List	   *activeWindows = NIL;
-		List	   *rollup_lists = NIL;
-		List	   *rollup_groupclauses = NIL;
+		grouping_sets_data *gset_data = NULL;
 		standard_qp_extra qp_extra;
 
 		/* A recursive query should always have setOperations */
@@ -1550,84 +1576,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		/* Preprocess grouping sets and GROUP BY clause, if any */
 		if (parse->groupingSets)
 		{
-			int		   *tleref_to_colnum_map;
-			List	   *sets;
-			int			maxref;
-			ListCell   *lc;
-			ListCell   *lc2;
-			ListCell   *lc_set;
-
-			parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
-
-			/* Identify max SortGroupRef in groupClause, for array sizing */
-			maxref = 0;
-			foreach(lc, parse->groupClause)
-			{
-				SortGroupClause *gc = lfirst(lc);
-
-				if (gc->tleSortGroupRef > maxref)
-					maxref = gc->tleSortGroupRef;
-			}
-
-			/* Allocate workspace array for remapping */
-			tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
-
-			/* Examine the rollup sets */
-			sets = extract_rollup_sets(parse->groupingSets);
-
-			foreach(lc_set, sets)
-			{
-				List	   *current_sets = (List *) lfirst(lc_set);
-				List	   *groupclause;
-				int			ref;
-
-				/*
-				 * Reorder the current list of grouping sets into correct
-				 * prefix order.  If only one aggregation pass is needed, try
-				 * to make the list match the ORDER BY clause; if more than
-				 * one pass is needed, we don't bother with that.
-				 */
-				current_sets = reorder_grouping_sets(current_sets,
-													 (list_length(sets) == 1
-													  ? parse->sortClause
-													  : NIL));
-
-				/*
-				 * Order the groupClause appropriately.  If the first grouping
-				 * set is empty, this can match regular GROUP BY
-				 * preprocessing, otherwise we have to force the groupClause
-				 * to match that grouping set's order.
-				 */
-				groupclause = preprocess_groupclause(root,
-													 linitial(current_sets));
-
-				/*
-				 * Now that we've pinned down an order for the groupClause for
-				 * this list of grouping sets, we need to remap the entries in
-				 * the grouping sets from sortgrouprefs to plain indices
-				 * (0-based) into the groupClause for this collection of
-				 * grouping sets.
-				 */
-				ref = 0;
-				foreach(lc, groupclause)
-				{
-					SortGroupClause *gc = lfirst(lc);
-
-					tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
-				}
-
-				foreach(lc, current_sets)
-				{
-					foreach(lc2, (List *) lfirst(lc))
-					{
-						lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
-					}
-				}
-
-				/* Save the reordered sets and corresponding groupclauses */
-				rollup_lists = lcons(current_sets, rollup_lists);
-				rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
-			}
+			gset_data = preprocess_grouping_sets(root);
 		}
 		else
 		{
@@ -1721,8 +1670,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		/* Set up data needed by standard_qp_callback */
 		qp_extra.tlist = tlist;
 		qp_extra.activeWindows = activeWindows;
-		qp_extra.groupClause =
-			parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause;
+		qp_extra.groupClause = (gset_data
+								? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
+								: parse->groupClause);
 
 		/*
 		 * Generate the best unsorted and presorted paths for the scan/join
@@ -1922,8 +1872,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 												current_rel,
 												grouping_target,
 												&agg_costs,
-												rollup_lists,
-												rollup_groupclauses);
+												gset_data);
 			/* Fix things up if grouping_target contains SRFs */
 			if (parse->hasTargetSRFs)
 				adjust_paths_for_srfs(root, current_rel,
@@ -1960,7 +1909,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 			current_rel = create_distinct_paths(root,
 												current_rel);
 		}
-
 	}							/* end of if (setOperations) */
 
 	/*
@@ -2113,6 +2061,221 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 	/* Note: currently, we leave it to callers to do set_cheapest() */
 }
 
+/*
+ * Do preprocessing for groupingSets clause and related data.  This handles the
+ * preliminary steps of expanding the grouping sets, organizing them into lists
+ * of rollups, and preparing annotations which will later be filled in with
+ * size estimates.
+ */
+static grouping_sets_data *
+preprocess_grouping_sets(PlannerInfo *root)
+{
+	Query	   *parse = root->parse;
+	List	   *sets;
+	int			maxref = 0;
+	ListCell   *lc;
+	ListCell   *lc_set;
+	grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
+
+	parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
+
+	gd->any_hashable = false;
+	gd->unhashable_refs = NULL;
+	gd->unsortable_refs = NULL;
+	gd->unsortable_sets = NIL;
+
+	if (parse->groupClause)
+	{
+		ListCell   *lc;
+
+		foreach(lc, parse->groupClause)
+		{
+			SortGroupClause *gc = lfirst(lc);
+			Index		ref = gc->tleSortGroupRef;
+
+			if (ref > maxref)
+				maxref = ref;
+
+			if (!gc->hashable)
+				gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
+
+			if (!OidIsValid(gc->sortop))
+				gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
+		}
+	}
+
+	/* Allocate workspace array for remapping */
+	gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
+
+	/*
+	 * If we have any unsortable sets, we must extract them before trying to
+	 * prepare rollups. Unsortable sets don't go through
+	 * reorder_grouping_sets, so we must apply the GroupingSetData annotation
+	 * here.
+	 */
+	if (!bms_is_empty(gd->unsortable_refs))
+	{
+		List	   *sortable_sets = NIL;
+
+		foreach(lc, parse->groupingSets)
+		{
+			List	   *gset = lfirst(lc);
+
+			if (bms_overlap_list(gd->unsortable_refs, gset))
+			{
+				GroupingSetData *gs = makeNode(GroupingSetData);
+
+				gs->set = gset;
+				gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
+
+				/*
+				 * We must enforce here that an unsortable set is hashable;
+				 * later code assumes this.  Parse analysis only checks that
+				 * every individual column is either hashable or sortable.
+				 *
+				 * Note that passing this test doesn't guarantee we can
+				 * generate a plan; there might be other showstoppers.
+				 */
+				if (bms_overlap_list(gd->unhashable_refs, gset))
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("could not implement GROUP BY"),
+							 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
+			}
+			else
+				sortable_sets = lappend(sortable_sets, gset);
+		}
+
+		if (sortable_sets)
+			sets = extract_rollup_sets(sortable_sets);
+		else
+			sets = NIL;
+	}
+	else
+		sets = extract_rollup_sets(parse->groupingSets);
+
+	foreach(lc_set, sets)
+	{
+		List	   *current_sets = (List *) lfirst(lc_set);
+		RollupData *rollup = makeNode(RollupData);
+		GroupingSetData *gs;
+
+		/*
+		 * Reorder the current list of grouping sets into correct prefix
+		 * order.  If only one aggregation pass is needed, try to make the
+		 * list match the ORDER BY clause; if more than one pass is needed, we
+		 * don't bother with that.
+		 *
+		 * Note that this reorders the sets from smallest-member-first to
+		 * largest-member-first, and applies the GroupingSetData annotations,
+		 * though the data will be filled in later.
+		 */
+		current_sets = reorder_grouping_sets(current_sets,
+											 (list_length(sets) == 1
+											  ? parse->sortClause
+											  : NIL));
+
+		/*
+		 * Get the initial (and therefore largest) grouping set.
+		 */
+		gs = linitial(current_sets);
+
+		/*
+		 * Order the groupClause appropriately.  If the first grouping set is
+		 * empty, then the groupClause must also be empty; otherwise we have
+		 * to force the groupClause to match that grouping set's order.
+		 *
+		 * (The first grouping set can be empty even though parse->groupClause
+		 * is not empty only if all non-empty grouping sets are unsortable.
+		 * The groupClauses for hashed grouping sets are built later on.)
+		 */
+		if (gs->set)
+			rollup->groupClause = preprocess_groupclause(root, gs->set);
+		else
+			rollup->groupClause = NIL;
+
+		/*
+		 * Is it hashable? We pretend empty sets are hashable even though we
+		 * actually force them not to be hashed later. But don't bother if
+		 * there's nothing but empty sets (since in that case we can't hash
+		 * anything).
+		 */
+		if (gs->set &&
+			!bms_overlap_list(gd->unhashable_refs, gs->set))
+		{
+			rollup->hashable = true;
+			gd->any_hashable = true;
+		}
+
+		/*
+		 * Now that we've pinned down an order for the groupClause for this
+		 * list of grouping sets, we need to remap the entries in the grouping
+		 * sets from sortgrouprefs to plain indices (0-based) into the
+		 * groupClause for this collection of grouping sets. We keep the
+		 * original form for later use, though.
+		 */
+		rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+												 current_sets,
+												 gd->tleref_to_colnum_map);
+		rollup->gsets_data = current_sets;
+
+		gd->rollups = lappend(gd->rollups, rollup);
+	}
+
+	if (gd->unsortable_sets)
+	{
+		/*
+		 * We have not yet pinned down a groupclause for this, but we will
+		 * need index-based lists for estimation purposes. Construct
+		 * hash_sets_idx based on the entire original groupclause for now.
+		 */
+		gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
+													 gd->unsortable_sets,
+												   gd->tleref_to_colnum_map);
+		gd->any_hashable = true;
+	}
+
+	return gd;
+}
+
+/*
+ * Given a groupclause and a list of GroupingSetData, return equivalent sets
+ * (without annotation) mapped to indexes into the given groupclause.
+ */
+static List *
+remap_to_groupclause_idx(List *groupClause,
+						 List *gsets,
+						 int *tleref_to_colnum_map)
+{
+	int			ref = 0;
+	List	   *result = NIL;
+	ListCell   *lc;
+
+	foreach(lc, groupClause)
+	{
+		SortGroupClause *gc = lfirst(lc);
+
+		tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
+	}
+
+	foreach(lc, gsets)
+	{
+		List	   *set = NIL;
+		ListCell   *lc2;
+		GroupingSetData *gs = lfirst(lc);
+
+		foreach(lc2, gs->set)
+		{
+			set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
+		}
+
+		result = lappend(result, set);
+	}
+
+	return result;
+}
+
+
 
 /*
  * Detect whether a plan node is a "dummy" plan created when a relation
@@ -3028,7 +3191,7 @@ extract_rollup_sets(List *groupingSets)
 
 /*
  * Reorder the elements of a list of grouping sets such that they have correct
- * prefix relationships.
+ * prefix relationships. Also inserts the GroupingSetData annotations.
  *
  * The input must be ordered with smallest sets first; the result is returned
  * with largest sets first.  Note that the result shares no list substructure
@@ -3051,6 +3214,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
 	{
 		List	   *candidate = lfirst(lc);
 		List	   *new_elems = list_difference_int(candidate, previous);
+		GroupingSetData *gs = makeNode(GroupingSetData);
 
 		if (list_length(new_elems) > 0)
 		{
@@ -3078,7 +3242,8 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
 			}
 		}
 
-		result = lcons(list_copy(previous), result);
+		gs->set = list_copy(previous);
+		result = lcons(gs, result);
 		list_free(new_elems);
 	}
 
@@ -3173,15 +3338,16 @@ standard_qp_callback(PlannerInfo *root, void *extra)
  * Estimate number of groups produced by grouping clauses (1 if not grouping)
  *
  * path_rows: number of output rows from scan/join step
- * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
- * rollup_groupclauses: list of grouping clauses for grouping sets,
- *		or NIL if not doing grouping sets
+ * gsets: grouping set data, or NULL if not doing grouping sets
+ *
+ * If doing grouping sets, we also annotate the gsets data with the estimates
+ * for each set and each individual rollup list, with a view to later
+ * determining whether some combination of them could be hashed instead.
  */
 static double
 get_number_of_groups(PlannerInfo *root,
 					 double path_rows,
-					 List *rollup_lists,
-					 List *rollup_groupclauses)
+					 grouping_sets_data *gd)
 {
 	Query	   *parse = root->parse;
 	double		dNumGroups;
@@ -3193,28 +3359,60 @@ get_number_of_groups(PlannerInfo *root,
 		if (parse->groupingSets)
 		{
 			/* Add up the estimates for each grouping set */
-			ListCell   *lc,
-					   *lc2;
+			ListCell   *lc;
+			ListCell   *lc2;
 
 			dNumGroups = 0;
-			forboth(lc, rollup_groupclauses, lc2, rollup_lists)
+
+			foreach(lc, gd->rollups)
 			{
-				List	   *groupClause = (List *) lfirst(lc);
-				List	   *gsets = (List *) lfirst(lc2);
-				ListCell   *lc3;
+				RollupData *rollup = lfirst(lc);
+				ListCell   *lc;
 
-				groupExprs = get_sortgrouplist_exprs(groupClause,
+				groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
 													 parse->targetList);
 
-				foreach(lc3, gsets)
+				rollup->numGroups = 0.0;
+
+				forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
 				{
-					List	   *gset = (List *) lfirst(lc3);
+					List	   *gset = (List *) lfirst(lc);
+					GroupingSetData *gs = lfirst(lc2);
+					double		numGroups = estimate_num_groups(root,
+																groupExprs,
+																path_rows,
+																&gset);
+
+					gs->numGroups = numGroups;
+					rollup->numGroups += numGroups;
+				}
+
+				dNumGroups += rollup->numGroups;
+			}
+
+			if (gd->hash_sets_idx)
+			{
+				ListCell   *lc;
+
+				gd->dNumHashGroups = 0;
 
-					dNumGroups += estimate_num_groups(root,
-													  groupExprs,
-													  path_rows,
-													  &gset);
+				groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+													 parse->targetList);
+
+				forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
+				{
+					List	   *gset = (List *) lfirst(lc);
+					GroupingSetData *gs = lfirst(lc2);
+					double		numGroups = estimate_num_groups(root,
+																groupExprs,
+																path_rows,
+																&gset);
+
+					gs->numGroups = numGroups;
+					gd->dNumHashGroups += numGroups;
 				}
+
+				dNumGroups += gd->dNumHashGroups;
 			}
 		}
 		else
@@ -3250,6 +3448,11 @@ get_number_of_groups(PlannerInfo *root,
  * estimate_hashagg_tablesize
  *	  estimate the number of bytes that a hash aggregate hashtable will
  *	  require based on the agg_costs, path width and dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
  */
 static Size
 estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
@@ -3300,8 +3503,7 @@ create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
 					  PathTarget *target,
 					  const AggClauseCosts *agg_costs,
-					  List *rollup_lists,
-					  List *rollup_groupclauses)
+					  grouping_sets_data *gd)
 {
 	Query	   *parse = root->parse;
 	Path	   *cheapest_path = input_rel->cheapest_total_path;
@@ -3410,8 +3612,7 @@ create_grouping_paths(PlannerInfo *root,
 	 */
 	dNumGroups = get_number_of_groups(root,
 									  cheapest_path->rows,
-									  rollup_lists,
-									  rollup_groupclauses);
+									  gd);
 
 	/*
 	 * Determine whether it's possible to perform sort-based implementations
@@ -3419,15 +3620,22 @@ create_grouping_paths(PlannerInfo *root,
 	 * grouping_is_sortable() is trivially true, and all the
 	 * pathkeys_contained_in() tests will succeed too, so that we'll consider
 	 * every surviving input path.)
+	 *
+	 * If we have grouping sets, we might be able to sort some but not all of
+	 * them; in this case, we need can_sort to be true as long as we must
+	 * consider any sorted-input plan.
 	 */
-	can_sort = grouping_is_sortable(parse->groupClause);
+	can_sort = (gd && gd->rollups != NIL)
+		|| grouping_is_sortable(parse->groupClause);
 
 	/*
 	 * Determine whether we should consider hash-based implementations of
 	 * grouping.
 	 *
-	 * Hashed aggregation only applies if we're grouping.  We currently can't
-	 * hash if there are grouping sets, though.
+	 * Hashed aggregation only applies if we're grouping. If we have grouping
+	 * sets, some groups might be hashable but others not; in this case we set
+	 * can_hash true as long as there is nothing globally preventing us from
+	 * hashing (and we should therefore consider plans with hashes).
 	 *
 	 * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
 	 * aggregates.  (Doing so would imply storing *all* the input values in
@@ -3440,9 +3648,8 @@ create_grouping_paths(PlannerInfo *root,
 	 * other gating conditions, so we want to do it last.
 	 */
 	can_hash = (parse->groupClause != NIL &&
-				parse->groupingSets == NIL &&
 				agg_costs->numOrderedAggs == 0 &&
-				grouping_is_hashable(parse->groupClause));
+		 (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
 
 	/*
 	 * If grouped_rel->consider_parallel is true, then paths that we generate
@@ -3508,8 +3715,7 @@ create_grouping_paths(PlannerInfo *root,
 		/* Estimate number of partial groups. */
 		dNumPartialGroups = get_number_of_groups(root,
 												 cheapest_partial_path->rows,
-												 NIL,
-												 NIL);
+												 gd);
 
 		/*
 		 * Collect statistics about aggregates for estimating costs of
@@ -3642,20 +3848,9 @@ create_grouping_paths(PlannerInfo *root,
 				/* Now decide what to stick atop it */
 				if (parse->groupingSets)
 				{
-					/*
-					 * We have grouping sets, possibly with aggregation.  Make
-					 * a GroupingSetsPath.
-					 */
-					add_path(grouped_rel, (Path *)
-							 create_groupingsets_path(root,
-													  grouped_rel,
-													  path,
-													  target,
-												  (List *) parse->havingQual,
-													  rollup_lists,
-													  rollup_groupclauses,
-													  agg_costs,
-													  dNumGroups));
+					consider_groupingsets_paths(root, grouped_rel,
+												path, true, can_hash, target,
+												gd, agg_costs, dNumGroups);
 				}
 				else if (parse->hasAggs)
 				{
@@ -3816,33 +4011,45 @@ create_grouping_paths(PlannerInfo *root,
 
 	if (can_hash)
 	{
-		hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
-													  agg_costs,
-													  dNumGroups);
-
-		/*
-		 * Provided that the estimated size of the hashtable does not exceed
-		 * work_mem, we'll generate a HashAgg Path, although if we were unable
-		 * to sort above, then we'd better generate a Path, so that we at
-		 * least have one.
-		 */
-		if (hashaggtablesize < work_mem * 1024L ||
-			grouped_rel->pathlist == NIL)
+		if (parse->groupingSets)
 		{
 			/*
-			 * We just need an Agg over the cheapest-total input path, since
-			 * input order won't matter.
+			 * Try for a hash-only groupingsets path over unsorted input.
 			 */
-			add_path(grouped_rel, (Path *)
-					 create_agg_path(root, grouped_rel,
-									 cheapest_path,
-									 target,
-									 AGG_HASHED,
-									 AGGSPLIT_SIMPLE,
-									 parse->groupClause,
-									 (List *) parse->havingQual,
-									 agg_costs,
-									 dNumGroups));
+			consider_groupingsets_paths(root, grouped_rel,
+										cheapest_path, false, true, target,
+										gd, agg_costs, dNumGroups);
+		}
+		else
+		{
+			hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
+														  agg_costs,
+														  dNumGroups);
+
+			/*
+			 * Provided that the estimated size of the hashtable does not
+			 * exceed work_mem, we'll generate a HashAgg Path, although if we
+			 * were unable to sort above, then we'd better generate a Path, so
+			 * that we at least have one.
+			 */
+			if (hashaggtablesize < work_mem * 1024L ||
+				grouped_rel->pathlist == NIL)
+			{
+				/*
+				 * We just need an Agg over the cheapest-total input path,
+				 * since input order won't matter.
+				 */
+				add_path(grouped_rel, (Path *)
+						 create_agg_path(root, grouped_rel,
+										 cheapest_path,
+										 target,
+										 AGG_HASHED,
+										 AGGSPLIT_SIMPLE,
+										 parse->groupClause,
+										 (List *) parse->havingQual,
+										 agg_costs,
+										 dNumGroups));
+			}
 		}
 
 		/*
@@ -3921,6 +4128,344 @@ create_grouping_paths(PlannerInfo *root,
 	return grouped_rel;
 }
 
+
+/*
+ * For a given input path, consider the possible ways of doing grouping sets on
+ * it, by combinations of hashing and sorting.  This can be called multiple
+ * times, so it's important that it not scribble on input.  No result is
+ * returned, but any generated paths are added to grouped_rel.
+ */
+static void
+consider_groupingsets_paths(PlannerInfo *root,
+							RelOptInfo *grouped_rel,
+							Path *path,
+							bool is_sorted,
+							bool can_hash,
+							PathTarget *target,
+							grouping_sets_data *gd,
+							const AggClauseCosts *agg_costs,
+							double dNumGroups)
+{
+	Query	   *parse = root->parse;
+
+	/*
+	 * If we're not being offered sorted input, then only consider plans that
+	 * can be done entirely by hashing.
+	 *
+	 * We can hash everything if it looks like it'll fit in work_mem. But if
+	 * the input is actually sorted despite not being advertised as such, we
+	 * prefer to make use of that in order to use less memory.
+	 *
+	 * If none of the grouping sets are sortable, then ignore the work_mem
+	 * limit and generate a path anyway, since otherwise we'll just fail.
+	 */
+	if (!is_sorted)
+	{
+		List	   *new_rollups = NIL;
+		RollupData *unhashed_rollup = NULL;
+		List	   *sets_data;
+		List	   *empty_sets_data = NIL;
+		List	   *empty_sets = NIL;
+		ListCell   *lc;
+		ListCell   *l_start = list_head(gd->rollups);
+		AggStrategy strat = AGG_HASHED;
+		Size		hashsize;
+		double		exclude_groups = 0.0;
+
+		Assert(can_hash);
+
+		if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
+		{
+			unhashed_rollup = lfirst(l_start);
+			exclude_groups = unhashed_rollup->numGroups;
+			l_start = lnext(l_start);
+		}
+
+		hashsize = estimate_hashagg_tablesize(path,
+											  agg_costs,
+											  dNumGroups - exclude_groups);
+
+		/*
+		 * gd->rollups is empty if we have only unsortable columns to work
+		 * with.  Override work_mem in that case; otherwise, we'll rely on the
+		 * sorted-input case to generate usable mixed paths.
+		 */
+		if (hashsize > work_mem * 1024L && gd->rollups)
+			return;				/* nope, won't fit */
+
+		/*
+		 * We need to burst the existing rollups list into individual grouping
+		 * sets and recompute a groupClause for each set.
+		 */
+		sets_data = list_copy(gd->unsortable_sets);
+
+		for_each_cell(lc, l_start)
+		{
+			RollupData *rollup = lfirst(lc);
+
+			/*
+			 * If we find an unhashable rollup that's not been skipped by the
+			 * "actually sorted" check above, we can't cope; we'd need sorted
+			 * input (with a different sort order) but we can't get that here.
+			 * So bail out; we'll get a valid path from the is_sorted case
+			 * instead.
+			 *
+			 * The mere presence of empty grouping sets doesn't make a rollup
+			 * unhashable (see preprocess_grouping_sets), we handle those
+			 * specially below.
+			 */
+			if (!rollup->hashable)
+				return;
+			else
+				sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
+		}
+		foreach(lc, sets_data)
+		{
+			GroupingSetData *gs = lfirst(lc);
+			List	   *gset = gs->set;
+			RollupData *rollup;
+
+			if (gset == NIL)
+			{
+				/* Empty grouping sets can't be hashed. */
+				empty_sets_data = lappend(empty_sets_data, gs);
+				empty_sets = lappend(empty_sets, NIL);
+			}
+			else
+			{
+				rollup = makeNode(RollupData);
+
+				rollup->groupClause = preprocess_groupclause(root, gset);
+				rollup->gsets_data = list_make1(gs);
+				rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+														 rollup->gsets_data,
+												   gd->tleref_to_colnum_map);
+				rollup->numGroups = gs->numGroups;
+				rollup->hashable = true;
+				rollup->is_hashed = true;
+				new_rollups = lappend(new_rollups, rollup);
+			}
+		}
+
+		/*
+		 * If we didn't find anything nonempty to hash, then bail.  We'll
+		 * generate a path from the is_sorted case.
+		 */
+		if (new_rollups == NIL)
+			return;
+
+		/*
+		 * If there were empty grouping sets they should have been in the
+		 * first rollup.
+		 */
+		Assert(!unhashed_rollup || !empty_sets);
+
+		if (unhashed_rollup)
+		{
+			new_rollups = lappend(new_rollups, unhashed_rollup);
+			strat = AGG_MIXED;
+		}
+		else if (empty_sets)
+		{
+			RollupData *rollup = makeNode(RollupData);
+
+			rollup->groupClause = NIL;
+			rollup->gsets_data = empty_sets_data;
+			rollup->gsets = empty_sets;
+			rollup->numGroups = list_length(empty_sets);
+			rollup->hashable = false;
+			rollup->is_hashed = false;
+			new_rollups = lappend(new_rollups, rollup);
+			strat = AGG_MIXED;
+		}
+
+		add_path(grouped_rel, (Path *)
+				 create_groupingsets_path(root,
+										  grouped_rel,
+										  path,
+										  target,
+										  (List *) parse->havingQual,
+										  strat,
+										  new_rollups,
+										  agg_costs,
+										  dNumGroups));
+		return;
+	}
+
+	/*
+	 * If we have sorted input but nothing we can do with it, bail.
+	 */
+	if (list_length(gd->rollups) == 0)
+		return;
+
+	/*
+	 * Given sorted input, we try and make two paths: one sorted and one mixed
+	 * sort/hash. (We need to try both because hashagg might be disabled, or
+	 * some columns might not be sortable.)
+	 *
+	 * can_hash is passed in as false if some obstacle elsewhere (such as
+	 * ordered aggs) means that we shouldn't consider hashing at all.
+	 */
+	if (can_hash && gd->any_hashable)
+	{
+		List	   *rollups = NIL;
+		List	   *hash_sets = list_copy(gd->unsortable_sets);
+		double		availspace = (work_mem * 1024.0);
+		ListCell   *lc;
+
+		/*
+		 * Account first for space needed for groups we can't sort at all.
+		 */
+		availspace -= (double) estimate_hashagg_tablesize(path,
+														  agg_costs,
+														  gd->dNumHashGroups);
+
+		if (availspace > 0 && list_length(gd->rollups) > 1)
+		{
+			double		scale;
+			int			num_rollups = list_length(gd->rollups);
+			int			k_capacity;
+			int		   *k_weights = palloc(num_rollups * sizeof(int));
+			Bitmapset  *hash_items = NULL;
+			int			i;
+
+			/*
+			 * We treat this as a knapsack problem: the knapsack capacity
+			 * represents work_mem, the item weights are the estimated memory
+			 * usage of the hashtables needed to implement a single rollup, and
+			 * we really ought to use the cost saving as the item value;
+			 * however, currently the costs assigned to sort nodes don't
+			 * reflect the comparison costs well, and so we treat all items as
+			 * of equal value (each rollup we hash instead saves us one sort).
+			 *
+			 * To use the discrete knapsack, we need to scale the values to a
+			 * reasonably small bounded range.  We choose to allow a 5% error
+			 * margin; we have no more than 4096 rollups in the worst possible
+			 * case, which with a 5% error margin will require a bit over 42MB
+			 * of workspace. (Anyone wanting to plan queries that complex had
+			 * better have the memory for it.  In more reasonable cases, with
+			 * no more than a couple of dozen rollups, the memory usage will
+			 * be negligible.)
+			 *
+			 * k_capacity is naturally bounded, but we clamp the values for
+			 * scale and weight (below) to avoid overflows or underflows (or
+			 * uselessly trying to use a scale factor less than 1 byte).
+			 */
+			scale = Max(availspace / (20.0 * num_rollups), 1.0);
+			k_capacity = (int) floor(availspace / scale);
+
+			/*
+			 * We leave the first rollup out of consideration since it's the
+			 * one that matches the input sort order.  We assign indexes "i"
+			 * to only those entries considered for hashing; the second loop,
+			 * below, must use the same condition.
+			 */
+			i = 0;
+			for_each_cell(lc, lnext(list_head(gd->rollups)))
+			{
+				RollupData *rollup = lfirst(lc);
+
+				if (rollup->hashable)
+				{
+					double		sz = estimate_hashagg_tablesize(path,
+																agg_costs,
+														  rollup->numGroups);
+
+					/*
+					 * If sz is enormous, but work_mem (and hence scale) is
+					 * small, avoid integer overflow here.
+					 */
+					k_weights[i] = (int) Min(floor(sz / scale),
+											 k_capacity + 1.0);
+					++i;
+				}
+			}
+
+			/*
+			 * Apply knapsack algorithm; compute the set of items which
+			 * maximizes the value stored (in this case the number of sorts
+			 * saved) while keeping the total size (approximately) within
+			 * capacity.
+			 */
+			if (i > 0)
+				hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
+
+			if (!bms_is_empty(hash_items))
+			{
+				rollups = list_make1(linitial(gd->rollups));
+
+				i = 0;
+				for_each_cell(lc, lnext(list_head(gd->rollups)))
+				{
+					RollupData *rollup = lfirst(lc);
+
+					if (rollup->hashable)
+					{
+						if (bms_is_member(i, hash_items))
+							hash_sets = list_concat(hash_sets,
+											  list_copy(rollup->gsets_data));
+						else
+							rollups = lappend(rollups, rollup);
+						++i;
+					}
+					else
+						rollups = lappend(rollups, rollup);
+				}
+			}
+		}
+
+		if (!rollups && hash_sets)
+			rollups = list_copy(gd->rollups);
+
+		foreach(lc, hash_sets)
+		{
+			GroupingSetData *gs = lfirst(lc);
+			RollupData *rollup = makeNode(RollupData);
+
+			Assert(gs->set != NIL);
+
+			rollup->groupClause = preprocess_groupclause(root, gs->set);
+			rollup->gsets_data = list_make1(gs);
+			rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+													 rollup->gsets_data,
+												   gd->tleref_to_colnum_map);
+			rollup->numGroups = gs->numGroups;
+			rollup->hashable = true;
+			rollup->is_hashed = true;
+			rollups = lcons(rollup, rollups);
+		}
+
+		if (rollups)
+		{
+			add_path(grouped_rel, (Path *)
+					 create_groupingsets_path(root,
+											  grouped_rel,
+											  path,
+											  target,
+											  (List *) parse->havingQual,
+											  AGG_MIXED,
+											  rollups,
+											  agg_costs,
+											  dNumGroups));
+		}
+	}
+
+	/*
+	 * Now try the simple sorted case.
+	 */
+	if (!gd->unsortable_sets)
+		add_path(grouped_rel, (Path *)
+				 create_groupingsets_path(root,
+										  grouped_rel,
+										  path,
+										  target,
+										  (List *) parse->havingQual,
+										  AGG_SORTED,
+										  gd->rollups,
+										  agg_costs,
+										  dNumGroups));
+}
+
 /*
  * create_window_paths
  *
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index fca96eb001..999ebcee70 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2697,10 +2697,9 @@ create_agg_path(PlannerInfo *root,
  * 'subpath' is the path representing the source of data
  * 'target' is the PathTarget to be computed
  * 'having_qual' is the HAVING quals if any
- * 'rollup_lists' is a list of grouping sets
- * 'rollup_groupclauses' is a list of grouping clauses for grouping sets
+ * 'rollups' is a list of RollupData nodes
  * 'agg_costs' contains cost info about the aggregate functions to be computed
- * 'numGroups' is the estimated number of groups
+ * 'numGroups' is the estimated total number of groups
  */
 GroupingSetsPath *
 create_groupingsets_path(PlannerInfo *root,
@@ -2708,13 +2707,15 @@ create_groupingsets_path(PlannerInfo *root,
 						 Path *subpath,
 						 PathTarget *target,
 						 List *having_qual,
-						 List *rollup_lists,
-						 List *rollup_groupclauses,
+						 AggStrategy aggstrategy,
+						 List *rollups,
 						 const AggClauseCosts *agg_costs,
 						 double numGroups)
 {
 	GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
-	int			numGroupCols;
+	ListCell   *lc;
+	bool		is_first = true;
+	bool		is_first_sort = true;
 
 	/* The topmost generated Plan node will be an Agg */
 	pathnode->path.pathtype = T_Agg;
@@ -2728,74 +2729,109 @@ create_groupingsets_path(PlannerInfo *root,
 	pathnode->subpath = subpath;
 
 	/*
+	 * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED
+	 * to AGG_HASHED, here if possible.
+	 */
+	if (aggstrategy == AGG_SORTED &&
+		list_length(rollups) == 1 &&
+		((RollupData *) linitial(rollups))->groupClause == NIL)
+		aggstrategy = AGG_PLAIN;
+
+	if (aggstrategy == AGG_MIXED &&
+		list_length(rollups) == 1)
+		aggstrategy = AGG_HASHED;
+
+	/*
 	 * Output will be in sorted order by group_pathkeys if, and only if, there
 	 * is a single rollup operation on a non-empty list of grouping
 	 * expressions.
 	 */
-	if (list_length(rollup_groupclauses) == 1 &&
-		((List *) linitial(rollup_groupclauses)) != NIL)
+	if (aggstrategy == AGG_SORTED && list_length(rollups) == 1)
 		pathnode->path.pathkeys = root->group_pathkeys;
 	else
 		pathnode->path.pathkeys = NIL;
 
-	pathnode->rollup_groupclauses = rollup_groupclauses;
-	pathnode->rollup_lists = rollup_lists;
+	pathnode->aggstrategy = aggstrategy;
+	pathnode->rollups = rollups;
 	pathnode->qual = having_qual;
 
-	Assert(rollup_lists != NIL);
-	Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
-
-	/* Account for cost of the topmost Agg node */
-	numGroupCols = list_length((List *) linitial((List *) llast(rollup_lists)));
-
-	cost_agg(&pathnode->path, root,
-			 (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
-			 agg_costs,
-			 numGroupCols,
-			 numGroups,
-			 subpath->startup_cost,
-			 subpath->total_cost,
-			 subpath->rows);
+	Assert(rollups != NIL);
+	Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
+	Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1);
 
-	/*
-	 * Add in the costs and output rows of the additional sorting/aggregation
-	 * steps, if any.  Only total costs count, since the extra sorts aren't
-	 * run on startup.
-	 */
-	if (list_length(rollup_lists) > 1)
+	foreach(lc, rollups)
 	{
-		ListCell   *lc;
+		RollupData *rollup = lfirst(lc);
+		List	   *gsets = rollup->gsets;
+		int			numGroupCols = list_length(linitial(gsets));
 
-		foreach(lc, rollup_lists)
+		/*
+		 * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the
+		 * (already-sorted) input, and following ones do their own sort.
+		 *
+		 * In AGG_HASHED mode, there is one rollup for each grouping set.
+		 *
+		 * In AGG_MIXED mode, the first rollups are hashed, the first
+		 * non-hashed one takes the (already-sorted) input, and following ones
+		 * do their own sort.
+		 */
+		if (is_first)
+		{
+			cost_agg(&pathnode->path, root,
+					 aggstrategy,
+					 agg_costs,
+					 numGroupCols,
+					 rollup->numGroups,
+					 subpath->startup_cost,
+					 subpath->total_cost,
+					 subpath->rows);
+			is_first = false;
+			if (!rollup->is_hashed)
+				is_first_sort = false;
+		}
+		else
 		{
-			List	   *gsets = (List *) lfirst(lc);
 			Path		sort_path;		/* dummy for result of cost_sort */
 			Path		agg_path;		/* dummy for result of cost_agg */
 
-			/* We must iterate over all but the last rollup_lists element */
-			if (lnext(lc) == NULL)
-				break;
-
-			/* Account for cost of sort, but don't charge input cost again */
-			cost_sort(&sort_path, root, NIL,
-					  0.0,
-					  subpath->rows,
-					  subpath->pathtarget->width,
-					  0.0,
-					  work_mem,
-					  -1.0);
-
-			/* Account for cost of aggregation */
-			numGroupCols = list_length((List *) linitial(gsets));
-
-			cost_agg(&agg_path, root,
-					 AGG_SORTED,
-					 agg_costs,
-					 numGroupCols,
-					 numGroups, /* XXX surely not right for all steps? */
-					 sort_path.startup_cost,
-					 sort_path.total_cost,
-					 sort_path.rows);
+			if (rollup->is_hashed || is_first_sort)
+			{
+				/*
+				 * Account for cost of aggregation, but don't charge input
+				 * cost again
+				 */
+				cost_agg(&agg_path, root,
+						 rollup->is_hashed ? AGG_HASHED : AGG_SORTED,
+						 agg_costs,
+						 numGroupCols,
+						 rollup->numGroups,
+						 0.0, 0.0,
+						 subpath->rows);
+				if (!rollup->is_hashed)
+					is_first_sort = false;
+			}
+			else
+			{
+				/* Account for cost of sort, but don't charge input cost again */
+				cost_sort(&sort_path, root, NIL,
+						  0.0,
+						  subpath->rows,
+						  subpath->pathtarget->width,
+						  0.0,
+						  work_mem,
+						  -1.0);
+
+				/* Account for cost of aggregation */
+
+				cost_agg(&agg_path, root,
+						 AGG_SORTED,
+						 agg_costs,
+						 numGroupCols,
+						 rollup->numGroups,
+						 sort_path.startup_cost,
+						 sort_path.total_cost,
+						 sort_path.rows);
+			}
 
 			pathnode->path.total_cost += agg_path.total_cost;
 			pathnode->path.rows += agg_path.rows;
author	Andrew Gierth	2017-03-27 03:20:54 +0000
committer	Andrew Gierth	2017-03-27 03:20:54 +0000
commit	b5635948ab165b6070e7d05d111f966e07570d81 (patch)
tree	9e8581fa3530ea777b14ce4900ba7cea106e3450 /src/backend/optimizer
parent	f0a6046bcb15c2fe48384ef547df2bfb5d7f0a89 (diff)