summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorAndrew Gierth2017-03-27 03:20:54 +0000
committerAndrew Gierth2017-03-27 03:20:54 +0000
commitb5635948ab165b6070e7d05d111f966e07570d81 (patch)
tree9e8581fa3530ea777b14ce4900ba7cea106e3450 /src/include
parentf0a6046bcb15c2fe48384ef547df2bfb5d7f0a89 (diff)
Support hashed aggregation with grouping sets.
This extends the Aggregate node with two new features: HashAggregate can now run multiple hashtables concurrently, and a new strategy MixedAggregate populates hashtables while doing sorted grouping. The planner will now attempt to save as many sorts as possible when planning grouping sets queries, while not exceeding work_mem for the estimated combined sizes of all hashtables used. No SQL-level changes are required. There should be no user-visible impact other than the new EXPLAIN output and possible changes to result ordering when ORDER BY was not used (which affected a few regression tests). The enable_hashagg option is respected. Author: Andrew Gierth Reviewers: Mark Dilger, Andres Freund Discussion: https://postgr.es/m/87vatszyhj.fsf@news-spur.riddles.org.uk
Diffstat (limited to 'src/include')
-rw-r--r--src/include/lib/knapsack.h17
-rw-r--r--src/include/nodes/bitmapset.h6
-rw-r--r--src/include/nodes/execnodes.h21
-rw-r--r--src/include/nodes/nodes.h5
-rw-r--r--src/include/nodes/plannodes.h2
-rw-r--r--src/include/nodes/relation.h30
-rw-r--r--src/include/optimizer/pathnode.h4
7 files changed, 65 insertions, 20 deletions
diff --git a/src/include/lib/knapsack.h b/src/include/lib/knapsack.h
new file mode 100644
index 00000000000..8d1e6d0aa04
--- /dev/null
+++ b/src/include/lib/knapsack.h
@@ -0,0 +1,17 @@
+/*
+ * knapsack.h
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * src/include/lib/knapsack.h
+ */
+#ifndef KNAPSACK_H
+#define KNAPSACK_H
+
+#include "postgres.h"
+#include "nodes/bitmapset.h"
+
+extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items,
+ int *item_weights, double *item_values);
+
+#endif /* KNAPSACK_H */
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index 4f1910e5a98..109f7b0c148 100644
--- a/src/include/nodes/bitmapset.h
+++ b/src/include/nodes/bitmapset.h
@@ -21,6 +21,11 @@
#define BITMAPSET_H
/*
+ * Forward decl to save including pg_list.h
+ */
+struct List;
+
+/*
* Data representation
*/
@@ -70,6 +75,7 @@ extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_member(int x, const Bitmapset *a);
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
+extern bool bms_overlap_list(const Bitmapset *a, const struct List *b);
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
extern int bms_singleton_member(const Bitmapset *a);
extern bool bms_get_singleton_member(const Bitmapset *a, int *member);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index ff428951186..11a68500eea 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1699,6 +1699,7 @@ typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerTransData *AggStatePerTrans;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggStatePerPhaseData *AggStatePerPhase;
+typedef struct AggStatePerHashData *AggStatePerHash;
typedef struct AggState
{
@@ -1706,15 +1707,17 @@ typedef struct AggState
List *aggs; /* all Aggref nodes in targetlist & quals */
int numaggs; /* length of list (could be zero!) */
int numtrans; /* number of pertrans items */
+ AggStrategy aggstrategy; /* strategy mode */
AggSplit aggsplit; /* agg-splitting mode, see nodes.h */
AggStatePerPhase phase; /* pointer to current phase data */
- int numphases; /* number of phases */
+ int numphases; /* number of phases (including phase 0) */
int current_phase; /* current phase number */
- FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
AggStatePerAgg peragg; /* per-Aggref information */
AggStatePerTrans pertrans; /* per-Trans state information */
+ ExprContext *hashcontext; /* econtexts for long-lived data (hashtable) */
ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */
ExprContext *tmpcontext; /* econtext for input expressions */
+ ExprContext *curaggcontext; /* currently active aggcontext */
AggStatePerTrans curpertrans; /* currently active trans state */
bool input_done; /* indicates end of input */
bool agg_done; /* indicates completion of Agg scan */
@@ -1726,21 +1729,17 @@ typedef struct AggState
/* These fields are for grouping set phase data */
int maxsets; /* The max number of sets in any phase */
AggStatePerPhase phases; /* array of all phases */
- Tuplesortstate *sort_in; /* sorted input to phases > 0 */
+ Tuplesortstate *sort_in; /* sorted input to phases > 1 */
Tuplesortstate *sort_out; /* input is copied here for next phase */
TupleTableSlot *sort_slot; /* slot for sort results */
/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
AggStatePerGroup pergroup; /* per-Aggref-per-group working state */
HeapTuple grp_firstTuple; /* copy of first tuple of current group */
- /* these fields are used in AGG_HASHED mode: */
- TupleHashTable hashtable; /* hash table with one entry per group */
- TupleTableSlot *hashslot; /* slot for loading hash table */
- int numhashGrpCols; /* number of columns in hash table */
- int largestGrpColIdx; /* largest column required for hashing */
- AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */
- AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */
+ /* these fields are used in AGG_HASHED and AGG_MIXED modes: */
bool table_filled; /* hash table filled yet? */
- TupleHashIterator hashiter; /* for iterating through hash table */
+ int num_hashes;
+ AggStatePerHash perhash;
+ AggStatePerGroup *hash_pergroup; /* array of per-group pointers */
/* support for evaluation of agg inputs */
TupleTableSlot *evalslot; /* slot for agg inputs */
ProjectionInfo *evalproj; /* projection machinery */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index c83216943c1..b9369ac2754 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -261,6 +261,8 @@ typedef enum NodeTag
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
+ T_RollupData,
+ T_GroupingSetData,
T_StatisticExtInfo,
/*
@@ -724,7 +726,8 @@ typedef enum AggStrategy
{
AGG_PLAIN, /* simple agg across all input rows */
AGG_SORTED, /* grouped agg, input must be sorted */
- AGG_HASHED /* grouped agg, use internal hashtable */
+ AGG_HASHED, /* grouped agg, use internal hashtable */
+ AGG_MIXED /* grouped agg, hash and sort both used */
} AggStrategy;
/*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 4a95e16b695..6e531b62386 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -758,7 +758,7 @@ typedef struct Agg
Oid *grpOperators; /* equality operators to compare with */
long numGroups; /* estimated number of groups in input */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
- /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
+ /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
List *groupingSets; /* grouping sets to use */
List *chain; /* chained Agg/Sort nodes */
} Agg;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 0a5187cef3b..8930edf8264 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1418,17 +1418,37 @@ typedef struct AggPath
} AggPath;
/*
+ * Various annotations used for grouping sets in the planner.
+ */
+
+typedef struct GroupingSetData
+{
+ NodeTag type;
+ List *set; /* grouping set as list of sortgrouprefs */
+ double numGroups; /* est. number of result groups */
+} GroupingSetData;
+
+typedef struct RollupData
+{
+ NodeTag type;
+ List *groupClause; /* applicable subset of parse->groupClause */
+ List *gsets; /* lists of integer indexes into groupClause */
+ List *gsets_data; /* list of GroupingSetData */
+ double numGroups; /* est. number of result groups */
+ bool hashable; /* can be hashed */
+ bool is_hashed; /* to be implemented as a hashagg */
+} RollupData;
+
+/*
* GroupingSetsPath represents a GROUPING SETS aggregation
- *
- * Currently we only support this in sorted not hashed form, so the input
- * must always be appropriately presorted.
*/
+
typedef struct GroupingSetsPath
{
Path path;
Path *subpath; /* path representing input source */
- List *rollup_groupclauses; /* list of lists of SortGroupClause's */
- List *rollup_lists; /* parallel list of lists of grouping sets */
+ AggStrategy aggstrategy; /* basic strategy */
+ List *rollups; /* list of RollupData */
List *qual; /* quals (HAVING quals), if any */
} GroupingSetsPath;
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 81640de7ab7..c72c7e02cbb 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -195,8 +195,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
Path *subpath,
PathTarget *target,
List *having_qual,
- List *rollup_lists,
- List *rollup_groupclauses,
+ AggStrategy aggstrategy,
+ List *rollups,
const AggClauseCosts *agg_costs,
double numGroups);
extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root,