diff options
| author | Andrew Gierth | 2017-03-27 03:20:54 +0000 |
|---|---|---|
| committer | Andrew Gierth | 2017-03-27 03:20:54 +0000 |
| commit | b5635948ab165b6070e7d05d111f966e07570d81 (patch) | |
| tree | 9e8581fa3530ea777b14ce4900ba7cea106e3450 /src/include | |
| parent | f0a6046bcb15c2fe48384ef547df2bfb5d7f0a89 (diff) | |
Support hashed aggregation with grouping sets.
This extends the Aggregate node with two new features: HashAggregate
can now run multiple hashtables concurrently, and a new strategy
MixedAggregate populates hashtables while doing sorted grouping.
The planner will now attempt to save as many sorts as possible when
planning grouping sets queries, while not exceeding work_mem for the
estimated combined sizes of all hashtables used. No SQL-level changes
are required. There should be no user-visible impact other than the
new EXPLAIN output and possible changes to result ordering when ORDER
BY was not used (which affected a few regression tests). The
enable_hashagg option is respected.
Author: Andrew Gierth
Reviewers: Mark Dilger, Andres Freund
Discussion: https://postgr.es/m/87vatszyhj.fsf@news-spur.riddles.org.uk
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/lib/knapsack.h | 17 | ||||
| -rw-r--r-- | src/include/nodes/bitmapset.h | 6 | ||||
| -rw-r--r-- | src/include/nodes/execnodes.h | 21 | ||||
| -rw-r--r-- | src/include/nodes/nodes.h | 5 | ||||
| -rw-r--r-- | src/include/nodes/plannodes.h | 2 | ||||
| -rw-r--r-- | src/include/nodes/relation.h | 30 | ||||
| -rw-r--r-- | src/include/optimizer/pathnode.h | 4 |
7 files changed, 65 insertions, 20 deletions
diff --git a/src/include/lib/knapsack.h b/src/include/lib/knapsack.h new file mode 100644 index 00000000000..8d1e6d0aa04 --- /dev/null +++ b/src/include/lib/knapsack.h @@ -0,0 +1,17 @@ +/* + * knapsack.h + * + * Copyright (c) 2017, PostgreSQL Global Development Group + * + * src/include/lib/knapsack.h + */ +#ifndef KNAPSACK_H +#define KNAPSACK_H + +#include "postgres.h" +#include "nodes/bitmapset.h" + +extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items, + int *item_weights, double *item_values); + +#endif /* KNAPSACK_H */ diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 4f1910e5a98..109f7b0c148 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -21,6 +21,11 @@ #define BITMAPSET_H /* + * Forward decl to save including pg_list.h + */ +struct List; + +/* * Data representation */ @@ -70,6 +75,7 @@ extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b); extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b); extern bool bms_is_member(int x, const Bitmapset *a); extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b); +extern bool bms_overlap_list(const Bitmapset *a, const struct List *b); extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b); extern int bms_singleton_member(const Bitmapset *a); extern bool bms_get_singleton_member(const Bitmapset *a, int *member); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index ff428951186..11a68500eea 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1699,6 +1699,7 @@ typedef struct AggStatePerAggData *AggStatePerAgg; typedef struct AggStatePerTransData *AggStatePerTrans; typedef struct AggStatePerGroupData *AggStatePerGroup; typedef struct AggStatePerPhaseData *AggStatePerPhase; +typedef struct AggStatePerHashData *AggStatePerHash; typedef struct AggState { @@ -1706,15 +1707,17 @@ typedef struct AggState List *aggs; /* all Aggref nodes in targetlist & quals */ int numaggs; /* length of list (could be zero!) */ int numtrans; /* number of pertrans items */ + AggStrategy aggstrategy; /* strategy mode */ AggSplit aggsplit; /* agg-splitting mode, see nodes.h */ AggStatePerPhase phase; /* pointer to current phase data */ - int numphases; /* number of phases */ + int numphases; /* number of phases (including phase 0) */ int current_phase; /* current phase number */ - FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ AggStatePerAgg peragg; /* per-Aggref information */ AggStatePerTrans pertrans; /* per-Trans state information */ + ExprContext *hashcontext; /* econtexts for long-lived data (hashtable) */ ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */ ExprContext *tmpcontext; /* econtext for input expressions */ + ExprContext *curaggcontext; /* currently active aggcontext */ AggStatePerTrans curpertrans; /* currently active trans state */ bool input_done; /* indicates end of input */ bool agg_done; /* indicates completion of Agg scan */ @@ -1726,21 +1729,17 @@ typedef struct AggState /* These fields are for grouping set phase data */ int maxsets; /* The max number of sets in any phase */ AggStatePerPhase phases; /* array of all phases */ - Tuplesortstate *sort_in; /* sorted input to phases > 0 */ + Tuplesortstate *sort_in; /* sorted input to phases > 1 */ Tuplesortstate *sort_out; /* input is copied here for next phase */ TupleTableSlot *sort_slot; /* slot for sort results */ /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */ AggStatePerGroup pergroup; /* per-Aggref-per-group working state */ HeapTuple grp_firstTuple; /* copy of first tuple of current group */ - /* these fields are used in AGG_HASHED mode: */ - TupleHashTable hashtable; /* hash table with one entry per group */ - TupleTableSlot *hashslot; /* slot for loading hash table */ - int numhashGrpCols; /* number of columns in hash table */ - int largestGrpColIdx; /* largest column required for hashing */ - AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */ - AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */ + /* these fields are used in AGG_HASHED and AGG_MIXED modes: */ bool table_filled; /* hash table filled yet? */ - TupleHashIterator hashiter; /* for iterating through hash table */ + int num_hashes; + AggStatePerHash perhash; + AggStatePerGroup *hash_pergroup; /* array of per-group pointers */ /* support for evaluation of agg inputs */ TupleTableSlot *evalslot; /* slot for agg inputs */ ProjectionInfo *evalproj; /* projection machinery */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index c83216943c1..b9369ac2754 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -261,6 +261,8 @@ typedef enum NodeTag T_PlaceHolderInfo, T_MinMaxAggInfo, T_PlannerParamItem, + T_RollupData, + T_GroupingSetData, T_StatisticExtInfo, /* @@ -724,7 +726,8 @@ typedef enum AggStrategy { AGG_PLAIN, /* simple agg across all input rows */ AGG_SORTED, /* grouped agg, input must be sorted */ - AGG_HASHED /* grouped agg, use internal hashtable */ + AGG_HASHED, /* grouped agg, use internal hashtable */ + AGG_MIXED /* grouped agg, hash and sort both used */ } AggStrategy; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 4a95e16b695..6e531b62386 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -758,7 +758,7 @@ typedef struct Agg Oid *grpOperators; /* equality operators to compare with */ long numGroups; /* estimated number of groups in input */ Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */ - /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */ + /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */ List *groupingSets; /* grouping sets to use */ List *chain; /* chained Agg/Sort nodes */ } Agg; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 0a5187cef3b..8930edf8264 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1418,17 +1418,37 @@ typedef struct AggPath } AggPath; /* + * Various annotations used for grouping sets in the planner. + */ + +typedef struct GroupingSetData +{ + NodeTag type; + List *set; /* grouping set as list of sortgrouprefs */ + double numGroups; /* est. number of result groups */ +} GroupingSetData; + +typedef struct RollupData +{ + NodeTag type; + List *groupClause; /* applicable subset of parse->groupClause */ + List *gsets; /* lists of integer indexes into groupClause */ + List *gsets_data; /* list of GroupingSetData */ + double numGroups; /* est. number of result groups */ + bool hashable; /* can be hashed */ + bool is_hashed; /* to be implemented as a hashagg */ +} RollupData; + +/* * GroupingSetsPath represents a GROUPING SETS aggregation - * - * Currently we only support this in sorted not hashed form, so the input - * must always be appropriately presorted. */ + typedef struct GroupingSetsPath { Path path; Path *subpath; /* path representing input source */ - List *rollup_groupclauses; /* list of lists of SortGroupClause's */ - List *rollup_lists; /* parallel list of lists of grouping sets */ + AggStrategy aggstrategy; /* basic strategy */ + List *rollups; /* list of RollupData */ List *qual; /* quals (HAVING quals), if any */ } GroupingSetsPath; diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 81640de7ab7..c72c7e02cbb 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -195,8 +195,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root, Path *subpath, PathTarget *target, List *having_qual, - List *rollup_lists, - List *rollup_groupclauses, + AggStrategy aggstrategy, + List *rollups, const AggClauseCosts *agg_costs, double numGroups); extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root, |
