diff options
| author | Tomas Vondra | 2019-03-27 17:32:18 +0000 |
|---|---|---|
| committer | Tomas Vondra | 2019-03-27 17:32:18 +0000 |
| commit | 7300a699502fe5432b05fbc75baca534b080bebb (patch) | |
| tree | 2fa5740b9cf8363068e8a575ae569ca172ffb66a /src/include/statistics | |
| parent | 333ed246c6f351c4e8fe22c764b97793c4101b00 (diff) | |
Add support for multivariate MCV lists
Introduce a third extended statistic type, supported by the CREATE
STATISTICS command - MCV lists, a generalization of the statistic
already built and used for individual columns.
Compared to the already supported types (n-distinct coefficients and
functional dependencies), MCV lists are more complex, include column
values and allow estimation of much wider range of common clauses
(equality and inequality conditions, IS NULL, IS NOT NULL etc.).
Similarly to the other types, a new pseudo-type (pg_mcv_list) is used.
Author: Tomas Vondra
Reviewed-by: Dean Rasheed, David Rowley, Mark Dilger, Alvaro Herrera
Discussion: https://postgr.es/m/dfdac334-9cf2-2597-fb27-f0fb3753f435@2ndquadrant.com
Diffstat (limited to 'src/include/statistics')
| -rw-r--r-- | src/include/statistics/extended_stats_internal.h | 40 | ||||
| -rw-r--r-- | src/include/statistics/statistics.h | 40 |
2 files changed, 80 insertions, 0 deletions
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index 8b73cc688f..9fc591261e 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -31,6 +31,15 @@ typedef struct int tupno; /* position index for tuple it came from */ } ScalarItem; +/* (de)serialization info */ +typedef struct DimensionInfo +{ + int nvalues; /* number of deduplicated values */ + int nbytes; /* number of bytes (serialized) */ + int typlen; /* pg_type.typlen */ + bool typbyval; /* pg_type.typbyval */ +} DimensionInfo; + /* multi-sort */ typedef struct MultiSortSupportData { @@ -44,6 +53,7 @@ typedef struct SortItem { Datum *values; bool *isnull; + int count; } SortItem; extern MVNDistinct *statext_ndistinct_build(double totalrows, @@ -57,6 +67,12 @@ extern MVDependencies *statext_dependencies_build(int numrows, HeapTuple *rows, extern bytea *statext_dependencies_serialize(MVDependencies *dependencies); extern MVDependencies *statext_dependencies_deserialize(bytea *data); +extern MCVList *statext_mcv_build(int numrows, HeapTuple *rows, + Bitmapset *attrs, VacAttrStats **stats, + double totalrows); +extern bytea *statext_mcv_serialize(MCVList * mcv, VacAttrStats **stats); +extern MCVList * statext_mcv_deserialize(bytea *data); + extern MultiSortSupport multi_sort_init(int ndims); extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation); @@ -65,5 +81,29 @@ extern int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss); extern int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss); +extern int compare_scalars_simple(const void *a, const void *b, void *arg); +extern int compare_datums_simple(Datum a, Datum b, SortSupport ssup); + +extern void *bsearch_arg(const void *key, const void *base, + size_t nmemb, size_t size, + int (*compar) (const void *, const void *, void *), + void *arg); + +extern AttrNumber *build_attnums_array(Bitmapset *attrs, int *numattrs); + +extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows, + TupleDesc tdesc, MultiSortSupport mss, + int numattrs, AttrNumber *attnums); + + +extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root, + StatisticExtInfo *stat, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + RelOptInfo *rel, + Selectivity *basesel, + Selectivity *totalsel); #endif /* EXTENDED_STATS_INTERNAL_H */ diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h index 4876e9523f..5799881075 100644 --- a/src/include/statistics/statistics.h +++ b/src/include/statistics/statistics.h @@ -78,8 +78,41 @@ typedef struct MVDependencies /* size of the struct excluding the deps array */ #define SizeOfDependencies (offsetof(MVDependencies, ndeps) + sizeof(uint32)) +/* used to flag stats serialized to bytea */ +#define STATS_MCV_MAGIC 0xE1A651C2 /* marks serialized bytea */ +#define STATS_MCV_TYPE_BASIC 1 /* basic MCV list type */ + +/* max items in MCV list (mostly arbitrary number) */ +#define STATS_MCVLIST_MAX_ITEMS 8192 + +/* + * Multivariate MCV (most-common value) lists + * + * A straightforward extension of MCV items - i.e. a list (array) of + * combinations of attribute values, together with a frequency and null flags. + */ +typedef struct MCVItem +{ + double frequency; /* frequency of this combination */ + double base_frequency; /* frequency if independent */ + bool *isnull; /* NULL flags */ + Datum *values; /* item values */ +} MCVItem; + +/* multivariate MCV list - essentally an array of MCV items */ +typedef struct MCVList +{ + uint32 magic; /* magic constant marker */ + uint32 type; /* type of MCV list (BASIC) */ + uint32 nitems; /* number of MCV items in the array */ + AttrNumber ndimensions; /* number of dimensions */ + Oid types[STATS_MAX_DIMENSIONS]; /* OIDs of data types */ + MCVItem **items; /* array of MCV items */ +} MCVList; + extern MVNDistinct *statext_ndistinct_load(Oid mvoid); extern MVDependencies *statext_dependencies_load(Oid mvoid); +extern MCVList *statext_mcv_load(Oid mvoid); extern void BuildRelationExtStatistics(Relation onerel, double totalrows, int numrows, HeapTuple *rows, @@ -92,6 +125,13 @@ extern Selectivity dependencies_clauselist_selectivity(PlannerInfo *root, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses); +extern Selectivity statext_clauselist_selectivity(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + RelOptInfo *rel, + Bitmapset **estimatedclauses); extern bool has_stats_of_kind(List *stats, char requiredkind); extern StatisticExtInfo *choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind); |
