diff options
| author | Tomas Vondra | 2019-03-27 17:32:18 +0000 |
|---|---|---|
| committer | Tomas Vondra | 2019-03-27 17:32:18 +0000 |
| commit | 7300a699502fe5432b05fbc75baca534b080bebb (patch) | |
| tree | 2fa5740b9cf8363068e8a575ae569ca172ffb66a /src/include | |
| parent | 333ed246c6f351c4e8fe22c764b97793c4101b00 (diff) | |
Add support for multivariate MCV lists
Introduce a third extended statistic type, supported by the CREATE
STATISTICS command - MCV lists, a generalization of the statistic
already built and used for individual columns.
Compared to the already supported types (n-distinct coefficients and
functional dependencies), MCV lists are more complex, include column
values and allow estimation of a much wider range of common clauses
(equality and inequality conditions, IS NULL, IS NOT NULL etc.).
Similarly to the other types, a new pseudo-type (pg_mcv_list) is used.
Author: Tomas Vondra
Reviewed-by: Dean Rasheed, David Rowley, Mark Dilger, Alvaro Herrera
Discussion: https://postgr.es/m/dfdac334-9cf2-2597-fb27-f0fb3753f435@2ndquadrant.com
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/catalog/pg_cast.dat | 6 | ||||
| -rw-r--r-- | src/include/catalog/pg_proc.dat | 24 | ||||
| -rw-r--r-- | src/include/catalog/pg_statistic_ext.h | 2 | ||||
| -rw-r--r-- | src/include/catalog/pg_type.dat | 7 | ||||
| -rw-r--r-- | src/include/nodes/bitmapset.h | 1 | ||||
| -rw-r--r-- | src/include/optimizer/optimizer.h | 6 | ||||
| -rw-r--r-- | src/include/statistics/extended_stats_internal.h | 40 | ||||
| -rw-r--r-- | src/include/statistics/statistics.h | 40 |
8 files changed, 126 insertions, 0 deletions
diff --git a/src/include/catalog/pg_cast.dat b/src/include/catalog/pg_cast.dat index 462a0a19602..aabfa7af03e 100644 --- a/src/include/catalog/pg_cast.dat +++ b/src/include/catalog/pg_cast.dat @@ -324,6 +324,12 @@ { castsource => 'pg_dependencies', casttarget => 'text', castfunc => '0', castcontext => 'i', castmethod => 'i' }, +# pg_mcv_list can be coerced to, but not from, bytea and text +{ castsource => 'pg_mcv_list', casttarget => 'bytea', castfunc => '0', + castcontext => 'i', castmethod => 'b' }, +{ castsource => 'pg_mcv_list', casttarget => 'text', castfunc => '0', + castcontext => 'i', castmethod => 'i' }, + # Datetime category { castsource => 'date', casttarget => 'timestamp', castfunc => 'timestamp(date)', castcontext => 'i', castmethod => 'f' }, diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 8cf77297e14..eac909109c5 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -4999,6 +4999,30 @@ proname => 'pg_dependencies_send', provolatile => 's', prorettype => 'bytea', proargtypes => 'pg_dependencies', prosrc => 'pg_dependencies_send' }, +{ oid => '5018', descr => 'I/O', + proname => 'pg_mcv_list_in', prorettype => 'pg_mcv_list', + proargtypes => 'cstring', prosrc => 'pg_mcv_list_in' }, +{ oid => '5019', descr => 'I/O', + proname => 'pg_mcv_list_out', prorettype => 'cstring', + proargtypes => 'pg_mcv_list', prosrc => 'pg_mcv_list_out' }, +{ oid => '5020', descr => 'I/O', + proname => 'pg_mcv_list_recv', provolatile => 's', + prorettype => 'pg_mcv_list', proargtypes => 'internal', + prosrc => 'pg_mcv_list_recv' }, +{ oid => '5021', descr => 'I/O', + proname => 'pg_mcv_list_send', provolatile => 's', prorettype => 'bytea', + proargtypes => 'pg_mcv_list', prosrc => 'pg_mcv_list_send' }, + +{ oid => '3427', + descr => 'details about MCV list items', + proname => 'pg_mcv_list_items', prorows => '1000', proisstrict => 't', + proretset => 't', provolatile => 's', prorettype => 'record', + 
proargtypes => 'pg_mcv_list', + proallargtypes => '{pg_mcv_list,int4,text,_bool,float8,float8}', + proargmodes => '{i,o,o,o,o,o}', + proargnames => '{mcv_list,index,values,nulls,frequency,base_frequency}', + prosrc => 'pg_stats_ext_mcvlist_items' }, + { oid => '1928', descr => 'statistics: number of scans done for table/index', proname => 'pg_stat_get_numscans', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h index c45c7b61bfb..e449f9efe8b 100644 --- a/src/include/catalog/pg_statistic_ext.h +++ b/src/include/catalog/pg_statistic_ext.h @@ -49,6 +49,7 @@ CATALOG(pg_statistic_ext,3381,StatisticExtRelationId) * to build */ pg_ndistinct stxndistinct; /* ndistinct coefficients (serialized) */ pg_dependencies stxdependencies; /* dependencies (serialized) */ + pg_mcv_list stxmcv; /* MCV (serialized) */ #endif } FormData_pg_statistic_ext; @@ -64,6 +65,7 @@ typedef FormData_pg_statistic_ext *Form_pg_statistic_ext; #define STATS_EXT_NDISTINCT 'd' #define STATS_EXT_DEPENDENCIES 'f' +#define STATS_EXT_MCV 'm' #endif /* EXPOSE_TO_CLIENT_CODE */ diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index 8f5ea9332ad..559c54ab487 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -165,6 +165,13 @@ typoutput => 'pg_dependencies_out', typreceive => 'pg_dependencies_recv', typsend => 'pg_dependencies_send', typalign => 'i', typstorage => 'x', typcollation => 'default' }, +{ oid => '5017', oid_symbol => 'PGMCVLISTOID', + descr => 'multivariate MCV list', + typname => 'pg_mcv_list', typlen => '-1', typbyval => 'f', + typcategory => 'S', typinput => 'pg_mcv_list_in', + typoutput => 'pg_mcv_list_out', typreceive => 'pg_mcv_list_recv', + typsend => 'pg_mcv_list_send', typalign => 'i', typstorage => 'x', + typcollation => 'default' }, { oid => '32', oid_symbol => 'PGDDLCOMMANDOID', descr => 'internal 
type for passing CollectedCommand', typname => 'pg_ddl_command', typlen => 'SIZEOF_POINTER', typbyval => 't', diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 892410635b5..0c645628e55 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -87,6 +87,7 @@ extern Bitmapset *bms_difference(const Bitmapset *a, const Bitmapset *b); extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b); extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b); extern bool bms_is_member(int x, const Bitmapset *a); +extern int bms_member_index(Bitmapset *a, int x); extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b); extern bool bms_overlap_list(const Bitmapset *a, const struct List *b); extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b); diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index ffd812a4ed8..6086821f201 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -58,6 +58,12 @@ extern Selectivity clause_selectivity(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); +extern Selectivity clauselist_selectivity_simple(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + Bitmapset *estimatedclauses); extern Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index 8b73cc688f0..9fc591261eb 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -31,6 +31,15 @@ typedef struct int tupno; /* position index for tuple it came from */ } ScalarItem; +/* (de)serialization info */ +typedef struct DimensionInfo +{ + int nvalues; /* number of deduplicated values */ + int nbytes; /* number of bytes (serialized) */ + int 
typlen; /* pg_type.typlen */ + bool typbyval; /* pg_type.typbyval */ +} DimensionInfo; + /* multi-sort */ typedef struct MultiSortSupportData { @@ -44,6 +53,7 @@ typedef struct SortItem { Datum *values; bool *isnull; + int count; } SortItem; extern MVNDistinct *statext_ndistinct_build(double totalrows, @@ -57,6 +67,12 @@ extern MVDependencies *statext_dependencies_build(int numrows, HeapTuple *rows, extern bytea *statext_dependencies_serialize(MVDependencies *dependencies); extern MVDependencies *statext_dependencies_deserialize(bytea *data); +extern MCVList *statext_mcv_build(int numrows, HeapTuple *rows, + Bitmapset *attrs, VacAttrStats **stats, + double totalrows); +extern bytea *statext_mcv_serialize(MCVList * mcv, VacAttrStats **stats); +extern MCVList * statext_mcv_deserialize(bytea *data); + extern MultiSortSupport multi_sort_init(int ndims); extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper, Oid collation); @@ -65,5 +81,29 @@ extern int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss); extern int multi_sort_compare_dims(int start, int end, const SortItem *a, const SortItem *b, MultiSortSupport mss); +extern int compare_scalars_simple(const void *a, const void *b, void *arg); +extern int compare_datums_simple(Datum a, Datum b, SortSupport ssup); + +extern void *bsearch_arg(const void *key, const void *base, + size_t nmemb, size_t size, + int (*compar) (const void *, const void *, void *), + void *arg); + +extern AttrNumber *build_attnums_array(Bitmapset *attrs, int *numattrs); + +extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows, + TupleDesc tdesc, MultiSortSupport mss, + int numattrs, AttrNumber *attnums); + + +extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root, + StatisticExtInfo *stat, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + RelOptInfo *rel, + Selectivity *basesel, + Selectivity *totalsel); #endif /* 
EXTENDED_STATS_INTERNAL_H */ diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h index 4876e9523f6..57998810755 100644 --- a/src/include/statistics/statistics.h +++ b/src/include/statistics/statistics.h @@ -78,8 +78,41 @@ typedef struct MVDependencies /* size of the struct excluding the deps array */ #define SizeOfDependencies (offsetof(MVDependencies, ndeps) + sizeof(uint32)) +/* used to flag stats serialized to bytea */ +#define STATS_MCV_MAGIC 0xE1A651C2 /* marks serialized bytea */ +#define STATS_MCV_TYPE_BASIC 1 /* basic MCV list type */ + +/* max items in MCV list (mostly arbitrary number) */ +#define STATS_MCVLIST_MAX_ITEMS 8192 + +/* + * Multivariate MCV (most-common value) lists + * + * A straightforward extension of MCV items - i.e. a list (array) of + * combinations of attribute values, together with a frequency and null flags. + */ +typedef struct MCVItem +{ + double frequency; /* frequency of this combination */ + double base_frequency; /* frequency if independent */ + bool *isnull; /* NULL flags */ + Datum *values; /* item values */ +} MCVItem; + +/* multivariate MCV list - essentally an array of MCV items */ +typedef struct MCVList +{ + uint32 magic; /* magic constant marker */ + uint32 type; /* type of MCV list (BASIC) */ + uint32 nitems; /* number of MCV items in the array */ + AttrNumber ndimensions; /* number of dimensions */ + Oid types[STATS_MAX_DIMENSIONS]; /* OIDs of data types */ + MCVItem **items; /* array of MCV items */ +} MCVList; + extern MVNDistinct *statext_ndistinct_load(Oid mvoid); extern MVDependencies *statext_dependencies_load(Oid mvoid); +extern MCVList *statext_mcv_load(Oid mvoid); extern void BuildRelationExtStatistics(Relation onerel, double totalrows, int numrows, HeapTuple *rows, @@ -92,6 +125,13 @@ extern Selectivity dependencies_clauselist_selectivity(PlannerInfo *root, SpecialJoinInfo *sjinfo, RelOptInfo *rel, Bitmapset **estimatedclauses); +extern Selectivity 
statext_clauselist_selectivity(PlannerInfo *root, + List *clauses, + int varRelid, + JoinType jointype, + SpecialJoinInfo *sjinfo, + RelOptInfo *rel, + Bitmapset **estimatedclauses); extern bool has_stats_of_kind(List *stats, char requiredkind); extern StatisticExtInfo *choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind); |
