summaryrefslogtreecommitdiff
path: root/src/include/statistics
diff options
context:
space:
mode:
authorTomas Vondra2019-03-27 17:32:18 +0000
committerTomas Vondra2019-03-27 17:32:18 +0000
commit7300a699502fe5432b05fbc75baca534b080bebb (patch)
tree2fa5740b9cf8363068e8a575ae569ca172ffb66a /src/include/statistics
parent333ed246c6f351c4e8fe22c764b97793c4101b00 (diff)
Add support for multivariate MCV lists
Introduce a third extended statistic type, supported by the CREATE STATISTICS command - MCV lists, a generalization of the statistic already built and used for individual columns. Compared to the already supported types (n-distinct coefficients and functional dependencies), MCV lists are more complex, include column values and allow estimation of much wider range of common clauses (equality and inequality conditions, IS NULL, IS NOT NULL etc.). Similarly to the other types, a new pseudo-type (pg_mcv_list) is used. Author: Tomas Vondra Reviewed-by: Dean Rasheed, David Rowley, Mark Dilger, Alvaro Herrera Discussion: https://postgr.es/m/dfdac334-9cf2-2597-fb27-f0fb3753f435@2ndquadrant.com
Diffstat (limited to 'src/include/statistics')
-rw-r--r--src/include/statistics/extended_stats_internal.h40
-rw-r--r--src/include/statistics/statistics.h40
2 files changed, 80 insertions, 0 deletions
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 8b73cc688f..9fc591261e 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -31,6 +31,15 @@ typedef struct
int tupno; /* position index for tuple it came from */
} ScalarItem;
+/* (de)serialization info */
+typedef struct DimensionInfo
+{
+ int nvalues; /* number of deduplicated values */
+ int nbytes; /* number of bytes (serialized) */
+ int typlen; /* pg_type.typlen */
+ bool typbyval; /* pg_type.typbyval */
+} DimensionInfo;
+
/* multi-sort */
typedef struct MultiSortSupportData
{
@@ -44,6 +53,7 @@ typedef struct SortItem
{
Datum *values;
bool *isnull;
+ int count;
} SortItem;
extern MVNDistinct *statext_ndistinct_build(double totalrows,
@@ -57,6 +67,12 @@ extern MVDependencies *statext_dependencies_build(int numrows, HeapTuple *rows,
extern bytea *statext_dependencies_serialize(MVDependencies *dependencies);
extern MVDependencies *statext_dependencies_deserialize(bytea *data);
+extern MCVList *statext_mcv_build(int numrows, HeapTuple *rows,
+ Bitmapset *attrs, VacAttrStats **stats,
+ double totalrows);
+extern bytea *statext_mcv_serialize(MCVList * mcv, VacAttrStats **stats);
+extern MCVList * statext_mcv_deserialize(bytea *data);
+
extern MultiSortSupport multi_sort_init(int ndims);
extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
Oid oper, Oid collation);
@@ -65,5 +81,29 @@ extern int multi_sort_compare_dim(int dim, const SortItem *a,
const SortItem *b, MultiSortSupport mss);
extern int multi_sort_compare_dims(int start, int end, const SortItem *a,
const SortItem *b, MultiSortSupport mss);
+extern int compare_scalars_simple(const void *a, const void *b, void *arg);
+extern int compare_datums_simple(Datum a, Datum b, SortSupport ssup);
+
+extern void *bsearch_arg(const void *key, const void *base,
+ size_t nmemb, size_t size,
+ int (*compar) (const void *, const void *, void *),
+ void *arg);
+
+extern AttrNumber *build_attnums_array(Bitmapset *attrs, int *numattrs);
+
+extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
+ TupleDesc tdesc, MultiSortSupport mss,
+ int numattrs, AttrNumber *attnums);
+
+
+extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
+ StatisticExtInfo *stat,
+ List *clauses,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel,
+ Selectivity *basesel,
+ Selectivity *totalsel);
#endif /* EXTENDED_STATS_INTERNAL_H */
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index 4876e9523f..5799881075 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -78,8 +78,41 @@ typedef struct MVDependencies
/* size of the struct excluding the deps array */
#define SizeOfDependencies (offsetof(MVDependencies, ndeps) + sizeof(uint32))
+/* used to flag stats serialized to bytea */
+#define STATS_MCV_MAGIC 0xE1A651C2 /* marks serialized bytea */
+#define STATS_MCV_TYPE_BASIC 1 /* basic MCV list type */
+
+/* max items in MCV list (mostly arbitrary number) */
+#define STATS_MCVLIST_MAX_ITEMS 8192
+
+/*
+ * Multivariate MCV (most-common value) lists
+ *
+ * A straightforward extension of MCV items - i.e. a list (array) of
+ * combinations of attribute values, together with a frequency and null flags.
+ */
+typedef struct MCVItem
+{
+ double frequency; /* frequency of this combination */
+ double base_frequency; /* frequency if independent */
+ bool *isnull; /* NULL flags */
+ Datum *values; /* item values */
+} MCVItem;
+
+/* multivariate MCV list - essentally an array of MCV items */
+typedef struct MCVList
+{
+ uint32 magic; /* magic constant marker */
+ uint32 type; /* type of MCV list (BASIC) */
+ uint32 nitems; /* number of MCV items in the array */
+ AttrNumber ndimensions; /* number of dimensions */
+ Oid types[STATS_MAX_DIMENSIONS]; /* OIDs of data types */
+ MCVItem **items; /* array of MCV items */
+} MCVList;
+
extern MVNDistinct *statext_ndistinct_load(Oid mvoid);
extern MVDependencies *statext_dependencies_load(Oid mvoid);
+extern MCVList *statext_mcv_load(Oid mvoid);
extern void BuildRelationExtStatistics(Relation onerel, double totalrows,
int numrows, HeapTuple *rows,
@@ -92,6 +125,13 @@ extern Selectivity dependencies_clauselist_selectivity(PlannerInfo *root,
SpecialJoinInfo *sjinfo,
RelOptInfo *rel,
Bitmapset **estimatedclauses);
+extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
+ List *clauses,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel,
+ Bitmapset **estimatedclauses);
extern bool has_stats_of_kind(List *stats, char requiredkind);
extern StatisticExtInfo *choose_best_statistics(List *stats,
Bitmapset *attnums, char requiredkind);