summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorTomas Vondra2021-03-26 22:22:01 +0000
committerTomas Vondra2021-03-26 23:01:11 +0000
commita4d75c86bf15220df22de0a92c819ecef9db3849 (patch)
treea736a68b1c3f022590a886b7bac45276f1f490a6 /src/include
parent98376c18f12e562421b5c77e619248e8b7aae3c6 (diff)
Extended statistics on expressions
Allow defining extended statistics on expressions, not just just on simple column references. With this commit, expressions are supported by all existing extended statistics kinds, improving the same types of estimates. A simple example may look like this: CREATE TABLE t (a int); CREATE STATISTICS s ON mod(a,10), mod(a,20) FROM t; ANALYZE t; The collected statistics are useful e.g. to estimate queries with those expressions in WHERE or GROUP BY clauses: SELECT * FROM t WHERE mod(a,10) = 0 AND mod(a,20) = 0; SELECT 1 FROM t GROUP BY mod(a,10), mod(a,20); This introduces new internal statistics kind 'e' (expressions) which is built automatically when the statistics object definition includes any expressions. This represents single-expression statistics, as if there was an expression index (but without the index maintenance overhead). The statistics is stored in pg_statistics_ext_data as an array of composite types, which is possible thanks to 79f6a942bd. CREATE STATISTICS allows building statistics on a single expression, in which case in which case it's not possible to specify statistics kinds. A new system view pg_stats_ext_exprs can be used to display expression statistics, similarly to pg_stats and pg_stats_ext views. ALTER TABLE ... ALTER COLUMN ... TYPE now treats indexes the same way it treats indexes, i.e. it drops and recreates the statistics. This means all statistics are reset, and we no longer try to preserve at least the functional dependencies. This should not be a major issue in practice, as the functional dependencies actually rely on per-column statistics, which were always reset anyway. Author: Tomas Vondra Reviewed-by: Justin Pryzby, Dean Rasheed, Zhihong Yu Discussion: https://postgr.es/m/ad7891d2-e90c-b446-9fe2-7419143847d7%40enterprisedb.com
Diffstat (limited to 'src/include')
-rw-r--r--src/include/catalog/catversion.h2
-rw-r--r--src/include/catalog/pg_proc.dat8
-rw-r--r--src/include/catalog/pg_statistic_ext.h4
-rw-r--r--src/include/catalog/pg_statistic_ext_data.h1
-rw-r--r--src/include/commands/defrem.h4
-rw-r--r--src/include/nodes/nodes.h1
-rw-r--r--src/include/nodes/parsenodes.h19
-rw-r--r--src/include/nodes/pathnodes.h1
-rw-r--r--src/include/parser/parse_node.h1
-rw-r--r--src/include/parser/parse_utilcmd.h2
-rw-r--r--src/include/statistics/extended_stats_internal.h32
-rw-r--r--src/include/statistics/statistics.h5
-rw-r--r--src/include/utils/ruleutils.h2
13 files changed, 64 insertions, 18 deletions
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 474ee2982b..4a39da3c9d 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202103265
+#define CATALOG_VERSION_NO 202103266
#endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index ecf12f4639..cc7d90d2b0 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -3658,6 +3658,14 @@
proname => 'pg_get_statisticsobjdef', provolatile => 's',
prorettype => 'text', proargtypes => 'oid',
prosrc => 'pg_get_statisticsobjdef' },
+{ oid => '8887', descr => 'extended statistics columns',
+ proname => 'pg_get_statisticsobjdef_columns', provolatile => 's',
+ prorettype => 'text', proargtypes => 'oid',
+ prosrc => 'pg_get_statisticsobjdef_columns' },
+{ oid => '8886', descr => 'extended statistics expressions',
+ proname => 'pg_get_statisticsobjdef_expressions', provolatile => 's',
+ prorettype => '_text', proargtypes => 'oid',
+ prosrc => 'pg_get_statisticsobjdef_expressions' },
{ oid => '3352', descr => 'partition key description',
proname => 'pg_get_partkeydef', provolatile => 's', prorettype => 'text',
proargtypes => 'oid', prosrc => 'pg_get_partkeydef' },
diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h
index 29649f5814..36912ce528 100644
--- a/src/include/catalog/pg_statistic_ext.h
+++ b/src/include/catalog/pg_statistic_ext.h
@@ -54,6 +54,9 @@ CATALOG(pg_statistic_ext,3381,StatisticExtRelationId)
#ifdef CATALOG_VARLEN
char stxkind[1] BKI_FORCE_NOT_NULL; /* statistics kinds requested
* to build */
+ pg_node_tree stxexprs; /* A list of expression trees for stats
+ * attributes that are not simple column
+ * references. */
#endif
} FormData_pg_statistic_ext;
@@ -81,6 +84,7 @@ DECLARE_ARRAY_FOREIGN_KEY((stxrelid, stxkeys), pg_attribute, (attrelid, attnum))
#define STATS_EXT_NDISTINCT 'd'
#define STATS_EXT_DEPENDENCIES 'f'
#define STATS_EXT_MCV 'm'
+#define STATS_EXT_EXPRESSIONS 'e'
#endif /* EXPOSE_TO_CLIENT_CODE */
diff --git a/src/include/catalog/pg_statistic_ext_data.h b/src/include/catalog/pg_statistic_ext_data.h
index 2f2577c218..5729154383 100644
--- a/src/include/catalog/pg_statistic_ext_data.h
+++ b/src/include/catalog/pg_statistic_ext_data.h
@@ -38,6 +38,7 @@ CATALOG(pg_statistic_ext_data,3429,StatisticExtDataRelationId)
pg_ndistinct stxdndistinct; /* ndistinct coefficients (serialized) */
pg_dependencies stxddependencies; /* dependencies (serialized) */
pg_mcv_list stxdmcv; /* MCV (serialized) */
+ pg_statistic stxdexpr[1]; /* stats for expressions */
#endif
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index 1a79540c94..339f29f4c8 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -81,9 +81,7 @@ extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt);
extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt);
extern ObjectAddress AlterStatistics(AlterStatsStmt *stmt);
extern void RemoveStatisticsById(Oid statsOid);
-extern void UpdateStatisticsForTypeChange(Oid statsOid,
- Oid relationOid, int attnum,
- Oid oldColumnType, Oid newColumnType);
+extern Oid StatisticsGetRelation(Oid statId, bool missing_ok);
/* commands/aggregatecmds.c */
extern ObjectAddress DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle,
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index e22df890ef..299956f329 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -454,6 +454,7 @@ typedef enum NodeTag
T_TypeName,
T_ColumnDef,
T_IndexElem,
+ T_StatsElem,
T_Constraint,
T_DefElem,
T_RangeTblEntry,
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 0ce19d98ec..12e0e026dc 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1914,7 +1914,8 @@ typedef enum AlterTableType
AT_AddIdentity, /* ADD IDENTITY */
AT_SetIdentity, /* SET identity column options */
AT_DropIdentity, /* DROP IDENTITY */
- AT_AlterCollationRefreshVersion /* ALTER COLLATION ... REFRESH VERSION */
+ AT_AlterCollationRefreshVersion, /* ALTER COLLATION ... REFRESH VERSION */
+ AT_ReAddStatistics /* internal to commands/tablecmds.c */
} AlterTableType;
typedef struct ReplicaIdentityStmt
@@ -2872,8 +2873,24 @@ typedef struct CreateStatsStmt
List *relations; /* rels to build stats on (list of RangeVar) */
char *stxcomment; /* comment to apply to stats, or NULL */
bool if_not_exists; /* do nothing if stats name already exists */
+ bool transformed; /* true when transformStatsStmt is finished */
} CreateStatsStmt;
+/*
+ * StatsElem - statistics parameters (used in CREATE STATISTICS)
+ *
+ * For a plain attribute, 'name' is the name of the referenced table column
+ * and 'expr' is NULL. For an expression, 'name' is NULL and 'expr' is the
+ * expression tree.
+ */
+typedef struct StatsElem
+{
+ NodeTag type;
+ char *name; /* name of attribute to index, or NULL */
+ Node *expr; /* expression to index, or NULL */
+} StatsElem;
+
+
/* ----------------------
* Alter Statistics Statement
* ----------------------
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index c13642e35e..e4b554f811 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -923,6 +923,7 @@ typedef struct StatisticExtInfo
RelOptInfo *rel; /* back-link to statistic's table */
char kind; /* statistics kind of this entry */
Bitmapset *keys; /* attnums of the columns covered */
+ List *exprs; /* expressions */
} StatisticExtInfo;
/*
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index 176b9f37c1..a71d7e1f74 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -69,6 +69,7 @@ typedef enum ParseExprKind
EXPR_KIND_FUNCTION_DEFAULT, /* default parameter value for function */
EXPR_KIND_INDEX_EXPRESSION, /* index expression */
EXPR_KIND_INDEX_PREDICATE, /* index predicate */
+ EXPR_KIND_STATS_EXPRESSION, /* extended statistics expression */
EXPR_KIND_ALTER_COL_TRANSFORM, /* transform expr in ALTER COLUMN TYPE */
EXPR_KIND_EXECUTE_PARAMETER, /* parameter value in EXECUTE */
EXPR_KIND_TRIGGER_WHEN, /* WHEN condition in CREATE TRIGGER */
diff --git a/src/include/parser/parse_utilcmd.h b/src/include/parser/parse_utilcmd.h
index bfa4a6b0f2..1056bf081b 100644
--- a/src/include/parser/parse_utilcmd.h
+++ b/src/include/parser/parse_utilcmd.h
@@ -26,6 +26,8 @@ extern AlterTableStmt *transformAlterTableStmt(Oid relid, AlterTableStmt *stmt,
List **afterStmts);
extern IndexStmt *transformIndexStmt(Oid relid, IndexStmt *stmt,
const char *queryString);
+extern CreateStatsStmt *transformStatsStmt(Oid relid, CreateStatsStmt *stmt,
+ const char *queryString);
extern void transformRuleStmt(RuleStmt *stmt, const char *queryString,
List **actions, Node **whereClause);
extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt);
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index a0a3cf5b0f..55cd9252a5 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -57,19 +57,27 @@ typedef struct SortItem
int count;
} SortItem;
-extern MVNDistinct *statext_ndistinct_build(double totalrows,
- int numrows, HeapTuple *rows,
- Bitmapset *attrs, VacAttrStats **stats);
+/* a unified representation of the data the statistics is built on */
+typedef struct StatsBuildData
+{
+ int numrows;
+ int nattnums;
+ AttrNumber *attnums;
+ VacAttrStats **stats;
+ Datum **values;
+ bool **nulls;
+} StatsBuildData;
+
+
+extern MVNDistinct *statext_ndistinct_build(double totalrows, StatsBuildData *data);
extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
extern MVNDistinct *statext_ndistinct_deserialize(bytea *data);
-extern MVDependencies *statext_dependencies_build(int numrows, HeapTuple *rows,
- Bitmapset *attrs, VacAttrStats **stats);
+extern MVDependencies *statext_dependencies_build(StatsBuildData *data);
extern bytea *statext_dependencies_serialize(MVDependencies *dependencies);
extern MVDependencies *statext_dependencies_deserialize(bytea *data);
-extern MCVList *statext_mcv_build(int numrows, HeapTuple *rows,
- Bitmapset *attrs, VacAttrStats **stats,
+extern MCVList *statext_mcv_build(StatsBuildData *data,
double totalrows, int stattarget);
extern bytea *statext_mcv_serialize(MCVList *mcv, VacAttrStats **stats);
extern MCVList *statext_mcv_deserialize(bytea *data);
@@ -85,14 +93,14 @@ extern int multi_sort_compare_dims(int start, int end, const SortItem *a,
extern int compare_scalars_simple(const void *a, const void *b, void *arg);
extern int compare_datums_simple(Datum a, Datum b, SortSupport ssup);
-extern AttrNumber *build_attnums_array(Bitmapset *attrs, int *numattrs);
+extern AttrNumber *build_attnums_array(Bitmapset *attrs, int nexprs, int *numattrs);
-extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
- TupleDesc tdesc, MultiSortSupport mss,
+extern SortItem *build_sorted_items(StatsBuildData *data, int *nitems,
+ MultiSortSupport mss,
int numattrs, AttrNumber *attnums);
-extern bool examine_clause_args(List *args, Var **varp,
- Const **cstp, bool *varonleftp);
+extern bool examine_opclause_args(List *args, Node **exprp,
+ Const **cstp, bool *expronleftp);
extern Selectivity mcv_combine_selectivities(Selectivity simple_sel,
Selectivity mcv_sel,
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index fec50688ea..326cf26fea 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -26,7 +26,8 @@
typedef struct MVNDistinctItem
{
double ndistinct; /* ndistinct value for this combination */
- Bitmapset *attrs; /* attr numbers of items */
+ int nattributes; /* number of attributes */
+ AttrNumber *attributes; /* attribute numbers */
} MVNDistinctItem;
/* A MVNDistinct object, comprising all possible combinations of columns */
@@ -121,6 +122,8 @@ extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
extern bool has_stats_of_kind(List *stats, char requiredkind);
extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
Bitmapset **clause_attnums,
+ List **clause_exprs,
int nclauses);
+extern HeapTuple statext_expressions_load(Oid stxoid, int idx);
#endif /* STATISTICS_H */
diff --git a/src/include/utils/ruleutils.h b/src/include/utils/ruleutils.h
index ac3d0a6742..d333e5e8a5 100644
--- a/src/include/utils/ruleutils.h
+++ b/src/include/utils/ruleutils.h
@@ -41,4 +41,6 @@ extern char *generate_collation_name(Oid collid);
extern char *generate_opclass_name(Oid opclass);
extern char *get_range_partbound_string(List *bound_datums);
+extern char *pg_get_statisticsobjdef_string(Oid statextid);
+
#endif /* RULEUTILS_H */