summary | refs | log | tree | commit | diff
path: root/src/backend/statistics
diff options
context:
space:
mode:
author: Tomas Vondra, 2022-01-16 12:37:56 +0000
committer: Tomas Vondra, 2022-01-16 12:38:01 +0000
commit: 269b532aef55a579ae02a3e8e8df14101570dfd9 (patch)
tree: 9a3b874d97938f14617e0f9f62c7f548ea790696 /src/backend/statistics
parent: e701bdd2f042829402d96589cb16ae218967af73 (diff)
Add stxdinherit flag to pg_statistic_ext_data
Add pg_statistic_ext_data.stxdinherit flag, so that for each extended statistics definition we can store two versions of data - one for the relation alone, one for the whole inheritance tree. This is analogous to pg_statistic.stainherit, but we failed to include such a flag in catalogs for extended statistics, and we had to work around it (see commits 859b3003de, 36c4bc6e72 and 20b9fa308e).

This changes the relationship between the two catalogs storing extended statistics objects (pg_statistic_ext and pg_statistic_ext_data). Until now, there was a simple 1:1 mapping - for each definition there was one pg_statistic_ext_data row, and this row was inserted while creating the statistics (and then updated during ANALYZE). With the stxdinherit flag, we don't know how many rows there will be (child relations may be added after the statistics object is defined), so there may be up to two rows.

We could make CREATE STATISTICS always create both rows, but that seems wasteful - without partitioning we only need stxdinherit=false rows, and declaratively partitioned tables need only stxdinherit=true. So we no longer initialize pg_statistic_ext_data in CREATE STATISTICS, and instead make that a responsibility of ANALYZE, which is what we do for regular statistics too.

Patch by me, with extensive improvements and fixes by Justin Pryzby.

Author: Tomas Vondra, Justin Pryzby
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/20210923212624.GI831%40telsasoft.com
Diffstat (limited to 'src/backend/statistics')
-rw-r--r--  src/backend/statistics/dependencies.c    22
-rw-r--r--  src/backend/statistics/extended_stats.c  75
-rw-r--r--  src/backend/statistics/mcv.c              8
-rw-r--r--  src/backend/statistics/mvdistinct.c       5
4 files changed, 50 insertions, 60 deletions
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index bbc29b6711..34326d5561 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -619,14 +619,16 @@ dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
* Load the functional dependencies for the indicated pg_statistic_ext tuple
*/
MVDependencies *
-statext_dependencies_load(Oid mvoid)
+statext_dependencies_load(Oid mvoid, bool inh)
{
MVDependencies *result;
bool isnull;
Datum deps;
HeapTuple htup;
- htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
+ htup = SearchSysCache2(STATEXTDATASTXOID,
+ ObjectIdGetDatum(mvoid),
+ BoolGetDatum(inh));
if (!HeapTupleIsValid(htup))
elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
@@ -1417,16 +1419,6 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
Node **unique_exprs;
int unique_exprs_cnt;
- /*
- * When dealing with regular inheritance trees, ignore extended stats
- * (which were built without data from child rels, and thus do not
- * represent them). For partitioned tables data there's no data in the
- * non-leaf relations, so we build stats only for the inheritance tree.
- * So for partitioned tables we do consider extended stats.
- */
- if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
- return 1.0;
-
/* check if there's any stats that might be useful for us. */
if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
return 1.0;
@@ -1610,6 +1602,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
if (stat->kind != STATS_EXT_DEPENDENCIES)
continue;
+ /* skip statistics with mismatching stxdinherit value */
+ if (stat->inherit != rte->inh)
+ continue;
+
/*
* Count matching attributes - we have to undo the attnum offsets. The
* input attribute numbers are not offset (expressions are not
@@ -1656,7 +1652,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
if (nmatched + nexprs < 2)
continue;
- deps = statext_dependencies_load(stat->statOid);
+ deps = statext_dependencies_load(stat->statOid, rte->inh);
/*
* The expressions may be represented by different attnums in the
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 5762621673..87fe82ed11 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -25,6 +25,7 @@
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_statistic_ext_data.h"
#include "executor/executor.h"
+#include "commands/defrem.h"
#include "commands/progress.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
@@ -78,7 +79,7 @@ typedef struct StatExtEntry
static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
int nvacatts, VacAttrStats **vacatts);
-static void statext_store(Oid statOid,
+static void statext_store(Oid statOid, bool inh,
MVNDistinct *ndistinct, MVDependencies *dependencies,
MCVList *mcv, Datum exprs, VacAttrStats **stats);
static int statext_compute_stattarget(int stattarget,
@@ -111,7 +112,7 @@ static StatsBuildData *make_build_data(Relation onerel, StatExtEntry *stat,
* requested stats, and serializes them back into the catalog.
*/
void
-BuildRelationExtStatistics(Relation onerel, double totalrows,
+BuildRelationExtStatistics(Relation onerel, bool inh, double totalrows,
int numrows, HeapTuple *rows,
int natts, VacAttrStats **vacattrstats)
{
@@ -231,7 +232,8 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
}
/* store the statistics in the catalog */
- statext_store(stat->statOid, ndistinct, dependencies, mcv, exprstats, stats);
+ statext_store(stat->statOid, inh,
+ ndistinct, dependencies, mcv, exprstats, stats);
/* for reporting progress */
pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED,
@@ -782,23 +784,27 @@ lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
* tuple.
*/
static void
-statext_store(Oid statOid,
+statext_store(Oid statOid, bool inh,
MVNDistinct *ndistinct, MVDependencies *dependencies,
MCVList *mcv, Datum exprs, VacAttrStats **stats)
{
Relation pg_stextdata;
- HeapTuple stup,
- oldtup;
+ HeapTuple stup;
Datum values[Natts_pg_statistic_ext_data];
bool nulls[Natts_pg_statistic_ext_data];
- bool replaces[Natts_pg_statistic_ext_data];
pg_stextdata = table_open(StatisticExtDataRelationId, RowExclusiveLock);
memset(nulls, true, sizeof(nulls));
- memset(replaces, false, sizeof(replaces));
memset(values, 0, sizeof(values));
+ /* basic info */
+ values[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(statOid);
+ nulls[Anum_pg_statistic_ext_data_stxoid - 1] = false;
+
+ values[Anum_pg_statistic_ext_data_stxdinherit - 1] = BoolGetDatum(inh);
+ nulls[Anum_pg_statistic_ext_data_stxdinherit - 1] = false;
+
/*
* Construct a new pg_statistic_ext_data tuple, replacing the calculated
* stats.
@@ -831,25 +837,15 @@ statext_store(Oid statOid,
values[Anum_pg_statistic_ext_data_stxdexpr - 1] = exprs;
}
- /* always replace the value (either by bytea or NULL) */
- replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
- replaces[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
- replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
- replaces[Anum_pg_statistic_ext_data_stxdexpr - 1] = true;
-
- /* there should already be a pg_statistic_ext_data tuple */
- oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statOid));
- if (!HeapTupleIsValid(oldtup))
- elog(ERROR, "cache lookup failed for statistics object %u", statOid);
-
- /* replace it */
- stup = heap_modify_tuple(oldtup,
- RelationGetDescr(pg_stextdata),
- values,
- nulls,
- replaces);
- ReleaseSysCache(oldtup);
- CatalogTupleUpdate(pg_stextdata, &stup->t_self, stup);
+ /*
+ * Delete the old tuple if it exists, and insert a new one. It's easier
+ * than trying to update or insert, based on various conditions.
+ */
+ RemoveStatisticsDataById(statOid, inh);
+
+ /* form and insert a new tuple */
+ stup = heap_form_tuple(RelationGetDescr(pg_stextdata), values, nulls);
+ CatalogTupleInsert(pg_stextdata, stup);
heap_freetuple(stup);
@@ -1235,7 +1231,7 @@ stat_covers_expressions(StatisticExtInfo *stat, List *exprs,
* further tiebreakers are needed.
*/
StatisticExtInfo *
-choose_best_statistics(List *stats, char requiredkind,
+choose_best_statistics(List *stats, char requiredkind, bool inh,
Bitmapset **clause_attnums, List **clause_exprs,
int nclauses)
{
@@ -1257,6 +1253,10 @@ choose_best_statistics(List *stats, char requiredkind,
if (info->kind != requiredkind)
continue;
+ /* skip statistics with mismatching inheritance flag */
+ if (info->inherit != inh)
+ continue;
+
/*
* Collect attributes and expressions in remaining (unestimated)
* clauses fully covered by this statistic object.
@@ -1697,16 +1697,6 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
Selectivity sel = (is_or) ? 0.0 : 1.0;
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
- /*
- * When dealing with regular inheritance trees, ignore extended stats
- * (which were built without data from child rels, and thus do not
- * represent them). For partitioned tables data there's no data in the
- * non-leaf relations, so we build stats only for the inheritance tree.
- * So for partitioned tables we do consider extended stats.
- */
- if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
- return sel;
-
/* check if there's any stats that might be useful for us. */
if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
return sel;
@@ -1758,7 +1748,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
Bitmapset *simple_clauses;
/* find the best suited statistics object for these attnums */
- stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV,
+ stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV, rte->inh,
list_attnums, list_exprs,
list_length(clauses));
@@ -1847,7 +1837,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
MCVList *mcv_list;
/* Load the MCV list stored in the statistics object */
- mcv_list = statext_mcv_load(stat->statOid);
+ mcv_list = statext_mcv_load(stat->statOid, rte->inh);
/*
* Compute the selectivity of the ORed list of clauses covered by
@@ -2408,7 +2398,7 @@ serialize_expr_stats(AnlExprData *exprdata, int nexprs)
* identified by the supplied index.
*/
HeapTuple
-statext_expressions_load(Oid stxoid, int idx)
+statext_expressions_load(Oid stxoid, bool inh, int idx)
{
bool isnull;
Datum value;
@@ -2418,7 +2408,8 @@ statext_expressions_load(Oid stxoid, int idx)
HeapTupleData tmptup;
HeapTuple tup;
- htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(stxoid));
+ htup = SearchSysCache2(STATEXTDATASTXOID,
+ ObjectIdGetDatum(stxoid), BoolGetDatum(inh));
if (!HeapTupleIsValid(htup))
elog(ERROR, "cache lookup failed for statistics object %u", stxoid);
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 65fa87b1c7..bad1787cfb 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -559,12 +559,13 @@ build_column_frequencies(SortItem *groups, int ngroups,
* Load the MCV list for the indicated pg_statistic_ext tuple.
*/
MCVList *
-statext_mcv_load(Oid mvoid)
+statext_mcv_load(Oid mvoid, bool inh)
{
MCVList *result;
bool isnull;
Datum mcvlist;
- HeapTuple htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
+ HeapTuple htup = SearchSysCache2(STATEXTDATASTXOID,
+ ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
if (!HeapTupleIsValid(htup))
elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
@@ -2038,12 +2039,13 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
int i;
MCVList *mcv;
Selectivity s = 0.0;
+ RangeTblEntry *rte = root->simple_rte_array[rel->relid];
/* match/mismatch bitmap for each MCV item */
bool *matches = NULL;
/* load the MCV list stored in the statistics object */
- mcv = statext_mcv_load(stat->statOid);
+ mcv = statext_mcv_load(stat->statOid, rte->inh);
/* build a match bitmap for the clauses */
matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
index 55b831d4f5..6ade5eff78 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -146,14 +146,15 @@ statext_ndistinct_build(double totalrows, StatsBuildData *data)
* Load the ndistinct value for the indicated pg_statistic_ext tuple
*/
MVNDistinct *
-statext_ndistinct_load(Oid mvoid)
+statext_ndistinct_load(Oid mvoid, bool inh)
{
MVNDistinct *result;
bool isnull;
Datum ndist;
HeapTuple htup;
- htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
+ htup = SearchSysCache2(STATEXTDATASTXOID,
+ ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
if (!HeapTupleIsValid(htup))
elog(ERROR, "cache lookup failed for statistics object %u", mvoid);