summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/commands/analyze.c133
1 files changed, 103 insertions, 30 deletions
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index ca48fca486..832d99a065 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -115,7 +115,8 @@ static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
#ifdef XCP
static void analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
- VacAttrStats **vacattrstats);
+ VacAttrStats **vacattrstats, int nindexes,
+ Relation *indexes, AnlIndexData *indexdata);
#endif
/*
@@ -424,30 +425,6 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
attr_cnt = tcnt;
}
-#ifdef XCP
- if (IS_PGXC_COORDINATOR && onerel->rd_locator_info)
- {
- /*
- * Fetch relation statistics from remote nodes and update
- */
- vacuum_rel_coordinator(onerel, in_outer_xact);
-
- /*
- * Fetch attribute statistics from remote nodes.
- */
- analyze_rel_coordinator(onerel, inh, attr_cnt, vacattrstats);
-
- /*
- * Skip acquiring local stats. Coordinator does not store data of
- * distributed tables.
- */
- nindexes = 0;
- hasindex = false;
- Irel = NULL;
- goto cleanup;
- }
-#endif
-
/*
* Open all indexes of the relation, and see if there are any analyzable
* columns in the indexes. We do not analyze index columns if there was
@@ -505,6 +482,28 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
}
}
+#ifdef XCP
+ if (IS_PGXC_COORDINATOR && onerel->rd_locator_info)
+ {
+ /*
+ * Fetch relation statistics from remote nodes and update
+ */
+ vacuum_rel_coordinator(onerel, in_outer_xact);
+
+ /*
+ * Fetch attribute statistics from remote nodes.
+ */
+ analyze_rel_coordinator(onerel, inh, attr_cnt, vacattrstats,
+ nindexes, Irel, indexdata);
+
+ /*
+ * Skip acquiring local stats. Coordinator does not store data of
+ * distributed tables.
+ */
+ goto cleanup;
+ }
+#endif
+
/*
* Determine how many rows we need to sample, using the worst case from
* all analyzable columns. We use a lower bound of 100 rows to avoid
@@ -2944,9 +2943,24 @@ compare_mcvs(const void *a, const void *b)
#ifdef XCP
+/*
+ * coord_collect_simple_stats
+ * Collect simple stats for a relation (pg_statistic contents).
+ *
+ * Collects statistics from the datanodes, and then keeps the one of the
+ * received statistics for each attribute (the first one we receive, but
+ * it's mostly random).
+ *
+ * XXX We do not try to build statistics covering data fro all the nodes,
+ * either by collecting fresh sample of rows or merging the statistics
+ * somehow. The current approach is very simple and cheap, but may have
+ * negative impact on estimate accuracy as the stats only covers data
+ * from a single node, and we may end up with stats from different node
+ * for each attribute.
+ */
static void
-analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
- VacAttrStats **vacattrstats)
+coord_collect_simple_stats(Relation onerel, bool inh, int attr_cnt,
+ VacAttrStats **vacattrstats)
{
char *nspname;
char *relname;
@@ -2960,14 +2974,11 @@ analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
int i;
/* Number of data nodes from which attribute statistics are received. */
int *numnodes;
- List *stat_oids;
/* Get the relation identifier */
relname = RelationGetRelationName(onerel);
nspname = get_namespace_name(RelationGetNamespace(onerel));
- elog(LOG, "Getting detailed statistics for %s.%s", nspname, relname);
-
/* Make up query string */
initStringInfo(&query);
/* Generic statistic fields */
@@ -3367,6 +3378,38 @@ analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
}
}
update_attstats(RelationGetRelid(onerel), inh, attr_cnt, vacattrstats);
+}
+
+/*
+ * coord_collect_extended_stats
+ * Collect extended stats for a relation (pg_statistic_ext contents).
+ *
+ * Collects statistics from the datanodes, and then keeps the one of the
+ * received statistics for each attribute (the first one we receive, but
+ * it's mostly random).
+ *
+ * XXX This has similar issues as coord_collect_simple_stats.
+ */
+static void
+coord_collect_extended_stats(Relation onerel, int attr_cnt)
+{
+ char *nspname;
+ char *relname;
+ /* Fields to run query to read statistics from data nodes */
+ StringInfoData query;
+ EState *estate;
+ MemoryContext oldcontext;
+ RemoteQuery *step;
+ RemoteQueryState *node;
+ TupleTableSlot *result;
+ int i;
+ /* Number of data nodes from which attribute statistics are received. */
+ int *numnodes;
+ List *stat_oids;
+
+ /* Get the relation identifier */
+ relname = RelationGetRelationName(onerel);
+ nspname = get_namespace_name(RelationGetNamespace(onerel));
/*
* Build extended statistics on the coordinator.
@@ -3521,4 +3564,34 @@ analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
}
ExecEndRemoteQuery(node);
}
+
+/*
+ * analyze_rel_coordinator
+ * Collect all statistics for a particular relation.
+ *
+ * We collect three types of statistics for each table:
+ *
+ * - simple statistics (pg_statistic)
+ * - extended statistics (pg_statistic_ext)
+ * - index statistics (including expression indexes)
+ */
+static void
+analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
+ VacAttrStats **vacattrstats, int nindexes,
+ Relation *indexes, AnlIndexData *indexdata)
+{
+ int i;
+
+ /* simple statistics (pg_statistic) for the relation */
+ coord_collect_simple_stats(onerel, inh, attr_cnt, vacattrstats);
+
+ /* simple statistics (pg_statistic) for all indexes */
+ for (i = 0; i < nindexes; i++)
+ coord_collect_simple_stats(indexes[i], false,
+ indexdata[i].attr_cnt,
+ indexdata[i].vacattrstats);
+
+ /* extended statistics (pg_statistic) for the relation */
+ coord_collect_extended_stats(onerel, attr_cnt);
+}
#endif