1 files changed, 103 insertions, 30 deletions
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index ca48fca486..832d99a065 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -115,7 +115,8 @@ static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
 
 #ifdef XCP
 static void analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
-						VacAttrStats **vacattrstats);
+						VacAttrStats **vacattrstats, int nindexes,
+						Relation *indexes, AnlIndexData *indexdata);
 #endif
 
 /*
@@ -424,30 +425,6 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
 		attr_cnt = tcnt;
 	}
 
-#ifdef XCP
-	if (IS_PGXC_COORDINATOR && onerel->rd_locator_info)
-	{
-		/*
-		 * Fetch relation statistics from remote nodes and update
-		 */
-		vacuum_rel_coordinator(onerel, in_outer_xact);
-
-		/*
-		 * Fetch attribute statistics from remote nodes.
-		 */
-		analyze_rel_coordinator(onerel, inh, attr_cnt, vacattrstats);
-
-		/*
-		 * Skip acquiring local stats. Coordinator does not store data of
-		 * distributed tables.
-		 */
-		nindexes = 0;
-		hasindex = false;
-		Irel = NULL;
-		goto cleanup;
-	}
-#endif
-
 	/*
 	 * Open all indexes of the relation, and see if there are any analyzable
 	 * columns in the indexes.  We do not analyze index columns if there was
@@ -505,6 +482,28 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
 		}
 	}
 
+#ifdef XCP
+	if (IS_PGXC_COORDINATOR && onerel->rd_locator_info)
+	{
+		/*
+		 * Fetch relation statistics from remote nodes and update
+		 */
+		vacuum_rel_coordinator(onerel, in_outer_xact);
+
+		/*
+		 * Fetch attribute statistics from remote nodes.
+		 */
+		analyze_rel_coordinator(onerel, inh, attr_cnt, vacattrstats,
+								nindexes, Irel, indexdata);
+
+		/*
+		 * Skip acquiring local stats. Coordinator does not store data of
+		 * distributed tables.
+		 */
+		goto cleanup;
+	}
+#endif
+
 	/*
 	 * Determine how many rows we need to sample, using the worst case from
 	 * all analyzable columns.  We use a lower bound of 100 rows to avoid
@@ -2944,9 +2943,24 @@ compare_mcvs(const void *a, const void *b)
 
 
 #ifdef XCP
+/*
+ * coord_collect_simple_stats
+ *		Collect simple stats for a relation (pg_statistic contents).
+ *
+ * Collects statistics from the datanodes, and then keeps the one of the
+ * received statistics for each attribute (the first one we receive, but
+ * it's mostly random).
+ *
+ * XXX We do not try to build statistics covering data fro all the nodes,
+ * either by collecting fresh sample of rows or merging the statistics
+ * somehow. The current approach is very simple and cheap, but may have
+ * negative impact on estimate accuracy as the stats only covers data
+ * from a single node, and we may end up with stats from different node
+ * for each attribute.
+ */
 static void
-analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
-						VacAttrStats **vacattrstats)
+coord_collect_simple_stats(Relation onerel, bool inh, int attr_cnt,
+						   VacAttrStats **vacattrstats)
 {
 	char 		   *nspname;
 	char 		   *relname;
@@ -2960,14 +2974,11 @@ analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
 	int 			i;
 	/* Number of data nodes from which attribute statistics are received. */
 	int			   *numnodes;
-	List		   *stat_oids;
 
 	/* Get the relation identifier */
 	relname = RelationGetRelationName(onerel);
 	nspname = get_namespace_name(RelationGetNamespace(onerel));
 
-	elog(LOG, "Getting detailed statistics for %s.%s", nspname, relname);
-
 	/* Make up query string */
 	initStringInfo(&query);
 	/* Generic statistic fields */
@@ -3367,6 +3378,38 @@ analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
 		}
 	}
 	update_attstats(RelationGetRelid(onerel), inh, attr_cnt, vacattrstats);
+}
+
+/*
+ * coord_collect_extended_stats
+ *		Collect extended stats for a relation (pg_statistic_ext contents).
+ *
+ * Collects statistics from the datanodes, and then keeps the one of the
+ * received statistics for each attribute (the first one we receive, but
+ * it's mostly random).
+ *
+ * XXX This has similar issues as coord_collect_simple_stats.
+ */
+static void
+coord_collect_extended_stats(Relation onerel, int attr_cnt)
+{
+	char 		   *nspname;
+	char 		   *relname;
+	/* Fields to run query to read statistics from data nodes */
+	StringInfoData  query;
+	EState 		   *estate;
+	MemoryContext 	oldcontext;
+	RemoteQuery	    *step;
+	RemoteQueryState *node;
+	TupleTableSlot *result;
+	int 			i;
+	/* Number of data nodes from which attribute statistics are received. */
+	int			   *numnodes;
+	List		   *stat_oids;
+
+	/* Get the relation identifier */
+	relname = RelationGetRelationName(onerel);
+	nspname = get_namespace_name(RelationGetNamespace(onerel));
 
 	/*
 	 * Build extended statistics on the coordinator.
@@ -3521,4 +3564,34 @@ analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
 	}
 	ExecEndRemoteQuery(node);
 }
+
+/*
+ * analyze_rel_coordinator
+ *		Collect all statistics for a particular relation.
+ *
+ * We collect three types of statistics for each table:
+ *
+ * - simple statistics (pg_statistic)
+ * - extended statistics (pg_statistic_ext)
+ * - index statistics (including expression indexes)
+ */
+static void
+analyze_rel_coordinator(Relation onerel, bool inh, int attr_cnt,
+						VacAttrStats **vacattrstats, int nindexes,
+						Relation *indexes, AnlIndexData *indexdata)
+{
+	int i;
+
+	/* simple statistics (pg_statistic) for the relation */
+	coord_collect_simple_stats(onerel, inh, attr_cnt, vacattrstats);
+
+	/* simple statistics (pg_statistic) for all indexes */
+	for (i = 0; i < nindexes; i++)
+		coord_collect_simple_stats(indexes[i], false,
+								   indexdata[i].attr_cnt,
+								   indexdata[i].vacattrstats);
+
+	/* extended statistics (pg_statistic) for the relation */
+	coord_collect_extended_stats(onerel, attr_cnt);
+}
 #endif