summaryrefslogtreecommitdiff
path: root/src/test
diff options
context:
space:
mode:
authorTomas Vondra2020-01-13 00:20:57 +0000
committerTomas Vondra2020-01-13 00:21:17 +0000
commiteae056c19ee8f5ebc45ac0fe13181f91c8791e00 (patch)
treed0d41b085dd0460bb4a152f8c4532d7d177605e7 /src/test
parentaaa6761876ba5b06a5c3fa914b2951ace1e31dee (diff)
Apply multiple multivariate MCV lists when possible
Until now we've only used a single multivariate MCV list per relation, covering the largest number of clauses. So for example given a query SELECT * FROM t WHERE a = 1 AND b =1 AND c = 1 AND d = 1 and extended statistics on (a,b) and (c,d), we'd only pick and use one of them. This commit improves this by repeatedly picking and applying the best statistics (matching the largest number of remaining clauses) until no additional statistics is applicable. This greedy algorithm is simple, but may not be optimal. A different choice of statistics may leave fewer clauses unestimated and/or give better estimates for some other reason. This can however happen only when there are overlapping statistics, and selecting one makes it impossible to use the other. E.g. with statistics on (a,b), (c,d), (b,c,d), we may pick either (a,b) and (c,d) or (b,c,d). But it's not clear which option is the best one. We however assume cases like this are rare, and the easiest solution is to define statistics covering the whole group of correlated columns. In the future we might support overlapping stats, using some of the clauses as conditions (in conditional probability sense). Author: Tomas Vondra Reviewed-by: Mark Dilger, Kyotaro Horiguchi Discussion: https://postgr.es/m/20191028152048.jc6pqv5hb7j77ocp@development
Diffstat (limited to 'src/test')
-rw-r--r--src/test/regress/expected/stats_ext.out57
-rw-r--r--src/test/regress/sql/stats_ext.sql35
2 files changed, 92 insertions, 0 deletions
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index d467c0dd7d9..61237dfb113 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -836,6 +836,63 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND
1 | 0
(1 row)
+-- check the ability to use multiple MCV lists
+CREATE TABLE mcv_lists_multi (
+ a INTEGER,
+ b INTEGER,
+ c INTEGER,
+ d INTEGER
+);
+INSERT INTO mcv_lists_multi (a, b, c, d)
+ SELECT
+ mod(i,5),
+ mod(i,5),
+ mod(i,7),
+ mod(i,7)
+ FROM generate_series(1,5000) s(i);
+ANALYZE mcv_lists_multi;
+-- estimates without any mcv statistics
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0');
+ estimated | actual
+-----------+--------
+ 200 | 1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 AND d = 0');
+ estimated | actual
+-----------+--------
+ 102 | 714
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
+ estimated | actual
+-----------+--------
+ 4 | 142
+(1 row)
+
+-- create separate MCV statistics
+CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
+CREATE STATISTICS mcv_lists_multi_2 (mcv) ON c, d FROM mcv_lists_multi;
+ANALYZE mcv_lists_multi;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0');
+ estimated | actual
+-----------+--------
+ 1000 | 1000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 AND d = 0');
+ estimated | actual
+-----------+--------
+ 714 | 714
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
+ estimated | actual
+-----------+--------
+ 143 | 142
+(1 row)
+
+DROP TABLE mcv_lists_multi;
-- Permission tests. Users should not be able to see specific data values in
-- the extended statistics, if they lack permission to see those values in
-- the underlying table.
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 734b1f60c94..84f13e8814a 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -535,6 +535,41 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
+-- check the ability to use multiple MCV lists
+CREATE TABLE mcv_lists_multi (
+ a INTEGER,
+ b INTEGER,
+ c INTEGER,
+ d INTEGER
+);
+
+INSERT INTO mcv_lists_multi (a, b, c, d)
+ SELECT
+ mod(i,5),
+ mod(i,5),
+ mod(i,7),
+ mod(i,7)
+ FROM generate_series(1,5000) s(i);
+
+ANALYZE mcv_lists_multi;
+
+-- estimates without any mcv statistics
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 AND d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
+
+-- create separate MCV statistics
+CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
+CREATE STATISTICS mcv_lists_multi_2 (mcv) ON c, d FROM mcv_lists_multi;
+
+ANALYZE mcv_lists_multi;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 AND d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
+
+DROP TABLE mcv_lists_multi;
+
-- Permission tests. Users should not be able to see specific data values in
-- the extended statistics, if they lack permission to see those values in
-- the underlying table.