summary | refs | log | tree | commit | diff
path: root/src/test
diff options
context:
space:
mode:
author: Tomas Vondra, 2019-11-28 21:20:28 +0000
committer: Tomas Vondra, 2019-11-28 21:20:45 +0000
commit: c676e659b246f94d571b57b559f80cb2dc03e73b (patch)
tree: 0bedeaafef4d4d49e90ea2b2d539741600b49a4a /src/test
parent: 3974c4a72459fc07acef3ee1369d63a7b8305b62 (diff)
Fix choose_best_statistics to check clauses individually
When picking the best extended statistics object for a list of clauses, it's not enough to look at attnums extracted from the clause list as a whole. Consider for example this query with OR clauses:

    SELECT * FROM t WHERE (t.a = 1) OR (t.b = 1) OR (t.c = 1)

with a statistics object defined on columns (a,b). Relying on attnums extracted from the whole OR clause, we'd consider the statistics usable. That does not work, as we see the conditions as a single OR-clause, referencing an attribute not covered by the statistics, leading to an empty list of clauses to be estimated using the statistics and an assert failure.

This changes choose_best_statistics to check which clauses are actually covered, and to use only attributes from the fully covered ones. For the previous example this means the statistics object will not be considered as compatible with the OR-clause.

Backpatch to 12, where MCVs were introduced. The issue does not affect older versions because functional dependencies don't handle OR clauses.

Author: Tomas Vondra
Reviewed-by: Dean Rasheed
Reported-By: Manuel Rigger
Discussion: https://postgr.es/m/CA+u7OA7H5rcE2=8f263w4NZD6ipO_XOrYB816nuLXbmSTH9pQQ@mail.gmail.com
Backpatch-through: 12
Diffstat (limited to 'src/test')
-rw-r--r-- src/test/regress/expected/stats_ext.out | 25
-rw-r--r-- src/test/regress/sql/stats_ext.sql | 9
2 files changed, 34 insertions, 0 deletions
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index dfbc41c390d..6fb1aeb596a 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -534,6 +534,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <
1 | 50
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
+ estimated | actual
+-----------+--------
+ 343 | 200
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+ estimated | actual
+-----------+--------
+ 343 | 200
+(1 row)
+
-- create statistics
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
ANALYZE mcv_lists;
@@ -573,6 +585,19 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <
50 | 50
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
+ estimated | actual
+-----------+--------
+ 200 | 200
+(1 row)
+
+-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+ estimated | actual
+-----------+--------
+ 343 | 200
+(1 row)
+
-- check change of unrelated column type does not reset the MCV statistics
ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
SELECT d.stxdmcv IS NOT NULL
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 6237fb25c20..4999d89c8cd 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -342,6 +342,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b <
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+
-- create statistics
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
@@ -359,6 +363,11 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < 5 AND b <
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b <= ''0'' AND c <= 4');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
+
+-- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+
-- check change of unrelated column type does not reset the MCV statistics
ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);