-- We will be checking execution plans without/with statistics, so
-- let's make sure we get simple non-parallel plans. Also set the
-- work_mem low so that we can use small amounts of data.
-SET max_parallel_workers = 0;
-SET max_parallel_workers_per_gather = 0;
-SET work_mem = '128kB';
+-- check the number of estimated/actual rows in the top node
+create function check_estimated_rows(text) returns table (estimated int, actual int)
+language plpgsql as
+$$
+declare
+ ln text;
+ tmp text[];
+ first_row bool := true;
+begin
+ for ln in
+ execute format('explain analyze %s', $1)
+ loop
+ if first_row then
+ first_row := false;
+ tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+ return query select tmp[1]::int, tmp[2]::int;
+ end if;
+ end loop;
+end;
+$$;
-- Verify failures
CREATE STATISTICS tst;
ERROR: syntax error at or near ";"
-- over-estimates when using only per-column statistics
INSERT INTO ndistinct (a, b, c, filler1)
SELECT i/100, i/100, i/100, cash_words((i/100)::money)
- FROM generate_series(1,30000) s(i);
+ FROM generate_series(1,1000) s(i);
ANALYZE ndistinct;
-- Group Aggregate, due to over-estimate of the number of groups
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
- QUERY PLAN
------------------------------------
- GroupAggregate
- Group Key: a, b
- -> Sort
- Sort Key: a, b
- -> Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
- QUERY PLAN
------------------------------------
- GroupAggregate
- Group Key: b, c
- -> Sort
- Sort Key: b, c
- -> Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
- QUERY PLAN
------------------------------------
- GroupAggregate
- Group Key: a, b, c
- -> Sort
- Sort Key: a, b, c
- -> Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
- QUERY PLAN
------------------------------------
- GroupAggregate
- Group Key: a, b, c, d
- -> Sort
- Sort Key: a, b, c, d
- -> Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
- QUERY PLAN
------------------------------------
- GroupAggregate
- Group Key: b, c, d
- -> Sort
- Sort Key: b, c, d
- -> Seq Scan on ndistinct
-(5 rows)
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual
+-----------+--------
+ 100 | 11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c');
+ estimated | actual
+-----------+--------
+ 100 | 11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
+ estimated | actual
+-----------+--------
+ 100 | 11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
+ estimated | actual
+-----------+--------
+ 200 | 11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
+ estimated | actual
+-----------+--------
+ 200 | 11
+(1 row)
-- correct command
CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
ANALYZE ndistinct;
SELECT stxkind, stxndistinct
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
- stxkind | stxndistinct
----------+---------------------------------------------------------
- {d,f,m} | {"3, 4": 301, "3, 6": 301, "4, 6": 301, "3, 4, 6": 301}
+ stxkind | stxndistinct
+---------+-----------------------------------------------------
+ {d,f,m} | {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11}
(1 row)
-- Hash Aggregate, thanks to estimates improved by the statistic
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
- QUERY PLAN
------------------------------
- HashAggregate
- Group Key: a, b
- -> Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
- QUERY PLAN
------------------------------
- HashAggregate
- Group Key: b, c
- -> Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
- QUERY PLAN
------------------------------
- HashAggregate
- Group Key: a, b, c
- -> Seq Scan on ndistinct
-(3 rows)
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual
+-----------+--------
+ 11 | 11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c');
+ estimated | actual
+-----------+--------
+ 11 | 11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
+ estimated | actual
+-----------+--------
+ 11 | 11
+(1 row)
-- last two plans keep using Group Aggregate, because 'd' is not covered
-- by the statistic and while it's NULL-only we assume 200 values for it
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
- QUERY PLAN
------------------------------------
- GroupAggregate
- Group Key: a, b, c, d
- -> Sort
- Sort Key: a, b, c, d
- -> Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
- QUERY PLAN
------------------------------------
- GroupAggregate
- Group Key: b, c, d
- -> Sort
- Sort Key: b, c, d
- -> Seq Scan on ndistinct
-(5 rows)
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
+ estimated | actual
+-----------+--------
+ 200 | 11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
+ estimated | actual
+-----------+--------
+ 200 | 11
+(1 row)
TRUNCATE TABLE ndistinct;
-- under-estimates when using only per-column statistics
INSERT INTO ndistinct (a, b, c, filler1)
SELECT mod(i,50), mod(i,51), mod(i,32),
cash_words(mod(i,33)::int::money)
- FROM generate_series(1,10000) s(i);
+ FROM generate_series(1,5000) s(i);
ANALYZE ndistinct;
SELECT stxkind, stxndistinct
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
- stxkind | stxndistinct
----------+-------------------------------------------------------------
- {d,f,m} | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 10000}
-(1 row)
-
--- plans using Group Aggregate, thanks to using correct esimates
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
- QUERY PLAN
------------------------------------
- GroupAggregate
- Group Key: a, b
- -> Sort
- Sort Key: a, b
- -> Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
- QUERY PLAN
------------------------------------
- GroupAggregate
- Group Key: a, b, c
- -> Sort
- Sort Key: a, b, c
- -> Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
- QUERY PLAN
------------------------------------
- GroupAggregate
- Group Key: a, b, c, d
- -> Sort
- Sort Key: a, b, c, d
- -> Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
- QUERY PLAN
------------------------------
- HashAggregate
- Group Key: b, c, d
- -> Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
- QUERY PLAN
------------------------------
- HashAggregate
- Group Key: a, d
- -> Seq Scan on ndistinct
-(3 rows)
+ stxkind | stxndistinct
+---------+------------------------------------------------------------
+ {d,f,m} | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 5000}
+(1 row)
+
+-- correct esimates
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual
+-----------+--------
+ 2550 | 2550
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
+ estimated | actual
+-----------+--------
+ 5000 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
+ estimated | actual
+-----------+--------
+ 5000 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
+ estimated | actual
+-----------+--------
+ 1632 | 1632
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
+ estimated | actual
+-----------+--------
+ 500 | 50
+(1 row)
DROP STATISTICS s10;
SELECT stxkind, stxndistinct
---------+--------------
(0 rows)
--- dropping the statistics switches the plans to Hash Aggregate,
--- due to under-estimates
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
- QUERY PLAN
------------------------------
- HashAggregate
- Group Key: a, b
- -> Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
- QUERY PLAN
------------------------------
- HashAggregate
- Group Key: a, b, c
- -> Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
- QUERY PLAN
------------------------------
- HashAggregate
- Group Key: a, b, c, d
- -> Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
- QUERY PLAN
------------------------------
- HashAggregate
- Group Key: b, c, d
- -> Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
- QUERY PLAN
------------------------------
- HashAggregate
- Group Key: a, d
- -> Seq Scan on ndistinct
-(3 rows)
+-- dropping the statistics results in under-estimates
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual
+-----------+--------
+ 500 | 2550
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
+ estimated | actual
+-----------+--------
+ 500 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
+ estimated | actual
+-----------+--------
+ 500 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
+ estimated | actual
+-----------+--------
+ 500 | 1632
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
+ estimated | actual
+-----------+--------
+ 500 | 50
+(1 row)
-- functional dependencies tests
CREATE TABLE functional_dependencies (
c INT,
d TEXT
);
-SET random_page_cost = 1.2;
CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b);
CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c);
-- random data (no functional dependencies)
INSERT INTO functional_dependencies (a, b, c, filler1)
SELECT mod(i, 23), mod(i, 29), mod(i, 31), i FROM generate_series(1,5000) s(i);
ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
- QUERY PLAN
----------------------------------------------------
- Bitmap Heap Scan on functional_dependencies
- Recheck Cond: ((a = 1) AND (b = '1'::text))
- -> Bitmap Index Scan on fdeps_abc_idx
- Index Cond: ((a = 1) AND (b = '1'::text))
-(4 rows)
-
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
- QUERY PLAN
------------------------------------------------------------
- Index Scan using fdeps_abc_idx on functional_dependencies
- Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
-(2 rows)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
+ estimated | actual
+-----------+--------
+ 8 | 8
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual
+-----------+--------
+ 1 | 1
+(1 row)
-- create statistics
CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
- QUERY PLAN
----------------------------------------------------
- Bitmap Heap Scan on functional_dependencies
- Recheck Cond: ((a = 1) AND (b = '1'::text))
- -> Bitmap Index Scan on fdeps_abc_idx
- Index Cond: ((a = 1) AND (b = '1'::text))
-(4 rows)
-
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
- QUERY PLAN
------------------------------------------------------------
- Index Scan using fdeps_abc_idx on functional_dependencies
- Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
-(2 rows)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
+ estimated | actual
+-----------+--------
+ 8 | 8
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual
+-----------+--------
+ 1 | 1
+(1 row)
-- a => b, a => c, b => c
TRUNCATE functional_dependencies;
INSERT INTO functional_dependencies (a, b, c, filler1)
SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
- QUERY PLAN
------------------------------------------------------------
- Index Scan using fdeps_abc_idx on functional_dependencies
- Index Cond: ((a = 1) AND (b = '1'::text))
-(2 rows)
-
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
- QUERY PLAN
------------------------------------------------------------
- Index Scan using fdeps_abc_idx on functional_dependencies
- Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
-(2 rows)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual
+-----------+--------
+ 1 | 50
+(1 row)
-- create statistics
CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
- QUERY PLAN
----------------------------------------------------
- Bitmap Heap Scan on functional_dependencies
- Recheck Cond: ((a = 1) AND (b = '1'::text))
- -> Bitmap Index Scan on fdeps_abc_idx
- Index Cond: ((a = 1) AND (b = '1'::text))
-(4 rows)
-
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
- QUERY PLAN
----------------------------------------------------
- Bitmap Heap Scan on functional_dependencies
- Recheck Cond: ((a = 1) AND (b = '1'::text))
- Filter: (c = 1)
- -> Bitmap Index Scan on fdeps_ab_idx
- Index Cond: ((a = 1) AND (b = '1'::text))
-(5 rows)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
-- check change of column type doesn't break it
ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
- QUERY PLAN
----------------------------------------------------
- Bitmap Heap Scan on functional_dependencies
- Recheck Cond: ((a = 1) AND (b = '1'::text))
- Filter: (c = '1'::numeric)
- -> Bitmap Index Scan on fdeps_ab_idx
- Index Cond: ((a = 1) AND (b = '1'::text))
-(5 rows)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
- QUERY PLAN
----------------------------------------------------
- Bitmap Heap Scan on functional_dependencies
- Recheck Cond: ((a = 1) AND (b = '1'::text))
- Filter: (c = '1'::numeric)
- -> Bitmap Index Scan on fdeps_ab_idx
- Index Cond: ((a = 1) AND (b = '1'::text))
-(5 rows)
-
-RESET random_page_cost;
--- check the number of estimated/actual rows in the top node
-create function check_estimated_rows(text) returns table (estimated int, actual int)
-language plpgsql as
-$$
-declare
- ln text;
- tmp text[];
- first_row bool := true;
-begin
- for ln in
- execute format('explain analyze %s', $1)
- loop
- if first_row then
- first_row := false;
- tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
- return query select tmp[1]::int, tmp[2]::int;
- end if;
- end loop;
-end;
-$$;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual
+-----------+--------
+ 50 | 50
+(1 row)
+
-- MCV lists
CREATE TABLE mcv_lists (
filler1 TEXT,
0 | {1, 2, 3} | {f,f,f} | 1 | 1
(1 row)
-RESET random_page_cost;
-- mcv with arrays
CREATE TABLE mcv_lists_arrays (
a TEXT[],
-- We will be checking execution plans without/with statistics, so
-- let's make sure we get simple non-parallel plans. Also set the
-- work_mem low so that we can use small amounts of data.
-SET max_parallel_workers = 0;
-SET max_parallel_workers_per_gather = 0;
-SET work_mem = '128kB';
+
+-- check the number of estimated/actual rows in the top node
+create function check_estimated_rows(text) returns table (estimated int, actual int)
+language plpgsql as
+$$
+declare
+ ln text;
+ tmp text[];
+ first_row bool := true;
+begin
+ for ln in
+ execute format('explain analyze %s', $1)
+ loop
+ if first_row then
+ first_row := false;
+ tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+ return query select tmp[1]::int, tmp[2]::int;
+ end if;
+ end loop;
+end;
+$$;
-- Verify failures
CREATE STATISTICS tst;
-- over-estimates when using only per-column statistics
INSERT INTO ndistinct (a, b, c, filler1)
SELECT i/100, i/100, i/100, cash_words((i/100)::money)
- FROM generate_series(1,30000) s(i);
+ FROM generate_series(1,1000) s(i);
ANALYZE ndistinct;
-- Group Aggregate, due to over-estimate of the number of groups
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
-- correct command
CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
-- Hash Aggregate, thanks to estimates improved by the statistic
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
-- last two plans keep using Group Aggregate, because 'd' is not covered
-- by the statistic and while it's NULL-only we assume 200 values for it
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
TRUNCATE TABLE ndistinct;
INSERT INTO ndistinct (a, b, c, filler1)
SELECT mod(i,50), mod(i,51), mod(i,32),
cash_words(mod(i,33)::int::money)
- FROM generate_series(1,10000) s(i);
+ FROM generate_series(1,5000) s(i);
ANALYZE ndistinct;
SELECT stxkind, stxndistinct
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
--- plans using Group Aggregate, thanks to using correct esimates
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+-- correct esimates
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
DROP STATISTICS s10;
SELECT stxkind, stxndistinct
FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
--- dropping the statistics switches the plans to Hash Aggregate,
--- due to under-estimates
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+-- dropping the statistics results in under-estimates
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
-- functional dependencies tests
CREATE TABLE functional_dependencies (
d TEXT
);
-SET random_page_cost = 1.2;
-
CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b);
CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c);
ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
-- create statistics
CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
-- a => b, a => c, b => c
TRUNCATE functional_dependencies;
ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
-- create statistics
CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
-- check change of column type doesn't break it
ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
-
-RESET random_page_cost;
-
--- check the number of estimated/actual rows in the top node
-create function check_estimated_rows(text) returns table (estimated int, actual int)
-language plpgsql as
-$$
-declare
- ln text;
- tmp text[];
- first_row bool := true;
-begin
- for ln in
- execute format('explain analyze %s', $1)
- loop
- if first_row then
- first_row := false;
- tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
- return query select tmp[1]::int, tmp[2]::int;
- end if;
- end loop;
-end;
-$$;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
-- MCV lists
CREATE TABLE mcv_lists (
SELECT m.* FROM pg_statistic_ext,
pg_mcv_list_items(stxmcv) m WHERE stxname = 'mcv_lists_stats';
-RESET random_page_cost;
-
-- mcv with arrays
CREATE TABLE mcv_lists_arrays (
a TEXT[],