Convert pre-existing stats_ext tests to new style

author Tomas Vondra <tomas.vondra@postgresql.org>
Mon, 15 Apr 2019 22:02:22 +0000 (00:02 +0200)

committer Tomas Vondra <tomas.vondra@postgresql.org>
Mon, 15 Apr 2019 22:02:22 +0000 (00:02 +0200)
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out

index add968abec2a1cf0a56c61abccd096fc07b64d29..6dfca7a6069b5013b360d4a58d1665685fdfda3c 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -2,9 +2,26 @@
  -- We will be checking execution plans without/with statistics, so
  -- let's make sure we get simple non-parallel plans. Also set the
  -- work_mem low so that we can use small amounts of data.
-SET max_parallel_workers = 0;
-SET max_parallel_workers_per_gather = 0;
-SET work_mem = '128kB';
+-- check the number of estimated/actual rows in the top node
+create function check_estimated_rows(text) returns table (estimated int, actual int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain analyze %s', $1)
+    loop
+        if first_row then
+            first_row := false;
+            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+            return query select tmp[1]::int, tmp[2]::int;
+        end if;
+    end loop;
+end;
+$$;
  -- Verify failures
  CREATE STATISTICS tst;
  ERROR:  syntax error at or near ";"
@@ -146,191 +163,126 @@ CREATE TABLE ndistinct (
  -- over-estimates when using only per-column statistics
  INSERT INTO ndistinct (a, b, c, filler1)
       SELECT i/100, i/100, i/100, cash_words((i/100)::money)
-       FROM generate_series(1,30000) s(i);
+       FROM generate_series(1,1000) s(i);
  ANALYZE ndistinct;
  -- Group Aggregate, due to over-estimate of the number of groups
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
-            QUERY PLAN             
------------------------------------
- GroupAggregate
-   Group Key: a, b
-   ->  Sort
-         Sort Key: a, b
-         ->  Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
-            QUERY PLAN             
------------------------------------
- GroupAggregate
-   Group Key: b, c
-   ->  Sort
-         Sort Key: b, c
-         ->  Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
-            QUERY PLAN             
------------------------------------
- GroupAggregate
-   Group Key: a, b, c
-   ->  Sort
-         Sort Key: a, b, c
-         ->  Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
-            QUERY PLAN             
------------------------------------
- GroupAggregate
-   Group Key: a, b, c, d
-   ->  Sort
-         Sort Key: a, b, c, d
-         ->  Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
-            QUERY PLAN             
------------------------------------
- GroupAggregate
-   Group Key: b, c, d
-   ->  Sort
-         Sort Key: b, c, d
-         ->  Seq Scan on ndistinct
-(5 rows)
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual 
+-----------+--------
+       100 |     11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c');
+ estimated | actual 
+-----------+--------
+       100 |     11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
+ estimated | actual 
+-----------+--------
+       100 |     11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
+ estimated | actual 
+-----------+--------
+       200 |     11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
+ estimated | actual 
+-----------+--------
+       200 |     11
+(1 row)
  
  -- correct command
  CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
  ANALYZE ndistinct;
  SELECT stxkind, stxndistinct
    FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
- stxkind |                      stxndistinct                       
----------+---------------------------------------------------------
- {d,f,m} | {"3, 4": 301, "3, 6": 301, "4, 6": 301, "3, 4, 6": 301}
+ stxkind |                    stxndistinct                     
+---------+-----------------------------------------------------
+ {d,f,m} | {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11}
  (1 row)
  
  -- Hash Aggregate, thanks to estimates improved by the statistic
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: a, b
-   ->  Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: b, c
-   ->  Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: a, b, c
-   ->  Seq Scan on ndistinct
-(3 rows)
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual 
+-----------+--------
+        11 |     11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c');
+ estimated | actual 
+-----------+--------
+        11 |     11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
+ estimated | actual 
+-----------+--------
+        11 |     11
+(1 row)
  
  -- last two plans keep using Group Aggregate, because 'd' is not covered
  -- by the statistic and while it's NULL-only we assume 200 values for it
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
-            QUERY PLAN             
------------------------------------
- GroupAggregate
-   Group Key: a, b, c, d
-   ->  Sort
-         Sort Key: a, b, c, d
-         ->  Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
-            QUERY PLAN             
------------------------------------
- GroupAggregate
-   Group Key: b, c, d
-   ->  Sort
-         Sort Key: b, c, d
-         ->  Seq Scan on ndistinct
-(5 rows)
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
+ estimated | actual 
+-----------+--------
+       200 |     11
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
+ estimated | actual 
+-----------+--------
+       200 |     11
+(1 row)
  
  TRUNCATE TABLE ndistinct;
  -- under-estimates when using only per-column statistics
  INSERT INTO ndistinct (a, b, c, filler1)
       SELECT mod(i,50), mod(i,51), mod(i,32),
              cash_words(mod(i,33)::int::money)
-       FROM generate_series(1,10000) s(i);
+       FROM generate_series(1,5000) s(i);
  ANALYZE ndistinct;
  SELECT stxkind, stxndistinct
    FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
- stxkind |                        stxndistinct                         
----------+-------------------------------------------------------------
- {d,f,m} | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 10000}
-(1 row)
-
--- plans using Group Aggregate, thanks to using correct esimates
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
-            QUERY PLAN             
------------------------------------
- GroupAggregate
-   Group Key: a, b
-   ->  Sort
-         Sort Key: a, b
-         ->  Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
-            QUERY PLAN             
------------------------------------
- GroupAggregate
-   Group Key: a, b, c
-   ->  Sort
-         Sort Key: a, b, c
-         ->  Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
-            QUERY PLAN             
------------------------------------
- GroupAggregate
-   Group Key: a, b, c, d
-   ->  Sort
-         Sort Key: a, b, c, d
-         ->  Seq Scan on ndistinct
-(5 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: b, c, d
-   ->  Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: a, d
-   ->  Seq Scan on ndistinct
-(3 rows)
+ stxkind |                        stxndistinct                        
+---------+------------------------------------------------------------
+ {d,f,m} | {"3, 4": 2550, "3, 6": 800, "4, 6": 1632, "3, 4, 6": 5000}
+(1 row)
+
+-- correct esimates
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual 
+-----------+--------
+      2550 |   2550
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
+ estimated | actual 
+-----------+--------
+      5000 |   5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
+ estimated | actual 
+-----------+--------
+      5000 |   5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
+ estimated | actual 
+-----------+--------
+      1632 |   1632
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
+ estimated | actual 
+-----------+--------
+       500 |     50
+(1 row)
  
  DROP STATISTICS s10;
  SELECT stxkind, stxndistinct
@@ -339,52 +291,36 @@ SELECT stxkind, stxndistinct
  ---------+--------------
  (0 rows)
  
--- dropping the statistics switches the plans to Hash Aggregate,
--- due to under-estimates
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: a, b
-   ->  Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: a, b, c
-   ->  Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: a, b, c, d
-   ->  Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: b, c, d
-   ->  Seq Scan on ndistinct
-(3 rows)
-
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
-         QUERY PLAN          
------------------------------
- HashAggregate
-   Group Key: a, d
-   ->  Seq Scan on ndistinct
-(3 rows)
+-- dropping the statistics results in under-estimates
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
+ estimated | actual 
+-----------+--------
+       500 |   2550
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
+ estimated | actual 
+-----------+--------
+       500 |   5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
+ estimated | actual 
+-----------+--------
+       500 |   5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
+ estimated | actual 
+-----------+--------
+       500 |   1632
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
+ estimated | actual 
+-----------+--------
+       500 |     50
+(1 row)
  
  -- functional dependencies tests
  CREATE TABLE functional_dependencies (
@@ -396,51 +332,38 @@ CREATE TABLE functional_dependencies (
      c INT,
      d TEXT
  );
-SET random_page_cost = 1.2;
  CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b);
  CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c);
  -- random data (no functional dependencies)
  INSERT INTO functional_dependencies (a, b, c, filler1)
       SELECT mod(i, 23), mod(i, 29), mod(i, 31), i FROM generate_series(1,5000) s(i);
  ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
-                    QUERY PLAN                     
----------------------------------------------------
- Bitmap Heap Scan on functional_dependencies
-   Recheck Cond: ((a = 1) AND (b = '1'::text))
-   ->  Bitmap Index Scan on fdeps_abc_idx
-         Index Cond: ((a = 1) AND (b = '1'::text))
-(4 rows)
-
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
-                        QUERY PLAN                         
------------------------------------------------------------
- Index Scan using fdeps_abc_idx on functional_dependencies
-   Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
-(2 rows)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
+ estimated | actual 
+-----------+--------
+         8 |      8
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
  
  -- create statistics
  CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
  ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
-                    QUERY PLAN                     
----------------------------------------------------
- Bitmap Heap Scan on functional_dependencies
-   Recheck Cond: ((a = 1) AND (b = '1'::text))
-   ->  Bitmap Index Scan on fdeps_abc_idx
-         Index Cond: ((a = 1) AND (b = '1'::text))
-(4 rows)
-
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
-                        QUERY PLAN                         
------------------------------------------------------------
- Index Scan using fdeps_abc_idx on functional_dependencies
-   Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
-(2 rows)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
+ estimated | actual 
+-----------+--------
+         8 |      8
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual 
+-----------+--------
+         1 |      1
+(1 row)
  
  -- a => b, a => c, b => c
  TRUNCATE functional_dependencies;
@@ -448,92 +371,48 @@ DROP STATISTICS func_deps_stat;
  INSERT INTO functional_dependencies (a, b, c, filler1)
       SELECT mod(i,100), mod(i,50), mod(i,25), i FROM generate_series(1,5000) s(i);
  ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
-                        QUERY PLAN                         
------------------------------------------------------------
- Index Scan using fdeps_abc_idx on functional_dependencies
-   Index Cond: ((a = 1) AND (b = '1'::text))
-(2 rows)
-
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
-                        QUERY PLAN                         
------------------------------------------------------------
- Index Scan using fdeps_abc_idx on functional_dependencies
-   Index Cond: ((a = 1) AND (b = '1'::text) AND (c = 1))
-(2 rows)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual 
+-----------+--------
+         1 |     50
+(1 row)
  
  -- create statistics
  CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
  ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
-                    QUERY PLAN                     
----------------------------------------------------
- Bitmap Heap Scan on functional_dependencies
-   Recheck Cond: ((a = 1) AND (b = '1'::text))
-   ->  Bitmap Index Scan on fdeps_abc_idx
-         Index Cond: ((a = 1) AND (b = '1'::text))
-(4 rows)
-
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
-                    QUERY PLAN                     
----------------------------------------------------
- Bitmap Heap Scan on functional_dependencies
-   Recheck Cond: ((a = 1) AND (b = '1'::text))
-   Filter: (c = 1)
-   ->  Bitmap Index Scan on fdeps_ab_idx
-         Index Cond: ((a = 1) AND (b = '1'::text))
-(5 rows)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
  
  -- check change of column type doesn't break it
  ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
-                    QUERY PLAN                     
----------------------------------------------------
- Bitmap Heap Scan on functional_dependencies
-   Recheck Cond: ((a = 1) AND (b = '1'::text))
-   Filter: (c = '1'::numeric)
-   ->  Bitmap Index Scan on fdeps_ab_idx
-         Index Cond: ((a = 1) AND (b = '1'::text))
-(5 rows)
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
  
  ANALYZE functional_dependencies;
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
-                    QUERY PLAN                     
----------------------------------------------------
- Bitmap Heap Scan on functional_dependencies
-   Recheck Cond: ((a = 1) AND (b = '1'::text))
-   Filter: (c = '1'::numeric)
-   ->  Bitmap Index Scan on fdeps_ab_idx
-         Index Cond: ((a = 1) AND (b = '1'::text))
-(5 rows)
-
-RESET random_page_cost;
--- check the number of estimated/actual rows in the top node
-create function check_estimated_rows(text) returns table (estimated int, actual int)
-language plpgsql as
-$$
-declare
-    ln text;
-    tmp text[];
-    first_row bool := true;
-begin
-    for ln in
-        execute format('explain analyze %s', $1)
-    loop
-        if first_row then
-            first_row := false;
-            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
-            return query select tmp[1]::int, tmp[2]::int;
-        end if;
-    end loop;
-end;
-$$;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
+ estimated | actual 
+-----------+--------
+        50 |     50
+(1 row)
+
  -- MCV lists
  CREATE TABLE mcv_lists (
      filler1 TEXT,
@@ -728,7 +607,6 @@ SELECT m.* FROM pg_statistic_ext,
       0 | {1, 2, 3} | {f,f,f} |         1 |              1
  (1 row)
  
-RESET random_page_cost;
  -- mcv with arrays
  CREATE TABLE mcv_lists_arrays (
      a TEXT[],
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql

index d4b2732493adac4d369ec51e4d3bd9fe973511d6..c6a5776120e3f613e0d91ecb3df00abee528fcf6 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -3,9 +3,27 @@
  -- We will be checking execution plans without/with statistics, so
  -- let's make sure we get simple non-parallel plans. Also set the
  -- work_mem low so that we can use small amounts of data.
-SET max_parallel_workers = 0;
-SET max_parallel_workers_per_gather = 0;
-SET work_mem = '128kB';
+
+-- check the number of estimated/actual rows in the top node
+create function check_estimated_rows(text) returns table (estimated int, actual int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain analyze %s', $1)
+    loop
+        if first_row then
+            first_row := false;
+            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+            return query select tmp[1]::int, tmp[2]::int;
+        end if;
+    end loop;
+end;
+$$;
  
  -- Verify failures
  CREATE STATISTICS tst;
@@ -106,25 +124,20 @@ CREATE TABLE ndistinct (
  -- over-estimates when using only per-column statistics
  INSERT INTO ndistinct (a, b, c, filler1)
       SELECT i/100, i/100, i/100, cash_words((i/100)::money)
-       FROM generate_series(1,30000) s(i);
+       FROM generate_series(1,1000) s(i);
  
  ANALYZE ndistinct;
  
  -- Group Aggregate, due to over-estimate of the number of groups
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
  
  -- correct command
  CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
@@ -135,22 +148,17 @@ SELECT stxkind, stxndistinct
    FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
  
  -- Hash Aggregate, thanks to estimates improved by the statistic
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
  
  -- last two plans keep using Group Aggregate, because 'd' is not covered
  -- by the statistic and while it's NULL-only we assume 200 values for it
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
  
  TRUNCATE TABLE ndistinct;
  
@@ -158,50 +166,39 @@ TRUNCATE TABLE ndistinct;
  INSERT INTO ndistinct (a, b, c, filler1)
       SELECT mod(i,50), mod(i,51), mod(i,32),
              cash_words(mod(i,33)::int::money)
-       FROM generate_series(1,10000) s(i);
+       FROM generate_series(1,5000) s(i);
  
  ANALYZE ndistinct;
  
  SELECT stxkind, stxndistinct
    FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
  
--- plans using Group Aggregate, thanks to using correct esimates
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+-- correct esimates
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
  
  DROP STATISTICS s10;
  
  SELECT stxkind, stxndistinct
    FROM pg_statistic_ext WHERE stxrelid = 'ndistinct'::regclass;
  
--- dropping the statistics switches the plans to Hash Aggregate,
--- due to under-estimates
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b;
+-- dropping the statistics results in under-estimates
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d');
  
-EXPLAIN (COSTS off)
- SELECT COUNT(*) FROM ndistinct GROUP BY a, d;
+SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, d');
  
  -- functional dependencies tests
  CREATE TABLE functional_dependencies (
@@ -214,8 +211,6 @@ CREATE TABLE functional_dependencies (
      d TEXT
  );
  
-SET random_page_cost = 1.2;
-
  CREATE INDEX fdeps_ab_idx ON functional_dependencies (a, b);
  CREATE INDEX fdeps_abc_idx ON functional_dependencies (a, b, c);
  
@@ -225,22 +220,18 @@ INSERT INTO functional_dependencies (a, b, c, filler1)
  
  ANALYZE functional_dependencies;
  
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
  
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
  
  -- create statistics
  CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
  
  ANALYZE functional_dependencies;
  
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
  
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
  
  -- a => b, a => c, b => c
  TRUNCATE functional_dependencies;
@@ -251,56 +242,27 @@ INSERT INTO functional_dependencies (a, b, c, filler1)
  
  ANALYZE functional_dependencies;
  
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
  
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
  
  -- create statistics
  CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies;
  
  ANALYZE functional_dependencies;
  
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1';
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1''');
  
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
  
  -- check change of column type doesn't break it
  ALTER TABLE functional_dependencies ALTER COLUMN c TYPE numeric;
  
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
  
  ANALYZE functional_dependencies;
  
-EXPLAIN (COSTS OFF)
- SELECT * FROM functional_dependencies WHERE a = 1 AND b = '1' AND c = 1;
-
-RESET random_page_cost;
-
--- check the number of estimated/actual rows in the top node
-create function check_estimated_rows(text) returns table (estimated int, actual int)
-language plpgsql as
-$$
-declare
-    ln text;
-    tmp text[];
-    first_row bool := true;
-begin
-    for ln in
-        execute format('explain analyze %s', $1)
-    loop
-        if first_row then
-            first_row := false;
-            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
-            return query select tmp[1]::int, tmp[2]::int;
-        end if;
-    end loop;
-end;
-$$;
+SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'' AND c = 1');
  
  -- MCV lists
  CREATE TABLE mcv_lists (
@@ -419,8 +381,6 @@ ANALYZE mcv_lists;
  SELECT m.* FROM pg_statistic_ext,
                pg_mcv_list_items(stxmcv) m WHERE stxname = 'mcv_lists_stats';
  
-RESET random_page_cost;
-
  -- mcv with arrays
  CREATE TABLE mcv_lists_arrays (
      a TEXT[],
author	Tomas Vondra <tomas.vondra@postgresql.org>
	Mon, 15 Apr 2019 22:02:22 +0000 (00:02 +0200)
committer	Tomas Vondra <tomas.vondra@postgresql.org>
	Mon, 15 Apr 2019 22:02:22 +0000 (00:02 +0200)
src/test/regress/expected/stats_ext.out		patch \| blob \| blame \| history
src/test/regress/sql/stats_ext.sql		patch \| blob \| blame \| history