Have the planner consider Incremental Sort for DISTINCT

Prior to this, we only considered a full sort on the cheapest input path and uniquifying any path which was already sorted in the required sort order. Here we adjust create_final_distinct_paths() so that it also adds an Incremental Sort path on any path which has presorted keys. Additionally, this adjusts the parallel distinct code so that we now consider sorting the cheapest partial path and incrementally sorting any partial paths with presorted keys. Previously we didn't consider any sorting for parallel distinct and only added a unique path atop any path which had the required pathkeys already. Author: David Rowley Reviewed-by: Richard Guo Discussion: https://postgr.es/m/CAApHDvo8Lz2H=42urBbfP65LTcEUOh288MT7DsG2_EWtW1AXHQ@mail.gmail.com
author: David Rowley 2023-01-10 21:25:43 +0000
committer: David Rowley 2023-01-10 21:25:43 +0000
commit: 3c6fc58209f24b959ee18f5d19ef96403d08f15c (patch)
tree: 6c27d7704fa2bc60554f1383c787ada232c6837c /src/test
parent: e5b8a4c098ad6add39626a14475148872cd687e0 (diff)
4 files changed, 44 insertions, 18 deletions
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 1a1e8b2365b..0c3433f8e58 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1484,15 +1484,16 @@ explain (costs off) select * from t union select * from t order by 1,3;
 -- Full sort, not just incremental sort can be pushed below a gather merge path
 -- by generate_useful_gather_paths.
 explain (costs off) select distinct a,b from t;
-                QUERY PLAN                
-------------------------------------------
+                   QUERY PLAN                   
+------------------------------------------------
  Unique
    ->  Gather Merge
          Workers Planned: 2
-         ->  Sort
-               Sort Key: a, b
-               ->  Parallel Seq Scan on t
-(6 rows)
+         ->  Unique
+               ->  Sort
+                     Sort Key: a, b
+                     ->  Parallel Seq Scan on t
+(7 rows)
 
 drop table t;
 -- Sort pushdown can't go below where expressions are part of the rel target.
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 6ce889d87c1..1fc07f220fb 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -171,6 +171,20 @@ SELECT DISTINCT g%1000 FROM generate_series(0,9999) g;
 SET jit_above_cost TO DEFAULT;
 CREATE TABLE distinct_group_2 AS
 SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
+SET enable_seqscan = 0;
+-- Check to see we get an incremental sort plan
+EXPLAIN (costs off)
+SELECT DISTINCT hundred, two FROM tenk1;
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Unique
+   ->  Incremental Sort
+         Sort Key: hundred, two
+         Presorted Key: hundred
+         ->  Index Scan using tenk1_hundred on tenk1
+(5 rows)
+
+RESET enable_seqscan;
 SET enable_hashagg=TRUE;
 -- Produce results with hash aggregation.
 SET enable_sort=FALSE;
@@ -265,15 +279,16 @@ $$ LANGUAGE plpgsql PARALLEL SAFE;
 -- Ensure we do parallel distinct now that the function is parallel safe
 EXPLAIN (COSTS OFF)
 SELECT DISTINCT distinct_func(1) FROM tenk1;
-                  QUERY PLAN                  
-----------------------------------------------
+                     QUERY PLAN                     
+----------------------------------------------------
  Unique
-   ->  Sort
-         Sort Key: (distinct_func(1))
-         ->  Gather
-               Workers Planned: 2
-               ->  Parallel Seq Scan on tenk1
-(6 rows)
+   ->  Gather Merge
+         Workers Planned: 2
+         ->  Unique
+               ->  Sort
+                     Sort Key: (distinct_func(1))
+                     ->  Parallel Seq Scan on tenk1
+(7 rows)
 
 RESET max_parallel_workers_per_gather;
 RESET min_parallel_table_scan_size;
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index 90e89fb5b68..b2c6605e60c 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -3944,8 +3944,9 @@ ORDER BY depname, enroll_date;
                                           QUERY PLAN                                           
 -----------------------------------------------------------------------------------------------
  Unique
-   ->  Sort
+   ->  Incremental Sort
          Sort Key: depname, enroll_date, empno, (sum(salary) OVER (?)), (min(salary) OVER (?))
+         Presorted Key: depname, enroll_date
          ->  WindowAgg
                ->  Incremental Sort
                      Sort Key: depname, enroll_date
@@ -3954,7 +3955,7 @@ ORDER BY depname, enroll_date;
                            ->  Sort
                                  Sort Key: depname, empno
                                  ->  Seq Scan on empsalary
-(11 rows)
+(12 rows)
 
 -- As above but adjust the ORDER BY clause to help ensure the plan with the
 -- minimum amount of sorting wasn't a fluke.
@@ -3970,8 +3971,9 @@ ORDER BY depname, empno;
                                           QUERY PLAN                                           
 -----------------------------------------------------------------------------------------------
  Unique
-   ->  Sort
+   ->  Incremental Sort
          Sort Key: depname, empno, enroll_date, (sum(salary) OVER (?)), (min(salary) OVER (?))
+         Presorted Key: depname, empno
          ->  WindowAgg
                ->  Incremental Sort
                      Sort Key: depname, empno
@@ -3980,7 +3982,7 @@ ORDER BY depname, empno;
                            ->  Sort
                                  Sort Key: depname, enroll_date
                                  ->  Seq Scan on empsalary
-(11 rows)
+(12 rows)
 
 RESET enable_hashagg;
 -- Test Sort node reordering
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index 34020adad1d..1643526d991 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -69,6 +69,14 @@ SET jit_above_cost TO DEFAULT;
 CREATE TABLE distinct_group_2 AS
 SELECT DISTINCT (g%1000)::text FROM generate_series(0,9999) g;
 
+SET enable_seqscan = 0;
+
+-- Check to see we get an incremental sort plan
+EXPLAIN (costs off)
+SELECT DISTINCT hundred, two FROM tenk1;
+
+RESET enable_seqscan;
+
 SET enable_hashagg=TRUE;
 
 -- Produce results with hash aggregation.
author	David Rowley	2023-01-10 21:25:43 +0000
committer	David Rowley	2023-01-10 21:25:43 +0000
commit	3c6fc58209f24b959ee18f5d19ef96403d08f15c (patch)
tree	6c27d7704fa2bc60554f1383c787ada232c6837c /src/test
parent	e5b8a4c098ad6add39626a14475148872cd687e0 (diff)