summaryrefslogtreecommitdiff
path: root/src/test
diff options
context:
space:
mode:
author David Rowley 2022-12-22 23:43:52 +0000
committer David Rowley 2022-12-22 23:43:52 +0000
commit ed1a88ddaccfe883e4cf74d30319accfeae6cfe5 (patch)
tree b3c2e52d5d70bc20b6fb9a1737f8647b4815934d /src/test
parent cc150596341e2a7913519769a88a1537c2e94720 (diff)
Allow window functions to adjust their frameOptions
WindowFuncs such as row_number() don't care whether they're called with ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW or with RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW. The latter is less efficient as the RANGE option requires that the executor check for peer rows, so using the ROWS option instead would cause less overhead. Because RANGE is part of the default frame options for WindowClauses, WindowAgg is, by default, working much harder than it needs to for window functions where the ROWS / RANGE option has no effect on the window function's result. On a test query from the discussion thread, a performance improvement of 344% was seen by using ROWS instead of RANGE. Here we add a new support function node type to allow support functions to be called for window functions so that the optimal version of the frame options can be set. The planner has been adjusted so that the frame options are changed only if all window functions sharing the same window clause agree on what the optimized frame options are. Here we give row_number(), rank(), dense_rank(), percent_rank(), cume_dist() and ntile() the ability to alter their WindowClause's frameOptions. Reviewed-by: Vik Fearing, Erwin Brandstetter, Zhihong Yu Discussion: https://postgr.es/m/CAGHENJ7LBBszxS+SkWWFVnBmOT2oVsBhDMB1DFrgerCeYa_DyA@mail.gmail.com Discussion: https://postgr.es/m/CAApHDvohAKEtTXxq7Pc-ic2dKT8oZfbRKeEJP64M0B6+S88z+A@mail.gmail.com
Diffstat (limited to 'src/test')
-rw-r--r-- src/test/regress/expected/window.out | 73
-rw-r--r-- src/test/regress/sql/window.sql | 42
2 files changed, 115 insertions, 0 deletions
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index 170bea23c28..776861808bc 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -3312,6 +3312,79 @@ FROM empsalary GROUP BY depname;
14600 | 3 | | sales
(3 rows)
+--
+-- Test SupportRequestOptimizeWindowClause's ability to de-duplicate
+-- WindowClauses
+--
+-- Ensure WindowClause frameOptions are changed so that only a single
+-- WindowAgg exists in the plan.
+EXPLAIN (COSTS OFF)
+SELECT
+ empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
+ rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
+ dense_rank() OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
+ CURRENT ROW AND CURRENT ROW) drnk
+FROM empsalary;
+ QUERY PLAN
+----------------------------------------
+ WindowAgg
+ -> Sort
+ Sort Key: depname, enroll_date
+ -> Seq Scan on empsalary
+(4 rows)
+
+-- Ensure WindowFuncs which cannot support their WindowClause's frameOptions
+-- being changed are untouched
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT
+ empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
+ rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
+ count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
+ CURRENT ROW AND CURRENT ROW) cnt
+FROM empsalary;
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------
+ WindowAgg
+ Output: empno, depname, (row_number() OVER (?)), (rank() OVER (?)), count(*) OVER (?), enroll_date
+ -> WindowAgg
+ Output: depname, enroll_date, empno, row_number() OVER (?), rank() OVER (?)
+ -> Sort
+ Output: depname, enroll_date, empno
+ Sort Key: empsalary.depname, empsalary.enroll_date
+ -> Seq Scan on pg_temp.empsalary
+ Output: depname, enroll_date, empno
+(9 rows)
+
+-- Ensure the above query gives us the expected results
+SELECT
+ empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
+ rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
+ count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
+ CURRENT ROW AND CURRENT ROW) cnt
+FROM empsalary;
+ empno | depname | rn | rnk | cnt
+-------+-----------+----+-----+-----
+ 8 | develop | 1 | 1 | 1
+ 10 | develop | 2 | 2 | 1
+ 11 | develop | 3 | 3 | 1
+ 9 | develop | 4 | 4 | 2
+ 7 | develop | 5 | 4 | 2
+ 2 | personnel | 1 | 1 | 1
+ 5 | personnel | 2 | 2 | 1
+ 1 | sales | 1 | 1 | 1
+ 3 | sales | 2 | 2 | 1
+ 4 | sales | 3 | 3 | 1
+(10 rows)
+
-- Test pushdown of quals into a subquery containing window functions
-- pushdown is safe because all PARTITION BY clauses include depname:
EXPLAIN (COSTS OFF)
diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql
index 1138453131e..deaf2217a63 100644
--- a/src/test/regress/sql/window.sql
+++ b/src/test/regress/sql/window.sql
@@ -972,6 +972,48 @@ SELECT sum(salary), row_number() OVER (ORDER BY depname), sum(
depname
FROM empsalary GROUP BY depname;
+--
+-- Test SupportRequestOptimizeWindowClause's ability to de-duplicate
+-- WindowClauses
+--
+
+-- Ensure WindowClause frameOptions are changed so that only a single
+-- WindowAgg exists in the plan.
+EXPLAIN (COSTS OFF)
+SELECT
+ empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
+ rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
+ dense_rank() OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
+ CURRENT ROW AND CURRENT ROW) drnk
+FROM empsalary;
+
+-- Ensure WindowFuncs which cannot support their WindowClause's frameOptions
+-- being changed are untouched
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT
+ empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
+ rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
+ count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
+ CURRENT ROW AND CURRENT ROW) cnt
+FROM empsalary;
+
+-- Ensure the above query gives us the expected results
+SELECT
+ empno,
+ depname,
+ row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn,
+ rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN
+ UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk,
+ count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN
+ CURRENT ROW AND CURRENT ROW) cnt
+FROM empsalary;
+
-- Test pushdown of quals into a subquery containing window functions
-- pushdown is safe because all PARTITION BY clauses include depname: