diff options
| author | David Rowley | 2022-12-22 23:43:52 +0000 |
|---|---|---|
| committer | David Rowley | 2022-12-22 23:43:52 +0000 |
| commit | ed1a88ddaccfe883e4cf74d30319accfeae6cfe5 (patch) | |
| tree | b3c2e52d5d70bc20b6fb9a1737f8647b4815934d /src/test | |
| parent | cc150596341e2a7913519769a88a1537c2e94720 (diff) | |
Allow window functions to adjust their frameOptions
WindowFuncs such as row_number() don't care whether they're called with ROWS
UNBOUNDED PRECEDING AND CURRENT ROW or with RANGE UNBOUNDED PRECEDING AND
CURRENT ROW. The latter is less efficient as the RANGE option requires
that the executor check for peer rows, so using the ROWS option instead
would cause less overhead. Because RANGE is part of the default frame
options for WindowClauses, it means WindowAgg is, by default, working much
harder than it needs to for window functions where the ROWS / RANGE option
has no effect on the window function's result.
On a test query from the discussion thread, a performance improvement of
344% was seen by using ROWS instead of RANGE.
Here we add a new support function node type to allow support functions to
be called for window functions so that the most efficient version of the
frame options can be set. The planner has been adjusted so that the frame
options are changed only if all window functions sharing the same window
clause agree on what the optimized frame options are.
Here we give row_number(), rank(), dense_rank(), percent_rank(),
cume_dist() and ntile() the ability to alter their WindowClause's
frameOptions.
Reviewed-by: Vik Fearing, Erwin Brandstetter, Zhihong Yu
Discussion: https://postgr.es/m/CAGHENJ7LBBszxS+SkWWFVnBmOT2oVsBhDMB1DFrgerCeYa_DyA@mail.gmail.com
Discussion: https://postgr.es/m/CAApHDvohAKEtTXxq7Pc-ic2dKT8oZfbRKeEJP64M0B6+S88z+A@mail.gmail.com
Diffstat (limited to 'src/test')
| -rw-r--r-- | src/test/regress/expected/window.out | 73 | ||||
| -rw-r--r-- | src/test/regress/sql/window.sql | 42 |
2 files changed, 115 insertions, 0 deletions
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 170bea23c28..776861808bc 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -3312,6 +3312,79 @@ FROM empsalary GROUP BY depname; 14600 | 3 | | sales (3 rows) +-- +-- Test SupportRequestOptimizeWindowClause's ability to de-duplicate +-- WindowClauses +-- +-- Ensure WindowClause frameOptions are changed so that only a single +-- WindowAgg exists in the plan. +EXPLAIN (COSTS OFF) +SELECT + empno, + depname, + row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn, + rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk, + dense_rank() OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN + CURRENT ROW AND CURRENT ROW) drnk +FROM empsalary; + QUERY PLAN +---------------------------------------- + WindowAgg + -> Sort + Sort Key: depname, enroll_date + -> Seq Scan on empsalary +(4 rows) + +-- Ensure WindowFuncs which cannot support their WindowClause's frameOptions +-- being changed are untouched +EXPLAIN (COSTS OFF, VERBOSE) +SELECT + empno, + depname, + row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn, + rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk, + count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN + CURRENT ROW AND CURRENT ROW) cnt +FROM empsalary; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + WindowAgg + Output: empno, depname, (row_number() OVER (?)), (rank() OVER (?)), count(*) OVER (?), enroll_date + -> WindowAgg + Output: depname, enroll_date, empno, row_number() OVER (?), rank() OVER (?) 
+ -> Sort + Output: depname, enroll_date, empno + Sort Key: empsalary.depname, empsalary.enroll_date + -> Seq Scan on pg_temp.empsalary + Output: depname, enroll_date, empno +(9 rows) + +-- Ensure the above query gives us the expected results +SELECT + empno, + depname, + row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn, + rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk, + count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN + CURRENT ROW AND CURRENT ROW) cnt +FROM empsalary; + empno | depname | rn | rnk | cnt +-------+-----------+----+-----+----- + 8 | develop | 1 | 1 | 1 + 10 | develop | 2 | 2 | 1 + 11 | develop | 3 | 3 | 1 + 9 | develop | 4 | 4 | 2 + 7 | develop | 5 | 4 | 2 + 2 | personnel | 1 | 1 | 1 + 5 | personnel | 2 | 2 | 1 + 1 | sales | 1 | 1 | 1 + 3 | sales | 2 | 2 | 1 + 4 | sales | 3 | 3 | 1 +(10 rows) + -- Test pushdown of quals into a subquery containing window functions -- pushdown is safe because all PARTITION BY clauses include depname: EXPLAIN (COSTS OFF) diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 1138453131e..deaf2217a63 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -972,6 +972,48 @@ SELECT sum(salary), row_number() OVER (ORDER BY depname), sum( depname FROM empsalary GROUP BY depname; +-- +-- Test SupportRequestOptimizeWindowClause's ability to de-duplicate +-- WindowClauses +-- + +-- Ensure WindowClause frameOptions are changed so that only a single +-- WindowAgg exists in the plan. 
+EXPLAIN (COSTS OFF) +SELECT + empno, + depname, + row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn, + rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk, + dense_rank() OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN + CURRENT ROW AND CURRENT ROW) drnk +FROM empsalary; + +-- Ensure WindowFuncs which cannot support their WindowClause's frameOptions +-- being changed are untouched +EXPLAIN (COSTS OFF, VERBOSE) +SELECT + empno, + depname, + row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn, + rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk, + count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN + CURRENT ROW AND CURRENT ROW) cnt +FROM empsalary; + +-- Ensure the above query gives us the expected results +SELECT + empno, + depname, + row_number() OVER (PARTITION BY depname ORDER BY enroll_date) rn, + rank() OVER (PARTITION BY depname ORDER BY enroll_date ROWS BETWEEN + UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk, + count(*) OVER (PARTITION BY depname ORDER BY enroll_date RANGE BETWEEN + CURRENT ROW AND CURRENT ROW) cnt +FROM empsalary; + -- Test pushdown of quals into a subquery containing window functions -- pushdown is safe because all PARTITION BY clauses include depname: |
