diff options
| author | Tomas Vondra | 2022-03-30 22:09:11 +0000 |
|---|---|---|
| committer | Tomas Vondra | 2022-03-30 23:13:33 +0000 |
| commit | db0d67db2401eb6238ccc04c6407a4fd4f985832 (patch) | |
| tree | a1956b9a26f48b06e4c3a07d860645b0b6e12eb8 /src/test | |
| parent | 606948b058dc16bce494270eea577011a602810e (diff) | |
Optimize order of GROUP BY keys
When evaluating a query with a multi-column GROUP BY clause using sort,
the cost may be heavily dependent on the order in which the keys are
compared when building the groups. Grouping does not imply any ordering,
so we're allowed to compare the keys in arbitrary order, and a Hash Agg
leverages this. But for Group Agg, we simply compared keys in the order
as specified in the query. This commit explores alternative ordering of
the keys, trying to find a cheaper one.
In principle, we might generate grouping paths for all permutations of
the keys, and leave the rest to the optimizer. But that might get very
expensive, so we try to pick only a couple interesting orderings based
on both local and global information.
When planning the grouping path, we explore statistics (number of
distinct values, cost of the comparison function) for the keys and
reorder them to minimize comparison costs. Intuitively, it may be better
to perform more expensive comparisons (for complex data types etc.)
last, because maybe the cheaper comparisons will be enough. Similarly,
the higher the cardinality of a key, the lower the probability we’ll
need to compare more keys. The patch generates and costs various
orderings, picking the cheapest ones.
The ordering of group keys may interact with other parts of the query,
some of which may not be known while planning the grouping. E.g. there
may be an explicit ORDER BY clause, or some other ordering-dependent
operation, higher up in the query, and using the same ordering may allow
using either incremental sort or even eliminate the sort entirely.
The patch generates orderings and picks those minimizing the comparison
cost (for various pathkeys), and then adds orderings that might be
useful for operations higher up in the plan (ORDER BY, etc.). Finally,
it always keeps the ordering specified in the query, on the assumption
the user might have additional insights.
This introduces a new GUC enable_group_by_reordering, so that the
optimization may be disabled if needed.
The original patch was proposed by Teodor Sigaev, and later improved and
reworked by Dmitry Dolgov. Reviews by a number of people, including me,
Andrey Lepikhov, Claudio Freire, Ibrar Ahmed and Zhihong Yu.
Author: Dmitry Dolgov, Teodor Sigaev, Tomas Vondra
Reviewed-by: Tomas Vondra, Andrey Lepikhov, Claudio Freire, Ibrar Ahmed, Zhihong Yu
Discussion: https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ru
Discussion: https://postgr.es/m/CA%2Bq6zcW_4o2NC0zutLkOJPsFt80megSpX_dVRo6GK9PC-Jx_Ag%40mail.gmail.com
Diffstat (limited to 'src/test')
| -rw-r--r-- | src/test/regress/expected/aggregates.out | 244 | ||||
| -rw-r--r-- | src/test/regress/expected/incremental_sort.out | 2 | ||||
| -rw-r--r-- | src/test/regress/expected/join.out | 51 | ||||
| -rw-r--r-- | src/test/regress/expected/partition_aggregate.out | 136 | ||||
| -rw-r--r-- | src/test/regress/expected/partition_join.out | 75 | ||||
| -rw-r--r-- | src/test/regress/expected/sysviews.out | 3 | ||||
| -rw-r--r-- | src/test/regress/expected/union.out | 60 | ||||
| -rw-r--r-- | src/test/regress/sql/aggregates.sql | 99 | ||||
| -rw-r--r-- | src/test/regress/sql/incremental_sort.sql | 2 |
9 files changed, 491 insertions, 181 deletions
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 0a23a39aa29..601047fa3dd 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1210,7 +1210,8 @@ explain (costs off) select distinct min(f1), max(f1) from minmaxtest; QUERY PLAN --------------------------------------------------------------------------------------------- - Unique + HashAggregate + Group Key: $0, $1 InitPlan 1 (returns $0) -> Limit -> Merge Append @@ -1233,10 +1234,8 @@ explain (costs off) -> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8 Index Cond: (f1 IS NOT NULL) -> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9 - -> Sort - Sort Key: ($0), ($1) - -> Result -(26 rows) + -> Result +(25 rows) select distinct min(f1), max(f1) from minmaxtest; min | max @@ -2448,6 +2447,241 @@ SELECT balk(hundred) FROM tenk1; (1 row) ROLLBACK; +-- GROUP BY optimization by reorder columns +SELECT + i AS id, + i/2 AS p, + format('%60s', i%2) AS v, + i/4 AS c, + i/8 AS d, + (random() * (10000/8))::int as e --the same as d but no correlation with p + INTO btg +FROM + generate_series(1, 10000) i; +VACUUM btg; +ANALYZE btg; +-- GROUP BY optimization by reorder columns by frequency +SET enable_hashagg=off; +SET max_parallel_workers= 0; +SET max_parallel_workers_per_gather = 0; +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, v + -> Sort + Sort Key: p, v + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, v + -> Sort + Sort Key: p, v + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, c, v + -> Sort + Sort Key: p, c, v + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: v, p, c + -> Sort + Sort Key: v, p, c + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c; + QUERY PLAN +------------------------------ + GroupAggregate + Group Key: p, d, c, v + -> Sort + Sort Key: p, d, c, v + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c; + QUERY PLAN +------------------------------ + GroupAggregate + Group Key: v, p, d, c + -> Sort + Sort Key: v, p, d, c + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c; + QUERY PLAN +------------------------------ + GroupAggregate + Group Key: p, v, d, c + -> Sort + Sort Key: p, v, d, c + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, d, e; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, d, e + -> Sort + Sort Key: p, d, e + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, e, d; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, e, d + -> Sort + Sort Key: p, e, d + -> Seq Scan on btg +(5 rows) + +CREATE STATISTICS btg_dep ON d, e, p FROM btg; +ANALYZE btg; +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, d, e; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, d, e + -> Sort + Sort Key: p, d, e + -> Seq Scan on btg +(5 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, e, d; + QUERY PLAN +----------------------------- + GroupAggregate + Group Key: p, e, d + -> Sort + Sort Key: p, e, d + -> Seq Scan on btg +(5 rows) + +-- GROUP BY optimization by reorder columns by index scan +CREATE INDEX ON btg(p, v); +SET enable_seqscan=off; +SET enable_bitmapscan=off; +VACUUM btg; +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v; + QUERY PLAN +------------------------------------------------ + GroupAggregate + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v; + QUERY PLAN +------------------------------------------------ + GroupAggregate + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p; + QUERY PLAN +------------------------------------------------ + GroupAggregate + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v; + QUERY PLAN +------------------------------------------------ + GroupAggregate + Group Key: p, v + -> Index Only Scan using btg_p_v_idx on btg +(3 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c; + QUERY PLAN +------------------------------------------------- + GroupAggregate + Group Key: p, c, v + -> Incremental Sort + Sort Key: p, c, v + Presorted Key: p + -> Index Scan using btg_p_v_idx on btg +(6 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v; + QUERY PLAN +------------------------------------------------- + GroupAggregate + Group Key: p, v, c + -> Incremental Sort + Sort Key: p, v, c + Presorted Key: p, v + -> Index Scan using btg_p_v_idx on btg +(6 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, c, p, d; + QUERY PLAN +------------------------------------------------- + GroupAggregate + Group Key: p, c, d, v + -> Incremental Sort + Sort Key: p, c, d, v + Presorted Key: p + -> Index Scan using btg_p_v_idx on btg +(6 rows) + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v; + QUERY PLAN +------------------------------------------------- + GroupAggregate + Group Key: p, v, c, d + -> Incremental Sort + Sort Key: p, v, c, d + Presorted Key: p, v + -> Index Scan using btg_p_v_idx on btg +(6 rows) + +DROP TABLE btg; +RESET enable_hashagg; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +RESET enable_seqscan; +RESET enable_bitmapscan; -- Secondly test the case of a parallel aggregate combiner function -- returning NULL. For that use normal transition function, but a -- combiner function returning NULL. diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index 545e301e482..21c429226f7 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1439,7 +1439,7 @@ set parallel_setup_cost = 0; set parallel_tuple_cost = 0; set max_parallel_workers_per_gather = 2; create table t (a int, b int, c int); -insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i); +insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i); create index on t (a); analyze t; set enable_incremental_sort = off; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 19caebabd01..bf1a2db2cf0 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -1984,8 +1984,8 @@ USING (name); ------+----+---- bb | 12 | 13 cc | 22 | 23 - dd | | 33 ee | 42 | + dd | | 33 (4 rows) -- Cases with non-nullable expressions in subquery results; @@ -2019,8 +2019,8 @@ NATURAL FULL JOIN ------+------+------+------+------ bb | 12 | 2 | 13 | 3 cc | 22 | 2 | 23 | 3 - dd | | | 33 | 3 ee | 42 | 2 | | + dd | | | 33 | 3 (4 rows) SELECT * FROM @@ -4618,18 +4618,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; - QUERY PLAN --------------------------------------- - Merge Right Join - Merge Cond: (b.id = d.a) - -> Unique - -> Sort - Sort Key: b.id, b.c_id - -> Seq Scan on b + QUERY PLAN +--------------------------------------------- + Merge Left Join + Merge Cond: (d.a = s.id) -> Sort Sort Key: d.a -> Seq Scan on d -(9 rows) + -> Sort + Sort Key: s.id + -> Subquery Scan on s + -> HashAggregate + Group Key: b.id, b.c_id + -> Seq Scan on b +(11 rows) -- check join removal works when uniqueness of the join condition is enforced -- by a UNION @@ -6336,44 +6338,39 @@ select * from j1 natural join j2; explain (verbose, costs off) select * from j1 inner join (select distinct id from j3) j3 on j1.id = j3.id; - QUERY PLAN ------------------------------------------ + QUERY PLAN +----------------------------------- Nested Loop Output: j1.id, j3.id Inner Unique: true Join Filter: (j1.id = j3.id) - -> Unique + -> HashAggregate Output: j3.id - -> Sort + Group Key: j3.id + -> Seq Scan on public.j3 Output: j3.id - Sort Key: j3.id - -> Seq Scan on public.j3 - Output: j3.id -> Seq Scan on public.j1 Output: j1.id -(13 rows) +(11 rows) -- ensure group by clause allows the inner to become unique explain (verbose, costs off) select * from j1 inner join (select id from j3 group by id) j3 on j1.id = j3.id; - QUERY PLAN ------------------------------------------ + QUERY PLAN +----------------------------------- Nested Loop Output: j1.id, j3.id Inner Unique: true Join Filter: (j1.id = j3.id) - -> Group + -> HashAggregate Output: j3.id Group Key: j3.id - -> Sort + -> Seq Scan on public.j3 Output: j3.id - Sort Key: j3.id - -> Seq Scan on public.j3 - Output: j3.id -> Seq Scan on public.j1 Output: j1.id -(14 rows) +(11 rows) drop table j1; drop table j2; diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out index dfa4b036b52..a08a3825ff6 100644 --- a/src/test/regress/expected/partition_aggregate.out +++ b/src/test/regress/expected/partition_aggregate.out @@ -952,32 +952,30 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA -------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c)) - -> Gather - Workers Planned: 2 - -> Parallel Append - -> GroupAggregate - Group Key: pagg_tab_ml.a - Filter: (avg(pagg_tab_ml.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml.a - -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml - -> GroupAggregate - Group Key: pagg_tab_ml_5.a - Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_5.a - -> Append - -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 - -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 - -> GroupAggregate - Group Key: pagg_tab_ml_2.a - Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_2.a - -> Append - -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 - -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 -(27 rows) + -> Append + -> GroupAggregate + Group Key: pagg_tab_ml.a + Filter: (avg(pagg_tab_ml.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml.a + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> GroupAggregate + Group Key: pagg_tab_ml_2.a + Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_2.a + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 + -> GroupAggregate + Group Key: pagg_tab_ml_5.a + Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_5.a + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 +(25 rows) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; a | sum | array_agg | count @@ -996,34 +994,32 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA -- Without ORDER BY clause, to test Gather at top-most path EXPLAIN (COSTS OFF) SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; - QUERY PLAN ---------------------------------------------------------------------------- - Gather - Workers Planned: 2 - -> Parallel Append - -> GroupAggregate - Group Key: pagg_tab_ml.a - Filter: (avg(pagg_tab_ml.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml.a - -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml - -> GroupAggregate - Group Key: pagg_tab_ml_5.a - Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_5.a - -> Append - -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 - -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 - -> GroupAggregate - Group Key: pagg_tab_ml_2.a - Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) - -> Sort - Sort Key: pagg_tab_ml_2.a - -> Append - -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 - -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 -(25 rows) + QUERY PLAN +--------------------------------------------------------------------- + Append + -> GroupAggregate + Group Key: pagg_tab_ml.a + Filter: (avg(pagg_tab_ml.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml.a + -> Seq Scan on pagg_tab_ml_p1 pagg_tab_ml + -> GroupAggregate + Group Key: pagg_tab_ml_2.a + Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_2.a + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2 + -> Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3 + -> GroupAggregate + Group Key: pagg_tab_ml_5.a + Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_5.a + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5 + -> Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6 +(23 rows) -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY -- for level 1 only. For subpartitions, GROUP BY clause does not match with @@ -1379,28 +1375,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < -- When GROUP BY clause does not match; partial aggregation is performed for each partition. EXPLAIN (COSTS OFF) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; - QUERY PLAN -------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------- Sort Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x)) - -> Finalize GroupAggregate + -> Finalize HashAggregate Group Key: pagg_tab_para.y Filter: (avg(pagg_tab_para.x) < '12'::numeric) - -> Gather Merge + -> Gather Workers Planned: 2 - -> Sort - Sort Key: pagg_tab_para.y - -> Parallel Append - -> Partial HashAggregate - Group Key: pagg_tab_para.y - -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para - -> Partial HashAggregate - Group Key: pagg_tab_para_1.y - -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 - -> Partial HashAggregate - Group Key: pagg_tab_para_2.y - -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 -(19 rows) + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_para.y + -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para + -> Partial HashAggregate + Group Key: pagg_tab_para_1.y + -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1 + -> Partial HashAggregate + Group Key: pagg_tab_para_2.y + -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2 +(17 rows) SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; y | sum | avg | count diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index bb5b7c47a45..03926a84138 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -466,52 +466,41 @@ EXPLAIN (COSTS OFF) SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) WHERE a BETWEEN 490 AND 510 GROUP BY 1, 2 ORDER BY 1, 2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Group Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Merge Append + -> Sort Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Group - Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Sort - Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b)) - -> Merge Full Join - Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b)) - Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510)) - -> Sort - Sort Key: prt1.a, prt1.b - -> Seq Scan on prt1_p1 prt1 - -> Sort - Sort Key: p2.a, p2.b - -> Seq Scan on prt2_p1 p2 - -> Group - Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b)) - -> Sort - Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b)) - -> Merge Full Join - Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b)) - Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510)) - -> Sort - Sort Key: prt1_1.a, prt1_1.b - -> Seq Scan on prt1_p2 prt1_1 - -> Sort - Sort Key: p2_1.a, p2_1.b - -> Seq Scan on prt2_p2 p2_1 - -> Group - Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b)) - -> Sort - Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b)) - -> Merge Full Join - Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b)) - Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510)) - -> Sort - Sort Key: prt1_2.a, prt1_2.b - -> Seq Scan on prt1_p3 prt1_2 - -> Sort - Sort Key: p2_2.a, p2_2.b - -> Seq Scan on prt2_p3 p2_2 -(43 rows) + -> Append + -> Merge Full Join + Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b)) + Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510)) + -> Sort + Sort Key: prt1_1.a, prt1_1.b + -> Seq Scan on prt1_p1 prt1_1 + -> Sort + Sort Key: p2_1.a, p2_1.b + -> Seq Scan on prt2_p1 p2_1 + -> Merge Full Join + Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b)) + Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510)) + -> Sort + Sort Key: prt1_2.a, prt1_2.b + -> Seq Scan on prt1_p2 prt1_2 + -> Sort + Sort Key: p2_2.a, p2_2.b + -> Seq Scan on prt2_p2 p2_2 + -> Merge Full Join + Merge Cond: ((prt1_3.b = p2_3.b) AND (prt1_3.a = p2_3.a)) + Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510)) + -> Sort + Sort Key: prt1_3.b, prt1_3.a + -> Seq Scan on prt1_p3 prt1_3 + -> Sort + Sort Key: p2_3.b, p2_3.a + -> Seq Scan on prt2_p3 p2_3 +(32 rows) SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b) WHERE a BETWEEN 490 AND 510 diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 579b861d84f..4e775af1758 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -114,6 +114,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_async_append | on enable_bitmapscan | on enable_gathermerge | on + enable_group_by_reordering | on enable_hashagg | on enable_hashjoin | on enable_incremental_sort | on @@ -131,7 +132,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(20 rows) +(21 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index dece7310cfe..7ac4a9380e2 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -1303,24 +1303,22 @@ select distinct q1 from union all select distinct * from int8_tbl i82) ss where q2 = q2; - QUERY PLAN ----------------------------------------------------------- - Unique - -> Merge Append - Sort Key: "*SELECT* 1".q1 + QUERY PLAN +---------------------------------------------------- + HashAggregate + Group Key: "*SELECT* 1".q1 + -> Append -> Subquery Scan on "*SELECT* 1" - -> Unique - -> Sort - Sort Key: i81.q1, i81.q2 - -> Seq Scan on int8_tbl i81 - Filter: (q2 IS NOT NULL) + -> HashAggregate + Group Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: (q2 IS NOT NULL) -> Subquery Scan on "*SELECT* 2" - -> Unique - -> Sort - Sort Key: i82.q1, i82.q2 - -> Seq Scan on int8_tbl i82 - Filter: (q2 IS NOT NULL) -(15 rows) + -> HashAggregate + Group Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: (q2 IS NOT NULL) +(13 rows) select distinct q1 from (select distinct * from int8_tbl i81 @@ -1339,24 +1337,22 @@ select distinct q1 from union all select distinct * from int8_tbl i82) ss where -q1 = q2; - QUERY PLAN --------------------------------------------------------- - Unique - -> Merge Append - Sort Key: "*SELECT* 1".q1 + QUERY PLAN +-------------------------------------------------- + HashAggregate + Group Key: "*SELECT* 1".q1 + -> Append -> Subquery Scan on "*SELECT* 1" - -> Unique - -> Sort - Sort Key: i81.q1, i81.q2 - -> Seq Scan on int8_tbl i81 - Filter: ((- q1) = q2) + -> HashAggregate + Group Key: i81.q1, i81.q2 + -> Seq Scan on int8_tbl i81 + Filter: ((- q1) = q2) -> Subquery Scan on "*SELECT* 2" - -> Unique - -> Sort - Sort Key: i82.q1, i82.q2 - -> Seq Scan on int8_tbl i82 - Filter: ((- q1) = q2) -(15 rows) + -> HashAggregate + Group Key: i82.q1, i82.q2 + -> Seq Scan on int8_tbl i82 + Filter: ((- q1) = q2) +(13 rows) select distinct q1 from (select distinct * from int8_tbl i81 diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 2f5d0e00f3d..c6e0d7ba2b7 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1025,6 +1025,105 @@ SELECT balk(hundred) FROM tenk1; ROLLBACK; +-- GROUP BY optimization by reorder columns + +SELECT + i AS id, + i/2 AS p, + format('%60s', i%2) AS v, + i/4 AS c, + i/8 AS d, + (random() * (10000/8))::int as e --the same as d but no correlation with p + INTO btg +FROM + generate_series(1, 10000) i; + +VACUUM btg; +ANALYZE btg; + +-- GROUP BY optimization by reorder columns by frequency + +SET enable_hashagg=off; +SET max_parallel_workers= 0; +SET max_parallel_workers_per_gather = 0; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY v, p, c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY v, p, d ,c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, d, c ORDER BY p, v, d ,c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, d, e; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, e, d; + +CREATE STATISTICS btg_dep ON d, e, p FROM btg; +ANALYZE btg; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, d, e; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, e, d; + + +-- GROUP BY optimization by reorder columns by index scan + +CREATE INDEX ON btg(p, v); +SET enable_seqscan=off; +SET enable_bitmapscan=off; +VACUUM btg; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY p, v ORDER BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p ORDER BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, p, c ORDER BY p, v; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, c, p, d; + +EXPLAIN (COSTS off) +SELECT count(*) FROM btg GROUP BY v, c, p, d ORDER BY p, v; + +DROP TABLE btg; + +RESET enable_hashagg; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +RESET enable_seqscan; +RESET enable_bitmapscan; + + -- Secondly test the case of a parallel aggregate combiner function -- returning NULL. For that use normal transition function, but a -- combiner function returning NULL. diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index d8768a6b54d..1de163e0395 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -213,7 +213,7 @@ set parallel_tuple_cost = 0; set max_parallel_workers_per_gather = 2; create table t (a int, b int, c int); -insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i); +insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i); create index on t (a); analyze t; |
