diff options
| author | Tom Lane | 2020-10-28 15:15:47 +0000 |
|---|---|---|
| committer | Tom Lane | 2020-10-28 15:15:47 +0000 |
| commit | ad1c36b0709e47cdb3cc4abd6c939fe64279b63f (patch) | |
| tree | 61b6c959d98f9f1dc52bf7f359d703c1e4b2a964 /src/test | |
| parent | ce7f772c5e6066e0bbafea5759e652c9757c8e6b (diff) | |
Fix foreign-key selectivity estimation in the presence of constants.
get_foreign_key_join_selectivity() looks for join clauses that equate
the two sides of the FK constraint. However, if we have a query like
"WHERE fktab.a = pktab.a and fktab.a = 1", it won't find any such join
clause, because equivclass.c replaces the given clauses with "fktab.a
= 1 and pktab.a = 1", which can be enforced at the scan level, leaving
nothing to be done for column "a" at the join level.
We can fix that expectation without much trouble, but then a new problem
arises: applying the foreign-key-based selectivity rule produces a
rowcount underestimate, because we're effectively double-counting the
selectivity of the "fktab.a = 1" clause. So we have to cancel that
selectivity out of the estimate.
To fix, refactor process_implied_equality() so that it can pass back the
new RestrictInfo to its callers in equivclass.c, allowing the generated
"fktab.a = 1" clause to be saved in the EquivalenceClass's ec_derives
list. Then it's not much trouble to dig out the relevant RestrictInfo
when we need to adjust an FK selectivity estimate. (While at it, we
can also remove the expensive use of initialize_mergeclause_eclasses()
to set up the new RestrictInfo's left_ec and right_ec pointers.
The equivclass.c code can set those basically for free.)
This seems like clearly a bug fix, but I'm hesitant to back-patch it,
first because there's some API/ABI risk for extensions and second because
we're usually loath to destabilize plan choices in stable branches.
Per report from Sigrid Ehrenreich.
Discussion: https://postgr.es/m/1019549.1603770457@sss.pgh.pa.us
Discussion: https://postgr.es/m/AM6PR02MB5287A0ADD936C1FA80973E72AB190@AM6PR02MB5287.eurprd02.prod.outlook.com
Diffstat (limited to 'src/test')
| -rw-r--r-- | src/test/regress/expected/join.out | 50 | ||||
| -rw-r--r-- | src/test/regress/sql/join.sql | 29 |
2 files changed, 79 insertions, 0 deletions
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index a46b1573bd4..6c9a5e26dde 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -5843,6 +5843,56 @@ select t1.b, ss.phv from join_ut1 t1 left join lateral drop table join_pt1; drop table join_ut1; -- +-- test estimation behavior with multi-column foreign key and constant qual +-- +begin; +create table fkest (x integer, x10 integer, x10b integer, x100 integer); +insert into fkest select x, x/10, x/10, x/100 from generate_series(1,1000) x; +create unique index on fkest(x, x10, x100); +analyze fkest; +explain (costs off) +select * from fkest f1 + join fkest f2 on (f1.x = f2.x and f1.x10 = f2.x10b and f1.x100 = f2.x100) + join fkest f3 on f1.x = f3.x + where f1.x100 = 2; + QUERY PLAN +----------------------------------------------------------- + Nested Loop + -> Hash Join + Hash Cond: ((f2.x = f1.x) AND (f2.x10b = f1.x10)) + -> Seq Scan on fkest f2 + Filter: (x100 = 2) + -> Hash + -> Seq Scan on fkest f1 + Filter: (x100 = 2) + -> Index Scan using fkest_x_x10_x100_idx on fkest f3 + Index Cond: (x = f1.x) +(10 rows) + +alter table fkest add constraint fk + foreign key (x, x10b, x100) references fkest (x, x10, x100); +explain (costs off) +select * from fkest f1 + join fkest f2 on (f1.x = f2.x and f1.x10 = f2.x10b and f1.x100 = f2.x100) + join fkest f3 on f1.x = f3.x + where f1.x100 = 2; + QUERY PLAN +----------------------------------------------------- + Hash Join + Hash Cond: ((f2.x = f1.x) AND (f2.x10b = f1.x10)) + -> Hash Join + Hash Cond: (f3.x = f2.x) + -> Seq Scan on fkest f3 + -> Hash + -> Seq Scan on fkest f2 + Filter: (x100 = 2) + -> Hash + -> Seq Scan on fkest f1 + Filter: (x100 = 2) +(11 rows) + +rollback; +-- -- test that foreign key join estimation performs sanely for outer joins -- begin; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 1403e0ffe7b..dd60d6a1f3b 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1975,6 +1975,35 @@ select t1.b, ss.phv from join_ut1 t1 left join lateral drop table join_pt1; drop table join_ut1; + +-- +-- test estimation behavior with multi-column foreign key and constant qual +-- + +begin; + +create table fkest (x integer, x10 integer, x10b integer, x100 integer); +insert into fkest select x, x/10, x/10, x/100 from generate_series(1,1000) x; +create unique index on fkest(x, x10, x100); +analyze fkest; + +explain (costs off) +select * from fkest f1 + join fkest f2 on (f1.x = f2.x and f1.x10 = f2.x10b and f1.x100 = f2.x100) + join fkest f3 on f1.x = f3.x + where f1.x100 = 2; + +alter table fkest add constraint fk + foreign key (x, x10b, x100) references fkest (x, x10, x100); + +explain (costs off) +select * from fkest f1 + join fkest f2 on (f1.x = f2.x and f1.x10 = f2.x10b and f1.x100 = f2.x100) + join fkest f3 on f1.x = f3.x + where f1.x100 = 2; + +rollback; + -- -- test that foreign key join estimation performs sanely for outer joins -- |
