summaryrefslogtreecommitdiff
path: root/src/test
diff options
context:
space:
mode:
authorAlexander Korotkov2025-02-12 22:56:03 +0000
committerAlexander Korotkov2025-02-17 10:44:12 +0000
commitfc069a3a6319b5bf40d2f0f1efceae1c9b7a68a8 (patch)
tree3a82ac49a60ad4d82ec3070cf412fa73b62c3695 /src/test
parent3fb58625d18fd226cb929c9700d0db72ac92c075 (diff)
Implement Self-Join Elimination
The Self-Join Elimination (SJE) feature removes an inner join of a plain table to itself in the query tree if it is proven that the join can be replaced with a scan without impacting the query result. Self-join and inner relation get replaced with the outer in query, equivalence classes, and planner info structures. Also, the inner restrictlist moves to the outer one with the removal of duplicated clauses. Thus, this optimization reduces the length of the range table list (this especially makes sense for partitioned relations), reduces the number of restriction clauses and, in turn, selectivity estimations, and potentially improves total planner prediction for the query. This feature is dedicated to avoiding redundancy, which can appear after pull-up transformations or the creation of an EquivalenceClass-derived clause like the below. SELECT * FROM t1 WHERE x IN (SELECT t3.x FROM t1 t3); SELECT * FROM t1 WHERE EXISTS (SELECT t3.x FROM t1 t3 WHERE t3.x = t1.x); SELECT * FROM t1,t2, t1 t3 WHERE t1.x = t2.x AND t2.x = t3.x; In the future, we could also reduce redundancy caused by subquery pull-up after unnecessary outer join removal in cases like the one below. SELECT * FROM t1 WHERE x IN (SELECT t3.x FROM t1 t3 LEFT JOIN t2 ON t2.x = t1.x); Also, it can drastically help to join partitioned tables, removing entries even before their expansion. The SJE proof is based on innerrel_is_unique() machinery. We can remove a self-join when for each outer row: 1. At most, one inner row matches the join clause; 2. Each matched inner row must be (physically) the same as the outer one; 3. Inner and outer rows have the same row mark. In this patch, we use the next approach to identify a self-join: 1. Collect all merge-joinable join quals which look like a.x = b.x; 2. Add to the list above the baseretrictinfo of the inner table; 3. Check innerrel_is_unique() for the qual list. If it returns false, skip this pair of joining tables; 4. Check uniqueness, proved by the baserestrictinfo clauses. To prove the possibility of self-join elimination, the inner and outer clauses must match exactly. The relation replacement procedure is not trivial and is partly combined with the one used to remove useless left joins. Tests covering this feature were added to join.sql. Some of the existing regression tests changed due to self-join removal logic. Discussion: https://postgr.es/m/flat/64486b0b-0404-e39e-322d-0801154901f3%40postgrespro.ru Author: Andrey Lepikhov <a.lepikhov@postgrespro.ru> Author: Alexander Kuzmenkov <a.kuzmenkov@postgrespro.ru> Co-authored-by: Alexander Korotkov <aekorotkov@gmail.com> Co-authored-by: Alena Rybakina <lena.ribackina@yandex.ru> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Robert Haas <robertmhaas@gmail.com> Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Simon Riggs <simon@2ndquadrant.com> Reviewed-by: Jonathan S. Katz <jkatz@postgresql.org> Reviewed-by: David Rowley <david.rowley@2ndquadrant.com> Reviewed-by: Thomas Munro <thomas.munro@enterprisedb.com> Reviewed-by: Konstantin Knizhnik <k.knizhnik@postgrespro.ru> Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi> Reviewed-by: Hywel Carver <hywel@skillerwhale.com> Reviewed-by: Laurenz Albe <laurenz.albe@cybertec.at> Reviewed-by: Ronan Dunklau <ronan.dunklau@aiven.io> Reviewed-by: vignesh C <vignesh21@gmail.com> Reviewed-by: Zhihong Yu <zyu@yugabyte.com> Reviewed-by: Greg Stark <stark@mit.edu> Reviewed-by: Jaime Casanova <jcasanov@systemguards.com.ec> Reviewed-by: Michał Kłeczek <michal@kleczek.org> Reviewed-by: Alena Rybakina <lena.ribackina@yandex.ru> Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
Diffstat (limited to 'src/test')
-rw-r--r--src/test/regress/expected/equivclass.out30
-rw-r--r--src/test/regress/expected/join.out1083
-rw-r--r--src/test/regress/expected/sysviews.out3
-rw-r--r--src/test/regress/sql/equivclass.sql16
-rw-r--r--src/test/regress/sql/join.sql494
5 files changed, 1625 insertions, 1 deletions
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out
index 56227505009..ad8ab294ff6 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -434,6 +434,36 @@ explain (costs off)
Filter: ((unique1 IS NOT NULL) AND (unique2 IS NOT NULL))
(2 rows)
+-- Test that broken ECs are processed correctly during self join removal.
+-- Disable merge joins so that we don't get an error about missing commutator.
+-- Test both orientations of the join clause, because only one of them breaks
+-- the EC.
+set enable_mergejoin to off;
+explain (costs off)
+ select * from ec0 m join ec0 n on m.ff = n.ff
+ join ec1 p on m.ff + n.ff = p.f1;
+ QUERY PLAN
+---------------------------------------
+ Nested Loop
+ Join Filter: ((n.ff + n.ff) = p.f1)
+ -> Seq Scan on ec0 n
+ -> Materialize
+ -> Seq Scan on ec1 p
+(5 rows)
+
+explain (costs off)
+ select * from ec0 m join ec0 n on m.ff = n.ff
+ join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1;
+ QUERY PLAN
+---------------------------------------------------------------
+ Nested Loop
+ Join Filter: ((p.f1)::bigint = ((n.ff + n.ff))::int8alias1)
+ -> Seq Scan on ec0 n
+ -> Materialize
+ -> Seq Scan on ec1 p
+(5 rows)
+
+reset enable_mergejoin;
-- this could be converted, but isn't at present
explain (costs off)
select * from tenk1 where unique1 = unique1 or unique2 = unique2;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 3ffc066b1f8..a57bb18c24f 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5966,6 +5966,27 @@ select c.id, ss.a from c
-> Seq Scan on c
(7 rows)
+-- check the case when the placeholder relates to an outer join and its
+-- inner in the press field but actually uses only the outer side of the join
+explain (costs off)
+SELECT q.val FROM b LEFT JOIN (
+ SELECT (q1.z IS NOT NULL) AS val
+ FROM b LEFT JOIN (
+ SELECT (t1.b_id IS NOT NULL) AS z FROM a t1 LEFT JOIN a t2 USING (id)
+ ) AS q1
+ ON true
+) AS q ON true;
+ QUERY PLAN
+------------------------------------------
+ Nested Loop Left Join
+ -> Seq Scan on b
+ -> Materialize
+ -> Nested Loop Left Join
+ -> Seq Scan on b b_1
+ -> Materialize
+ -> Seq Scan on a t1
+(7 rows)
+
CREATE TEMP TABLE parted_b (id int PRIMARY KEY) partition by range(id);
CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10);
-- test join removals on a partitioned table
@@ -6378,6 +6399,1068 @@ select * from
(0 rows)
--
+-- test that semi- or inner self-joins on a unique column are removed
+--
+-- enable only nestloop to get more predictable plans
+set enable_hashjoin to off;
+set enable_mergejoin to off;
+create table sj (a int unique, b int, c int unique);
+insert into sj values (1, null, 2), (null, 2, null), (2, 1, 1);
+analyze sj;
+-- Trivial self-join case.
+explain (costs off)
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+ QUERY PLAN
+-----------------------------------------------
+ Seq Scan on sj q
+ Filter: ((a IS NOT NULL) AND (b = (a - 1)))
+(2 rows)
+
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+ a | b | c
+---+---+---
+ 2 | 1 | 1
+(1 row)
+
+-- Self-join removal performs after a subquery pull-up process and could remove
+-- such kind of self-join too. Check this option.
+explain (costs off)
+select * from sj p
+where exists (select * from sj q
+ where q.a = p.a and q.b < 10);
+ QUERY PLAN
+------------------------------------------
+ Seq Scan on sj q
+ Filter: ((a IS NOT NULL) AND (b < 10))
+(2 rows)
+
+select * from sj p
+where exists (select * from sj q
+ where q.a = p.a and q.b < 10);
+ a | b | c
+---+---+---
+ 2 | 1 | 1
+(1 row)
+
+-- Don't remove self-join for the case of equality of two different unique columns.
+explain (costs off)
+select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null;
+ QUERY PLAN
+---------------------------------------
+ Nested Loop
+ Join Filter: (t1.a = t2.c)
+ -> Seq Scan on sj t2
+ -> Materialize
+ -> Seq Scan on sj t1
+ Filter: (b IS NOT NULL)
+(6 rows)
+
+-- Ensure that relations with TABLESAMPLE clauses are not considered as
+-- candidates to be removed
+explain (costs off)
+select * from sj t1
+ join lateral
+ (select * from sj tablesample system(t1.b)) s
+ on t1.a = s.a;
+ QUERY PLAN
+---------------------------------------
+ Nested Loop
+ -> Seq Scan on sj t1
+ -> Memoize
+ Cache Key: t1.a, t1.b
+ Cache Mode: binary
+ -> Sample Scan on sj
+ Sampling: system (t1.b)
+ Filter: (t1.a = a)
+(8 rows)
+
+-- Ensure that SJE does not form a self-referential lateral dependency
+explain (costs off)
+select * from sj t1
+ left join lateral
+ (select t1.a as t1a, * from sj t2) s
+ on true
+where t1.a = s.a;
+ QUERY PLAN
+---------------------------
+ Seq Scan on sj t2
+ Filter: (a IS NOT NULL)
+(2 rows)
+
+-- Degenerated case.
+explain (costs off)
+select * from
+ (select a as x from sj where false) as q1,
+ (select a as y from sj where false) as q2
+where q1.x = q2.y;
+ QUERY PLAN
+--------------------------
+ Result
+ One-Time Filter: false
+(2 rows)
+
+-- We can't use a cross-EC generated self join qual because of current logic of
+-- the generate_join_implied_equalities routine.
+explain (costs off)
+select * from sj t1, sj t2 where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a;
+ QUERY PLAN
+------------------------------
+ Nested Loop
+ Join Filter: (t1.a = t2.b)
+ -> Seq Scan on sj t1
+ Filter: (a = b)
+ -> Seq Scan on sj t2
+ Filter: (b = a)
+(6 rows)
+
+explain (costs off)
+select * from sj t1, sj t2, sj t3
+where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a and
+ t1.b = t3.b and t3.b = t3.a;
+ QUERY PLAN
+------------------------------------
+ Nested Loop
+ Join Filter: (t1.a = t3.b)
+ -> Nested Loop
+ Join Filter: (t1.a = t2.b)
+ -> Seq Scan on sj t1
+ Filter: (a = b)
+ -> Seq Scan on sj t2
+ Filter: (b = a)
+ -> Seq Scan on sj t3
+ Filter: (b = a)
+(10 rows)
+
+-- Double self-join removal.
+-- Use a condition on "b + 1", not on "b", for the second join, so that
+-- the equivalence class is different from the first one, and we can
+-- test the non-ec code path.
+explain (costs off)
+select *
+from sj t1
+ join sj t2 on t1.a = t2.a and t1.b = t2.b
+ join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Seq Scan on sj t3
+ Filter: ((a IS NOT NULL) AND (b IS NOT NULL) AND ((b + 1) IS NOT NULL))
+(2 rows)
+
+-- subselect that references the removed relation
+explain (costs off)
+select t1.a, (select a from sj where a = t2.a and a = t1.a)
+from sj t1, sj t2
+where t1.a = t2.a;
+ QUERY PLAN
+------------------------------------------
+ Seq Scan on sj t2
+ Filter: (a IS NOT NULL)
+ SubPlan 1
+ -> Result
+ One-Time Filter: (t2.a = t2.a)
+ -> Seq Scan on sj
+ Filter: (a = t2.a)
+(7 rows)
+
+-- self-join under outer join
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on x.a = z.q1;
+ QUERY PLAN
+------------------------------------
+ Nested Loop Left Join
+ Join Filter: (y.a = z.q1)
+ -> Seq Scan on sj y
+ Filter: (a IS NOT NULL)
+ -> Materialize
+ -> Seq Scan on int8_tbl z
+(6 rows)
+
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on y.a = z.q1;
+ QUERY PLAN
+------------------------------------
+ Nested Loop Left Join
+ Join Filter: (y.a = z.q1)
+ -> Seq Scan on sj y
+ Filter: (a IS NOT NULL)
+ -> Materialize
+ -> Seq Scan on int8_tbl z
+(6 rows)
+
+explain (costs off)
+select * from (
+ select t1.*, t2.a as ax from sj t1 join sj t2
+ on (t1.a = t2.a and t1.c * t1.c = t2.c + 2 and t2.b is null)
+) as q1
+left join
+ (select t3.* from sj t3, sj t4 where t3.c = t4.c) as q2
+on q1.ax = q2.a;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Nested Loop Left Join
+ Join Filter: (t2.a = t4.a)
+ -> Seq Scan on sj t2
+ Filter: ((b IS NULL) AND (a IS NOT NULL) AND ((c * c) = (c + 2)))
+ -> Seq Scan on sj t4
+ Filter: (c IS NOT NULL)
+(6 rows)
+
+-- Test that placeholders are updated correctly after join removal
+explain (costs off)
+select * from (values (1)) x
+left join (select coalesce(y.q1, 1) from int8_tbl y
+ right join sj j1 inner join sj j2 on j1.a = j2.a
+ on true) z
+on true;
+ QUERY PLAN
+------------------------------------------
+ Nested Loop Left Join
+ -> Result
+ -> Nested Loop Left Join
+ -> Seq Scan on sj j2
+ Filter: (a IS NOT NULL)
+ -> Materialize
+ -> Seq Scan on int8_tbl y
+(7 rows)
+
+-- Test that references to the removed rel in lateral subqueries are replaced
+-- correctly after join removal
+explain (verbose, costs off)
+select t3.a from sj t1
+ join sj t2 on t1.a = t2.a
+ join lateral (select t1.a offset 0) t3 on true;
+ QUERY PLAN
+------------------------------------
+ Nested Loop
+ Output: (t2.a)
+ -> Seq Scan on public.sj t2
+ Output: t2.a, t2.b, t2.c
+ Filter: (t2.a IS NOT NULL)
+ -> Result
+ Output: t2.a
+(7 rows)
+
+explain (verbose, costs off)
+select t3.a from sj t1
+ join sj t2 on t1.a = t2.a
+ join lateral (select * from (select t1.a offset 0) offset 0) t3 on true;
+ QUERY PLAN
+------------------------------------
+ Nested Loop
+ Output: (t2.a)
+ -> Seq Scan on public.sj t2
+ Output: t2.a, t2.b, t2.c
+ Filter: (t2.a IS NOT NULL)
+ -> Result
+ Output: t2.a
+(7 rows)
+
+explain (verbose, costs off)
+select t4.a from sj t1
+ join sj t2 on t1.a = t2.a
+ join lateral (select t3.a from sj t3, (select t1.a) offset 0) t4 on true;
+ QUERY PLAN
+------------------------------------
+ Nested Loop
+ Output: t3.a
+ -> Seq Scan on public.sj t2
+ Output: t2.a, t2.b, t2.c
+ Filter: (t2.a IS NOT NULL)
+ -> Seq Scan on public.sj t3
+ Output: t3.a
+(7 rows)
+
+-- Check updating of semi_rhs_exprs links from upper-level semi join to
+-- the removing relation
+explain (verbose, costs off)
+select t1.a from sj t1 where t1.b in (
+ select t2.b from sj t2 join sj t3 on t2.c=t3.c);
+ QUERY PLAN
+------------------------------------------
+ Nested Loop Semi Join
+ Output: t1.a
+ Join Filter: (t1.b = t3.b)
+ -> Seq Scan on public.sj t1
+ Output: t1.a, t1.b, t1.c
+ -> Materialize
+ Output: t3.c, t3.b
+ -> Seq Scan on public.sj t3
+ Output: t3.c, t3.b
+ Filter: (t3.c IS NOT NULL)
+(10 rows)
+
+--
+-- SJE corner case: uniqueness of an inner is [partially] derived from
+-- baserestrictinfo clauses.
+-- XXX: We really should allow SJE for these corner cases?
+--
+INSERT INTO sj VALUES (3, 1, 3);
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3;
+ QUERY PLAN
+------------------------------
+ Nested Loop
+ Join Filter: (j1.b = j2.b)
+ -> Seq Scan on sj j1
+ Filter: (a = 2)
+ -> Seq Scan on sj j2
+ Filter: (a = 3)
+(6 rows)
+
+-- Return one row
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3;
+ a | b | c | a | b | c
+---+---+---+---+---+---
+ 2 | 1 | 1 | 3 | 1 | 3
+(1 row)
+
+-- Remove SJ, define uniqueness by a constant
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2;
+ QUERY PLAN
+-----------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND (a = 2))
+(2 rows)
+
+-- Return one row
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2;
+ a | b | c | a | b | c
+---+---+---+---+---+---
+ 2 | 1 | 1 | 2 | 1 | 1
+(1 row)
+
+-- Remove SJ, define uniqueness by a constant expression
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a;
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND (a = (((EXTRACT(dow FROM CURRENT_TIMESTAMP(0)) / '15'::numeric) + '3'::numeric))::integer))
+(2 rows)
+
+-- Return one row
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a;
+ a | b | c | a | b | c
+---+---+---+---+---+---
+ 3 | 1 | 3 | 3 | 1 | 3
+(1 row)
+
+-- Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1;
+ QUERY PLAN
+-----------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND (a = 1))
+(2 rows)
+
+-- Return no rows
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1;
+ a | b | c | a | b | c
+---+---+---+---+---+---
+(0 rows)
+
+-- Shuffle a clause. Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1;
+ QUERY PLAN
+-----------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND (a = 1))
+(2 rows)
+
+-- Return no rows
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1;
+ a | b | c | a | b | c
+---+---+---+---+---+---
+(0 rows)
+
+-- SJE Corner case: a 'a.x=a.x' clause, have replaced with 'a.x IS NOT NULL'
+-- after SJ elimination it shouldn't be a mergejoinable clause.
+EXPLAIN (COSTS OFF)
+SELECT t4.*
+FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3
+JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42;
+ QUERY PLAN
+---------------------------------
+ Nested Loop
+ Join Filter: (t1.b = t2.b)
+ -> Seq Scan on sj t2
+ Filter: (a = 42)
+ -> Seq Scan on sj t1
+ Filter: (a IS NOT NULL)
+(6 rows)
+
+SELECT t4.*
+FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3
+JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42;
+ a | b | c
+---+---+---
+(0 rows)
+
+-- Functional index
+CREATE UNIQUE INDEX sj_fn_idx ON sj((a * a));
+-- Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 1;
+ QUERY PLAN
+-----------------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND ((a * a) = 1))
+(2 rows)
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 2;
+ QUERY PLAN
+-------------------------------
+ Nested Loop
+ Join Filter: (j1.b = j2.b)
+ -> Seq Scan on sj j1
+ Filter: ((a * a) = 1)
+ -> Seq Scan on sj j2
+ Filter: ((a * a) = 2)
+(6 rows)
+
+-- Restriction contains expressions in both sides, Remove SJ.
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a);
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND ((a * a) = (((EXTRACT(dow FROM CURRENT_TIMESTAMP(0)) / '15'::numeric) + '3'::numeric))::integer))
+(2 rows)
+
+-- Empty set of rows should be returned
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a);
+ a | b | c | a | b | c
+---+---+---+---+---+---
+(0 rows)
+
+-- Restriction contains volatile function - disable SJE feature.
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.c/3) = (random()/3 + 3)::int
+ AND (random()/3 + 3)::int = (j2.a*j2.c/3);
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------
+ Nested Loop
+ Join Filter: (j1.b = j2.b)
+ -> Seq Scan on sj j1
+ Filter: (((a * c) / 3) = (((random() / '3'::double precision) + '3'::double precision))::integer)
+ -> Seq Scan on sj j2
+ Filter: ((((random() / '3'::double precision) + '3'::double precision))::integer = ((a * c) / 3))
+(6 rows)
+
+-- Return one row
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.c/3) = (random()/3 + 3)::int
+ AND (random()/3 + 3)::int = (j2.a*j2.c/3);
+ a | b | c | a | b | c
+---+---+---+---+---+---
+ 3 | 1 | 3 | 3 | 1 | 3
+(1 row)
+
+-- Multiple filters
+CREATE UNIQUE INDEX sj_temp_idx1 ON sj(a,b,c);
+-- Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND j1.a = 2 AND j1.c = 3 AND j2.a = 2 AND 3 = j2.c;
+ QUERY PLAN
+-----------------------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND (a = 2) AND (c = 3))
+(2 rows)
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+ SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND 2 = j1.a AND j1.c = 3 AND j2.a = 1 AND 3 = j2.c;
+ QUERY PLAN
+---------------------------------------
+ Nested Loop
+ Join Filter: (j1.b = j2.b)
+ -> Seq Scan on sj j1
+ Filter: ((2 = a) AND (c = 3))
+ -> Seq Scan on sj j2
+ Filter: ((c = 3) AND (a = 1))
+(6 rows)
+
+CREATE UNIQUE INDEX sj_temp_idx ON sj(a,b);
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2;
+ QUERY PLAN
+------------------------------
+ Nested Loop
+ Join Filter: (j1.b = j2.b)
+ -> Seq Scan on sj j1
+ Filter: (a = 2)
+ -> Seq Scan on sj j2
+(5 rows)
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 2 = j2.a;
+ QUERY PLAN
+------------------------------
+ Nested Loop
+ Join Filter: (j1.b = j2.b)
+ -> Seq Scan on sj j2
+ Filter: (2 = a)
+ -> Seq Scan on sj j1
+(5 rows)
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND (j1.a = 1 OR j2.a = 1);
+ QUERY PLAN
+---------------------------------------------------------------
+ Nested Loop
+ Join Filter: ((j1.b = j2.b) AND ((j1.a = 1) OR (j2.a = 1)))
+ -> Seq Scan on sj j1
+ -> Materialize
+ -> Seq Scan on sj j2
+(5 rows)
+
+DROP INDEX sj_fn_idx, sj_temp_idx1, sj_temp_idx;
+-- Test that OR predicated are updated correctly after join removal
+CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT);
+CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag);
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM tab_with_flag
+WHERE
+ (is_flag IS NULL OR is_flag = 0)
+ AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3));
+ QUERY PLAN
+-----------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tab_with_flag
+ Recheck Cond: (id = ANY ('{2,3}'::integer[]))
+ Filter: ((is_flag IS NULL) OR (is_flag = 0))
+ -> Bitmap Index Scan on tab_with_flag_pkey
+ Index Cond: (id = ANY ('{2,3}'::integer[]))
+(6 rows)
+
+DROP TABLE tab_with_flag;
+-- HAVING clause
+explain (costs off)
+select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1;
+ QUERY PLAN
+---------------------------------
+ HashAggregate
+ Group Key: q.b
+ Filter: (sum(q.a) = 1)
+ -> Seq Scan on sj q
+ Filter: (a IS NOT NULL)
+(5 rows)
+
+-- update lateral references and range table entry reference
+explain (verbose, costs off)
+select 1 from (select x.* from sj x, sj y where x.a = y.a) q,
+ lateral generate_series(1, q.a) gs(i);
+ QUERY PLAN
+------------------------------------------------------
+ Nested Loop
+ Output: 1
+ -> Seq Scan on public.sj y
+ Output: y.a, y.b, y.c
+ Filter: (y.a IS NOT NULL)
+ -> Function Scan on pg_catalog.generate_series gs
+ Output: gs.i
+ Function Call: generate_series(1, y.a)
+(8 rows)
+
+explain (verbose, costs off)
+select 1 from (select y.* from sj x, sj y where x.a = y.a) q,
+ lateral generate_series(1, q.a) gs(i);
+ QUERY PLAN
+------------------------------------------------------
+ Nested Loop
+ Output: 1
+ -> Seq Scan on public.sj y
+ Output: y.a, y.b, y.c
+ Filter: (y.a IS NOT NULL)
+ -> Function Scan on pg_catalog.generate_series gs
+ Output: gs.i
+ Function Call: generate_series(1, y.a)
+(8 rows)
+
+-- Test that a non-EC-derived join clause is processed correctly. Use an
+-- outer join so that we can't form an EC.
+explain (costs off) select * from sj p join sj q on p.a = q.a
+ left join sj r on p.a + q.a = r.a;
+ QUERY PLAN
+------------------------------------
+ Nested Loop Left Join
+ Join Filter: ((q.a + q.a) = r.a)
+ -> Seq Scan on sj q
+ Filter: (a IS NOT NULL)
+ -> Materialize
+ -> Seq Scan on sj r
+(6 rows)
+
+-- FIXME this constant false filter doesn't look good. Should we merge
+-- equivalence classes?
+explain (costs off)
+select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2;
+ QUERY PLAN
+-----------------------------------------------------
+ Seq Scan on sj q
+ Filter: ((a IS NOT NULL) AND (b = 2) AND (b = 1))
+(2 rows)
+
+-- Check that attr_needed is updated correctly after self-join removal. In this
+-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2.
+-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b.
+-- Use index scan for k1 so that we don't get 'b' from physical tlist used for
+-- seqscan. Also disable reordering of joins because this test depends on a
+-- particular join tree.
+create table sk (a int, b int);
+create index on sk(a);
+set join_collapse_limit to 1;
+set enable_seqscan to off;
+explain (costs off) select 1 from
+ (sk k1 join sk k2 on k1.a = k2.a)
+ join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b;
+ QUERY PLAN
+-----------------------------------------------------
+ Nested Loop
+ Join Filter: (k1.b = j2.b)
+ -> Nested Loop
+ -> Index Scan using sk_a_idx on sk k1
+ -> Index Only Scan using sk_a_idx on sk k2
+ Index Cond: (a = k1.a)
+ -> Materialize
+ -> Index Scan using sj_a_key on sj j2
+ Index Cond: (a IS NOT NULL)
+(9 rows)
+
+explain (costs off) select 1 from
+ (sk k1 join sk k2 on k1.a = k2.a)
+ join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b;
+ QUERY PLAN
+-----------------------------------------------------
+ Nested Loop
+ Join Filter: (k1.b = j2.b)
+ -> Nested Loop
+ -> Index Scan using sk_a_idx on sk k1
+ -> Index Only Scan using sk_a_idx on sk k2
+ Index Cond: (a = k1.a)
+ -> Materialize
+ -> Index Scan using sj_a_key on sj j2
+ Index Cond: (a IS NOT NULL)
+(9 rows)
+
+reset join_collapse_limit;
+reset enable_seqscan;
+-- Check that clauses from the join filter list is not lost on the self-join removal
+CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int);
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code;
+ QUERY PLAN
+------------------------------------------
+ Seq Scan on public.emp1 e2
+ Output: e2.id, e2.code, e2.id, e2.code
+ Filter: (e2.code <> e2.code)
+(3 rows)
+
+-- Shuffle self-joined relations. Only in the case of iterative deletion
+-- attempts explains of these queries will be identical.
+CREATE UNIQUE INDEX ON emp1((id*id));
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c1.id=c2.id AND c1.id*c2.id=c3.id*c3.id;
+ QUERY PLAN
+-----------------------------------------
+ Aggregate
+ -> Seq Scan on emp1 c3
+ Filter: ((id * id) IS NOT NULL)
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c1.id=c3.id AND c1.id*c3.id=c2.id*c2.id;
+ QUERY PLAN
+-----------------------------------------
+ Aggregate
+ -> Seq Scan on emp1 c3
+ Filter: ((id * id) IS NOT NULL)
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id;
+ QUERY PLAN
+-----------------------------------------
+ Aggregate
+ -> Seq Scan on emp1 c3
+ Filter: ((id * id) IS NOT NULL)
+(3 rows)
+
+-- Check the usage of a parse tree by the set operations (bug #18170)
+EXPLAIN (COSTS OFF)
+SELECT c1.code FROM emp1 c1 LEFT JOIN emp1 c2 ON c1.id = c2.id
+WHERE c2.id IS NOT NULL
+EXCEPT ALL
+SELECT c3.code FROM emp1 c3;
+ QUERY PLAN
+---------------------------
+ HashSetOp Except All
+ -> Seq Scan on emp1 c2
+ -> Seq Scan on emp1 c3
+(3 rows)
+
+-- Check that SJE removes references from PHVs correctly
+explain (costs off)
+select * from emp1 t1 left join
+ (select coalesce(t3.code, 1) from emp1 t2
+ left join (emp1 t3 join emp1 t4 on t3.id = t4.id)
+ on true)
+on true;
+ QUERY PLAN
+---------------------------------------------
+ Nested Loop Left Join
+ -> Seq Scan on emp1 t1
+ -> Materialize
+ -> Nested Loop Left Join
+ -> Seq Scan on emp1 t2
+ -> Materialize
+ -> Seq Scan on emp1 t4
+(7 rows)
+
+-- Check that SJE removes the whole PHVs correctly
+explain (verbose, costs off)
+select 1 from emp1 t1 left join
+ ((select 1 as x, * from emp1 t2) s1 inner join
+ (select * from emp1 t3) s2 on s1.id = s2.id)
+ on true
+where s1.x = 1;
+ QUERY PLAN
+----------------------------------------
+ Nested Loop
+ Output: 1
+ -> Seq Scan on public.emp1 t1
+ Output: t1.id, t1.code
+ -> Materialize
+ Output: t3.id
+ -> Seq Scan on public.emp1 t3
+ Output: t3.id
+ Filter: (1 = 1)
+(9 rows)
+
+-- Check that PHVs do not impose any constraints on removing self joins
+explain (verbose, costs off)
+select * from emp1 t1 join emp1 t2 on t1.id = t2.id left join
+ lateral (select t1.id as t1id, * from generate_series(1,1) t3) s on true;
+ QUERY PLAN
+----------------------------------------------------------
+ Nested Loop Left Join
+ Output: t2.id, t2.code, t2.id, t2.code, (t2.id), t3.t3
+ -> Seq Scan on public.emp1 t2
+ Output: t2.id, t2.code
+ -> Function Scan on pg_catalog.generate_series t3
+ Output: t3.t3, t2.id
+ Function Call: generate_series(1, 1)
+(7 rows)
+
+explain (verbose, costs off)
+select * from generate_series(1,10) t1(id) left join
+ lateral (select t1.id as t1id, t2.id from emp1 t2 join emp1 t3 on t2.id = t3.id)
+on true;
+ QUERY PLAN
+------------------------------------------------------
+ Nested Loop Left Join
+ Output: t1.id, (t1.id), t3.id
+ -> Function Scan on pg_catalog.generate_series t1
+ Output: t1.id
+ Function Call: generate_series(1, 10)
+ -> Seq Scan on public.emp1 t3
+ Output: t3.id, t1.id
+(7 rows)
+
+-- Check that SJE replaces join clauses involving the removed rel correctly
+explain (costs off)
+select * from emp1 t1
+ inner join emp1 t2 on t1.id = t2.id
+ left join emp1 t3 on t1.id > 1 and t1.id < 2;
+ QUERY PLAN
+----------------------------------------------
+ Nested Loop Left Join
+ Join Filter: ((t2.id > 1) AND (t2.id < 2))
+ -> Seq Scan on emp1 t2
+ -> Materialize
+ -> Seq Scan on emp1 t3
+(5 rows)
+
+-- Check that SJE doesn't replace the target relation
+EXPLAIN (COSTS OFF)
+WITH t1 AS (SELECT * FROM emp1)
+UPDATE emp1 SET code = t1.code + 1 FROM t1
+WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code;
+ QUERY PLAN
+-------------------------------------------------------
+ Update on emp1
+ -> Nested Loop
+ -> Seq Scan on emp1
+ -> Index Scan using emp1_pkey on emp1 emp1_1
+ Index Cond: (id = emp1.id)
+(5 rows)
+
+INSERT INTO emp1 VALUES (1, 1), (2, 1);
+WITH t1 AS (SELECT * FROM emp1)
+UPDATE emp1 SET code = t1.code + 1 FROM t1
+WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code;
+ id | code | code
+----+------+------
+ 1 | 2 | 1
+ 2 | 2 | 1
+(2 rows)
+
+TRUNCATE emp1;
+EXPLAIN (COSTS OFF)
+UPDATE sj sq SET b = 1 FROM sj as sz WHERE sq.a = sz.a;
+ QUERY PLAN
+-------------------------------------
+ Update on sj sq
+ -> Nested Loop
+ Join Filter: (sq.a = sz.a)
+ -> Seq Scan on sj sq
+ -> Materialize
+ -> Seq Scan on sj sz
+(6 rows)
+
+CREATE RULE sj_del_rule AS ON DELETE TO sj
+ DO INSTEAD
+ UPDATE sj SET a = 1 WHERE a = old.a;
+EXPLAIN (COSTS OFF) DELETE FROM sj;
+ QUERY PLAN
+--------------------------------------
+ Update on sj sj_1
+ -> Nested Loop
+ Join Filter: (sj.a = sj_1.a)
+ -> Seq Scan on sj sj_1
+ -> Materialize
+ -> Seq Scan on sj
+(6 rows)
+
+DROP RULE sj_del_rule ON sj CASCADE;
+-- Check that SJE does not mistakenly omit qual clauses (bug #18187)
+insert into emp1 values (1, 1);
+explain (costs off)
+select 1 from emp1 full join
+ (select * from emp1 t1 join
+ emp1 t2 join emp1 t3 on t2.id = t3.id
+ on true
+ where false) s on true
+where false;
+ QUERY PLAN
+--------------------------
+ Result
+ One-Time Filter: false
+(2 rows)
+
+select 1 from emp1 full join
+ (select * from emp1 t1 join
+ emp1 t2 join emp1 t3 on t2.id = t3.id
+ on true
+ where false) s on true
+where false;
+ ?column?
+----------
+(0 rows)
+
+-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness
+-- made with different set of quals
+insert into emp1 values (2, 1);
+explain (costs off)
+select * from emp1 t1 where exists (select * from emp1 t2
+ where t2.id = t1.code and t2.code > 0);
+ QUERY PLAN
+---------------------------------------------
+ Nested Loop
+ -> Seq Scan on emp1 t1
+ -> Index Scan using emp1_pkey on emp1 t2
+ Index Cond: (id = t1.code)
+ Filter: (code > 0)
+(5 rows)
+
+select * from emp1 t1 where exists (select * from emp1 t2
+ where t2.id = t1.code and t2.code > 0);
+ id | code
+----+------
+ 1 | 1
+ 2 | 1
+(2 rows)
+
+-- We can remove the join even if we find the join can't duplicate rows and
+-- the base quals of each side are different. In the following case we end up
+-- moving quals over to s1 to make it so it can't match any rows.
+create table sl(a int, b int, c int);
+create unique index on sl(a, b);
+vacuum analyze sl;
+-- Both sides are unique, but base quals are different
+explain (costs off)
+select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1 and t2.b = 2;
+ QUERY PLAN
+------------------------------
+ Nested Loop
+ Join Filter: (t1.a = t2.a)
+ -> Seq Scan on sl t1
+ Filter: (b = 1)
+ -> Seq Scan on sl t2
+ Filter: (b = 2)
+(6 rows)
+
+-- Check NullTest in baserestrictinfo list
+explain (costs off)
+select * from sl t1, sl t2
+where t1.a = t2.a and t1.b = 1 and t2.b = 2
+ and t1.c IS NOT NULL and t2.c IS NOT NULL
+ and t2.b IS NOT NULL and t1.b IS NOT NULL
+ and t1.a IS NOT NULL and t2.a IS NOT NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------
+ Nested Loop
+ Join Filter: (t1.a = t2.a)
+ -> Seq Scan on sl t1
+ Filter: ((c IS NOT NULL) AND (b IS NOT NULL) AND (a IS NOT NULL) AND (b = 1))
+ -> Seq Scan on sl t2
+ Filter: ((c IS NOT NULL) AND (b IS NOT NULL) AND (a IS NOT NULL) AND (b = 2))
+(6 rows)
+
+explain (verbose, costs off)
+select * from sl t1, sl t2
+where t1.b = t2.b and t2.a = 3 and t1.a = 3
+ and t1.c IS NOT NULL and t2.c IS NOT NULL
+ and t2.b IS NOT NULL and t1.b IS NOT NULL
+ and t1.a IS NOT NULL and t2.a IS NOT NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------
+ Seq Scan on public.sl t2
+ Output: t2.a, t2.b, t2.c, t2.a, t2.b, t2.c
+ Filter: ((t2.c IS NOT NULL) AND (t2.b IS NOT NULL) AND (t2.a IS NOT NULL) AND (t2.a = 3))
+(3 rows)
+
+-- Join qual isn't mergejoinable, but inner is unique.
+EXPLAIN (COSTS OFF)
+SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a AND n2.a = 1;
+ QUERY PLAN
+-------------------------------
+ Nested Loop
+ Join Filter: (n1.a <> n2.a)
+ -> Seq Scan on sj n2
+ Filter: (a = 1)
+ -> Seq Scan on sj n1
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+(SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a) q0, sl
+WHERE q0.a = 1;
+ QUERY PLAN
+-------------------------------
+ Nested Loop
+ Join Filter: (n1.a <> n2.a)
+ -> Nested Loop
+ -> Seq Scan on sl
+ -> Seq Scan on sj n2
+ Filter: (a = 1)
+ -> Seq Scan on sj n1
+(7 rows)
+
+-- Check optimization disabling if it will violate special join conditions.
+-- Two identical joined relations satisfies self join removal conditions but
+-- stay in different special join infos.
+CREATE TABLE sj_t1 (id serial, a int);
+CREATE TABLE sj_t2 (id serial, a int);
+CREATE TABLE sj_t3 (id serial, a int);
+CREATE TABLE sj_t4 (id serial, a int);
+CREATE UNIQUE INDEX ON sj_t3 USING btree (a,id);
+CREATE UNIQUE INDEX ON sj_t2 USING btree (id);
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj_t1
+JOIN (
+ SELECT sj_t2.id AS id FROM sj_t2
+ WHERE EXISTS
+ (
+ SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id
+ )
+ ) t2t3t4
+ON sj_t1.id = t2t3t4.id
+JOIN (
+ SELECT sj_t2.id AS id FROM sj_t2
+ WHERE EXISTS
+ (
+ SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id
+ )
+ ) _t2t3t4
+ON sj_t1.id = _t2t3t4.id;
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Nested Loop
+ Join Filter: (sj_t3.id = sj_t1.id)
+ -> Nested Loop
+ Join Filter: (sj_t2.id = sj_t3.id)
+ -> Nested Loop Semi Join
+ -> Nested Loop
+ -> HashAggregate
+ Group Key: sj_t3.id
+ -> Nested Loop
+ -> Seq Scan on sj_t4
+ -> Materialize
+ -> Bitmap Heap Scan on sj_t3
+ Recheck Cond: (a = 1)
+ -> Bitmap Index Scan on sj_t3_a_id_idx
+ Index Cond: (a = 1)
+ -> Index Only Scan using sj_t2_id_idx on sj_t2 sj_t2_1
+ Index Cond: (id = sj_t3.id)
+ -> Nested Loop
+ -> Index Only Scan using sj_t3_a_id_idx on sj_t3 sj_t3_1
+ Index Cond: ((a = 1) AND (id = sj_t3.id))
+ -> Seq Scan on sj_t4 sj_t4_1
+ -> Index Only Scan using sj_t2_id_idx on sj_t2
+ Index Cond: (id = sj_t2_1.id)
+ -> Seq Scan on sj_t1
+(24 rows)
+
+--
+-- Test RowMarks-related code
+--
+-- Both sides have explicit LockRows marks
+EXPLAIN (COSTS OFF)
+SELECT a1.a FROM sj a1,sj a2 WHERE (a1.a=a2.a) FOR UPDATE;
+ QUERY PLAN
+---------------------------------
+ LockRows
+ -> Seq Scan on sj a2
+ Filter: (a IS NOT NULL)
+(3 rows)
+
+reset enable_hashjoin;
+reset enable_mergejoin;
+--
-- Test hints given on incorrect column references are useful
--
select t1.uunique1 from
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 352abc0bd42..83228cfca29 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -168,10 +168,11 @@ select name, setting from pg_settings where name like 'enable%';
enable_partitionwise_aggregate | off
enable_partitionwise_join | off
enable_presorted_aggregate | on
+ enable_self_join_elimination | on
enable_seqscan | on
enable_sort | on
enable_tidscan | on
-(23 rows)
+(24 rows)
-- There are always wait event descriptions for various types. InjectionPoint
-- may be present or absent, depending on history since last postmaster start.
diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql
index 28ed7910d01..7fc2159349b 100644
--- a/src/test/regress/sql/equivclass.sql
+++ b/src/test/regress/sql/equivclass.sql
@@ -259,6 +259,22 @@ drop user regress_user_ectest;
explain (costs off)
select * from tenk1 where unique1 = unique1 and unique2 = unique2;
+-- Test that broken ECs are processed correctly during self join removal.
+-- Disable merge joins so that we don't get an error about missing commutator.
+-- Test both orientations of the join clause, because only one of them breaks
+-- the EC.
+set enable_mergejoin to off;
+
+explain (costs off)
+ select * from ec0 m join ec0 n on m.ff = n.ff
+ join ec1 p on m.ff + n.ff = p.f1;
+
+explain (costs off)
+ select * from ec0 m join ec0 n on m.ff = n.ff
+ join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1;
+
+reset enable_mergejoin;
+
-- this could be converted, but isn't at present
explain (costs off)
select * from tenk1 where unique1 = unique1 or unique2 = unique2;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index c7349eab933..c29d13b9fed 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2175,6 +2175,17 @@ select c.id, ss.a from c
left join (select d.a from onerow, d left join b on d.a = b.id) ss
on c.id = ss.a;
+-- check the case when the placeholder relates to an outer join and its
+-- inner in the press field but actually uses only the outer side of the join
+explain (costs off)
+SELECT q.val FROM b LEFT JOIN (
+ SELECT (q1.z IS NOT NULL) AS val
+ FROM b LEFT JOIN (
+ SELECT (t1.b_id IS NOT NULL) AS z FROM a t1 LEFT JOIN a t2 USING (id)
+ ) AS q1
+ ON true
+) AS q ON true;
+
CREATE TEMP TABLE parted_b (id int PRIMARY KEY) partition by range(id);
CREATE TEMP TABLE parted_b1 partition of parted_b for values from (0) to (10);
@@ -2410,6 +2421,489 @@ select * from
int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok
--
+-- test that semi- or inner self-joins on a unique column are removed
+--
+
+-- enable only nestloop to get more predictable plans
+set enable_hashjoin to off;
+set enable_mergejoin to off;
+
+create table sj (a int unique, b int, c int unique);
+insert into sj values (1, null, 2), (null, 2, null), (2, 1, 1);
+analyze sj;
+
+-- Trivial self-join case.
+explain (costs off)
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+
+-- Self-join removal performs after a subquery pull-up process and could remove
+-- such kind of self-join too. Check this option.
+explain (costs off)
+select * from sj p
+where exists (select * from sj q
+ where q.a = p.a and q.b < 10);
+select * from sj p
+where exists (select * from sj q
+ where q.a = p.a and q.b < 10);
+
+-- Don't remove self-join for the case of equality of two different unique columns.
+explain (costs off)
+select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null;
+
+-- Ensure that relations with TABLESAMPLE clauses are not considered as
+-- candidates to be removed
+explain (costs off)
+select * from sj t1
+ join lateral
+ (select * from sj tablesample system(t1.b)) s
+ on t1.a = s.a;
+
+-- Ensure that SJE does not form a self-referential lateral dependency
+explain (costs off)
+select * from sj t1
+ left join lateral
+ (select t1.a as t1a, * from sj t2) s
+ on true
+where t1.a = s.a;
+
+-- Degenerated case.
+explain (costs off)
+select * from
+ (select a as x from sj where false) as q1,
+ (select a as y from sj where false) as q2
+where q1.x = q2.y;
+
+-- We can't use a cross-EC generated self join qual because of current logic of
+-- the generate_join_implied_equalities routine.
+explain (costs off)
+select * from sj t1, sj t2 where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a;
+explain (costs off)
+select * from sj t1, sj t2, sj t3
+where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a and
+ t1.b = t3.b and t3.b = t3.a;
+
+-- Double self-join removal.
+-- Use a condition on "b + 1", not on "b", for the second join, so that
+-- the equivalence class is different from the first one, and we can
+-- test the non-ec code path.
+explain (costs off)
+select *
+from sj t1
+ join sj t2 on t1.a = t2.a and t1.b = t2.b
+ join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1;
+
+-- subselect that references the removed relation
+explain (costs off)
+select t1.a, (select a from sj where a = t2.a and a = t1.a)
+from sj t1, sj t2
+where t1.a = t2.a;
+
+-- self-join under outer join
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on x.a = z.q1;
+
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on y.a = z.q1;
+
+explain (costs off)
+select * from (
+ select t1.*, t2.a as ax from sj t1 join sj t2
+ on (t1.a = t2.a and t1.c * t1.c = t2.c + 2 and t2.b is null)
+) as q1
+left join
+ (select t3.* from sj t3, sj t4 where t3.c = t4.c) as q2
+on q1.ax = q2.a;
+
+-- Test that placeholders are updated correctly after join removal
+explain (costs off)
+select * from (values (1)) x
+left join (select coalesce(y.q1, 1) from int8_tbl y
+ right join sj j1 inner join sj j2 on j1.a = j2.a
+ on true) z
+on true;
+
+-- Test that references to the removed rel in lateral subqueries are replaced
+-- correctly after join removal
+explain (verbose, costs off)
+select t3.a from sj t1
+ join sj t2 on t1.a = t2.a
+ join lateral (select t1.a offset 0) t3 on true;
+
+explain (verbose, costs off)
+select t3.a from sj t1
+ join sj t2 on t1.a = t2.a
+ join lateral (select * from (select t1.a offset 0) offset 0) t3 on true;
+
+explain (verbose, costs off)
+select t4.a from sj t1
+ join sj t2 on t1.a = t2.a
+ join lateral (select t3.a from sj t3, (select t1.a) offset 0) t4 on true;
+
+-- Check updating of semi_rhs_exprs links from upper-level semi join to
+-- the removing relation
+explain (verbose, costs off)
+select t1.a from sj t1 where t1.b in (
+ select t2.b from sj t2 join sj t3 on t2.c=t3.c);
+
+--
+-- SJE corner case: uniqueness of an inner is [partially] derived from
+-- baserestrictinfo clauses.
+-- XXX: We really should allow SJE for these corner cases?
+--
+
+INSERT INTO sj VALUES (3, 1, 3);
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3;
+-- Return one row
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3;
+
+-- Remove SJ, define uniqueness by a constant
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2;
+-- Return one row
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2;
+
+-- Remove SJ, define uniqueness by a constant expression
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a;
+-- Return one row
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a;
+
+-- Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1;
+-- Return no rows
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1;
+
+-- Shuffle a clause. Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1;
+-- Return no rows
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1;
+
+-- SJE Corner case: a 'a.x=a.x' clause, have replaced with 'a.x IS NOT NULL'
+-- after SJ elimination it shouldn't be a mergejoinable clause.
+EXPLAIN (COSTS OFF)
+SELECT t4.*
+FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3
+JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42;
+SELECT t4.*
+FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3
+JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42;
+
+-- Functional index
+CREATE UNIQUE INDEX sj_fn_idx ON sj((a * a));
+
+-- Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 1;
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 2;
+
+-- Restriction contains expressions in both sides, Remove SJ.
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a);
+-- Empty set of rows should be returned
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a);
+
+-- Restriction contains volatile function - disable SJE feature.
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.c/3) = (random()/3 + 3)::int
+ AND (random()/3 + 3)::int = (j2.a*j2.c/3);
+-- Return one row
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.c/3) = (random()/3 + 3)::int
+ AND (random()/3 + 3)::int = (j2.a*j2.c/3);
+
+-- Multiple filters
+CREATE UNIQUE INDEX sj_temp_idx1 ON sj(a,b,c);
+
+-- Remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND j1.a = 2 AND j1.c = 3 AND j2.a = 2 AND 3 = j2.c;
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+ SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND 2 = j1.a AND j1.c = 3 AND j2.a = 1 AND 3 = j2.c;
+
+CREATE UNIQUE INDEX sj_temp_idx ON sj(a,b);
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2;
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 2 = j2.a;
+
+-- Don't remove SJ
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND (j1.a = 1 OR j2.a = 1);
+
+DROP INDEX sj_fn_idx, sj_temp_idx1, sj_temp_idx;
+
+-- Test that OR predicated are updated correctly after join removal
+CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT);
+CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag);
+
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM tab_with_flag
+WHERE
+ (is_flag IS NULL OR is_flag = 0)
+ AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3));
+DROP TABLE tab_with_flag;
+
+-- HAVING clause
+explain (costs off)
+select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1;
+
+-- update lateral references and range table entry reference
+explain (verbose, costs off)
+select 1 from (select x.* from sj x, sj y where x.a = y.a) q,
+ lateral generate_series(1, q.a) gs(i);
+
+explain (verbose, costs off)
+select 1 from (select y.* from sj x, sj y where x.a = y.a) q,
+ lateral generate_series(1, q.a) gs(i);
+
+-- Test that a non-EC-derived join clause is processed correctly. Use an
+-- outer join so that we can't form an EC.
+explain (costs off) select * from sj p join sj q on p.a = q.a
+ left join sj r on p.a + q.a = r.a;
+
+-- FIXME this constant false filter doesn't look good. Should we merge
+-- equivalence classes?
+explain (costs off)
+select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2;
+
+-- Check that attr_needed is updated correctly after self-join removal. In this
+-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2.
+-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b.
+-- Use index scan for k1 so that we don't get 'b' from physical tlist used for
+-- seqscan. Also disable reordering of joins because this test depends on a
+-- particular join tree.
+create table sk (a int, b int);
+create index on sk(a);
+set join_collapse_limit to 1;
+set enable_seqscan to off;
+explain (costs off) select 1 from
+ (sk k1 join sk k2 on k1.a = k2.a)
+ join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b;
+explain (costs off) select 1 from
+ (sk k1 join sk k2 on k1.a = k2.a)
+ join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b;
+reset join_collapse_limit;
+reset enable_seqscan;
+
+-- Check that clauses from the join filter list is not lost on the self-join removal
+CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int);
+EXPLAIN (VERBOSE, COSTS OFF)
+SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code;
+
+-- Shuffle self-joined relations. Only in the case of iterative deletion
+-- attempts explains of these queries will be identical.
+CREATE UNIQUE INDEX ON emp1((id*id));
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c1.id=c2.id AND c1.id*c2.id=c3.id*c3.id;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c1.id=c3.id AND c1.id*c3.id=c2.id*c2.id;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id;
+
+-- Check the usage of a parse tree by the set operations (bug #18170)
+EXPLAIN (COSTS OFF)
+SELECT c1.code FROM emp1 c1 LEFT JOIN emp1 c2 ON c1.id = c2.id
+WHERE c2.id IS NOT NULL
+EXCEPT ALL
+SELECT c3.code FROM emp1 c3;
+
+-- Check that SJE removes references from PHVs correctly
+explain (costs off)
+select * from emp1 t1 left join
+ (select coalesce(t3.code, 1) from emp1 t2
+ left join (emp1 t3 join emp1 t4 on t3.id = t4.id)
+ on true)
+on true;
+
+-- Check that SJE removes the whole PHVs correctly
+explain (verbose, costs off)
+select 1 from emp1 t1 left join
+ ((select 1 as x, * from emp1 t2) s1 inner join
+ (select * from emp1 t3) s2 on s1.id = s2.id)
+ on true
+where s1.x = 1;
+
+-- Check that PHVs do not impose any constraints on removing self joins
+explain (verbose, costs off)
+select * from emp1 t1 join emp1 t2 on t1.id = t2.id left join
+ lateral (select t1.id as t1id, * from generate_series(1,1) t3) s on true;
+
+explain (verbose, costs off)
+select * from generate_series(1,10) t1(id) left join
+ lateral (select t1.id as t1id, t2.id from emp1 t2 join emp1 t3 on t2.id = t3.id)
+on true;
+
+-- Check that SJE replaces join clauses involving the removed rel correctly
+explain (costs off)
+select * from emp1 t1
+ inner join emp1 t2 on t1.id = t2.id
+ left join emp1 t3 on t1.id > 1 and t1.id < 2;
+
+-- Check that SJE doesn't replace the target relation
+EXPLAIN (COSTS OFF)
+WITH t1 AS (SELECT * FROM emp1)
+UPDATE emp1 SET code = t1.code + 1 FROM t1
+WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code;
+
+INSERT INTO emp1 VALUES (1, 1), (2, 1);
+
+WITH t1 AS (SELECT * FROM emp1)
+UPDATE emp1 SET code = t1.code + 1 FROM t1
+WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code;
+
+TRUNCATE emp1;
+
+EXPLAIN (COSTS OFF)
+UPDATE sj sq SET b = 1 FROM sj as sz WHERE sq.a = sz.a;
+
+CREATE RULE sj_del_rule AS ON DELETE TO sj
+ DO INSTEAD
+ UPDATE sj SET a = 1 WHERE a = old.a;
+EXPLAIN (COSTS OFF) DELETE FROM sj;
+DROP RULE sj_del_rule ON sj CASCADE;
+
+-- Check that SJE does not mistakenly omit qual clauses (bug #18187)
+insert into emp1 values (1, 1);
+explain (costs off)
+select 1 from emp1 full join
+ (select * from emp1 t1 join
+ emp1 t2 join emp1 t3 on t2.id = t3.id
+ on true
+ where false) s on true
+where false;
+select 1 from emp1 full join
+ (select * from emp1 t1 join
+ emp1 t2 join emp1 t3 on t2.id = t3.id
+ on true
+ where false) s on true
+where false;
+
+-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness
+-- made with different set of quals
+insert into emp1 values (2, 1);
+explain (costs off)
+select * from emp1 t1 where exists (select * from emp1 t2
+ where t2.id = t1.code and t2.code > 0);
+select * from emp1 t1 where exists (select * from emp1 t2
+ where t2.id = t1.code and t2.code > 0);
+
+-- We can remove the join even if we find the join can't duplicate rows and
+-- the base quals of each side are different. In the following case we end up
+-- moving quals over to s1 to make it so it can't match any rows.
+create table sl(a int, b int, c int);
+create unique index on sl(a, b);
+vacuum analyze sl;
+
+-- Both sides are unique, but base quals are different
+explain (costs off)
+select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1 and t2.b = 2;
+
+-- Check NullTest in baserestrictinfo list
+explain (costs off)
+select * from sl t1, sl t2
+where t1.a = t2.a and t1.b = 1 and t2.b = 2
+ and t1.c IS NOT NULL and t2.c IS NOT NULL
+ and t2.b IS NOT NULL and t1.b IS NOT NULL
+ and t1.a IS NOT NULL and t2.a IS NOT NULL;
+explain (verbose, costs off)
+select * from sl t1, sl t2
+where t1.b = t2.b and t2.a = 3 and t1.a = 3
+ and t1.c IS NOT NULL and t2.c IS NOT NULL
+ and t2.b IS NOT NULL and t1.b IS NOT NULL
+ and t1.a IS NOT NULL and t2.a IS NOT NULL;
+
+-- Join qual isn't mergejoinable, but inner is unique.
+EXPLAIN (COSTS OFF)
+SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a AND n2.a = 1;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM
+(SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a) q0, sl
+WHERE q0.a = 1;
+
+-- Check optimization disabling if it will violate special join conditions.
+-- Two identical joined relations satisfies self join removal conditions but
+-- stay in different special join infos.
+CREATE TABLE sj_t1 (id serial, a int);
+CREATE TABLE sj_t2 (id serial, a int);
+CREATE TABLE sj_t3 (id serial, a int);
+CREATE TABLE sj_t4 (id serial, a int);
+
+CREATE UNIQUE INDEX ON sj_t3 USING btree (a,id);
+CREATE UNIQUE INDEX ON sj_t2 USING btree (id);
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj_t1
+JOIN (
+ SELECT sj_t2.id AS id FROM sj_t2
+ WHERE EXISTS
+ (
+ SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id
+ )
+ ) t2t3t4
+ON sj_t1.id = t2t3t4.id
+JOIN (
+ SELECT sj_t2.id AS id FROM sj_t2
+ WHERE EXISTS
+ (
+ SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id
+ )
+ ) _t2t3t4
+ON sj_t1.id = _t2t3t4.id;
+
+--
+-- Test RowMarks-related code
+--
+
+-- Both sides have explicit LockRows marks
+EXPLAIN (COSTS OFF)
+SELECT a1.a FROM sj a1,sj a2 WHERE (a1.a=a2.a) FOR UPDATE;
+
+reset enable_hashjoin;
+reset enable_mergejoin;
+
+--
-- Test hints given on incorrect column references are useful
--