diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index e7cb3fede66..d6d35d3430d 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1458,6 +1458,10 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, int varno; Relids clause_varnos; Relids upper_varnos; + ListCell *lc; + List *newWhere = NIL; + List *all_clauses = NIL; + bool upper_reference_exists = false; Assert(sublink->subLinkType == EXISTS_SUBLINK); @@ -1487,32 +1491,71 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, if (!simplify_EXISTS_query(root, subselect)) return NULL; - /* - * Separate out the WHERE clause. (We could theoretically also remove - * top-level plain JOIN/ON clauses, but it's probably not worth the - * trouble.) - */ - whereClause = subselect->jointree->quals; + if (subselect->jointree->quals) + all_clauses = lappend(all_clauses, subselect->jointree->quals); + subselect->jointree->quals = NULL; - /* - * The rest of the sub-select must not refer to any Vars of the parent - * query. (Vars of higher levels should be okay, though.) - */ - if (contain_vars_of_level((Node *) subselect, 1)) - return NULL; + /* Gather all clauses in main list for the further consideration */ + all_clauses = list_concat(all_clauses, subselect->jointree->fromlist); /* - * On the other hand, the WHERE clause must contain some Vars of the - * parent query, else it's not gonna be a join. + * We will able to remove top-level plain JOIN/ON clauses if they are not outer join. */ - if (!contain_vars_of_level(whereClause, 1)) - return NULL; + foreach (lc, all_clauses) + { + Node *je = ((Node *) lfirst(lc)); + + whereClause = copyObject(je); + + if (IsA(whereClause, RangeTblRef)) + continue; + + if (IsA(whereClause, JoinExpr)) + { + if (((JoinExpr *) whereClause)->jointype != JOIN_INNER) + { + /* + * Clauses must not refer to any Vars of the parent + * query. (Vars of higher levels should be okay, though.) + */ + if (contain_vars_of_level(whereClause, 1)) + return NULL; + else + continue; + } + else if (((JoinExpr *) whereClause)->quals != NULL) + whereClause = ((JoinExpr *) whereClause)->quals; + } + + /* + * We don't risk optimizing if the WHERE clause is volatile, either. + */ + if (contain_volatile_functions(whereClause)) + return NULL; + + /* + * Clean up the WHERE clause by doing const-simplification etc on it. + */ + whereClause = eval_const_expressions(root, whereClause); + whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false); + + if(!IsA(whereClause, JoinExpr)) + newWhere = lappend(newWhere, whereClause); + else + return NULL; + + if (contain_vars_of_level((Node *) whereClause, 1)) + upper_reference_exists = true; + } + + list_free(all_clauses); /* - * We don't risk optimizing if the WHERE clause is volatile, either. + * There are no WHERE clause containing some Vars of the + * parent query, so it's not gonna be a join. */ - if (contain_volatile_functions(whereClause)) + if(!upper_reference_exists) return NULL; /* @@ -1537,7 +1580,6 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, */ rtoffset = list_length(parse->rtable); OffsetVarNodes((Node *) subselect, rtoffset, 0); - OffsetVarNodes(whereClause, rtoffset, 0); /* * Upper-level vars in subquery will now be one level closer to their @@ -1545,7 +1587,9 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, * becomes level zero. */ IncrementVarSublevelsUp((Node *) subselect, -1, 1); - IncrementVarSublevelsUp(whereClause, -1, 1); + + OffsetVarNodes((Node *) newWhere, rtoffset, 0); + IncrementVarSublevelsUp((Node *) newWhere, -1, 1); /* * Now that the WHERE clause is adjusted to match the parent query @@ -1553,7 +1597,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, * The ones <= rtoffset belong to the upper query; the ones > rtoffset do * not. */ - clause_varnos = pull_varnos(root, whereClause); + clause_varnos = pull_varnos(root, (Node *) newWhere); upper_varnos = NULL; varno = -1; while ((varno = bms_next_member(clause_varnos, varno)) >= 0) @@ -1571,6 +1615,31 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, if (!bms_is_subset(upper_varnos, available_rels)) return NULL; + /* + * In case of a successful attempt, replaces it with the correct condition. + * We were sure that inner relations are independent, so we confidently + * can replace their join condition on true. + */ + foreach(lc, subselect->jointree->fromlist) + { + Node *node = lfirst(lc); + + if (IsA(node, RangeTblRef)) + continue; + + if ((IsA(node, JoinExpr) && ((JoinExpr *)node)->jointype != JOIN_INNER)) + continue; + + if (IsA(node, JoinExpr) && ((JoinExpr *) node)->quals != NULL) + ((JoinExpr *) node)->quals = (Node *) makeConst(BOOLOID, + -1, + InvalidOid, + sizeof(bool), + (Datum) 1, + false, + true); + } + /* * Now we can attach the modified subquery rtable to the parent. This also * adds subquery's RTEPermissionInfos into the upper query. @@ -1592,7 +1661,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, result->rarg = (Node *) subselect->jointree; result->usingClause = NIL; result->join_using_alias = NULL; - result->quals = whereClause; + result->quals = (Node *) make_ands_explicit(newWhere); result->alias = NULL; result->rtindex = 0; /* we don't need an RTE for it */ diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 40d8056fcea..965c609b045 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -813,6 +813,643 @@ where exists ( where road.name = ss.f1 ); rollback; -- +-- Test case for exist sublink where we can consider some undependent expression +-- with outer link +-- +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + -> Nested Loop + -> Index Only Scan using tb_pkey on tb + Index Cond: (id = ta.id) + -> Seq Scan on tc +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + -> Nested Loop + -> Index Only Scan using tc_pkey on tc + Index Cond: (id = ta.id) + -> Seq Scan on tb +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON 1 = 1 + WHERE ta.id = tc.id +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + -> Nested Loop + -> Index Only Scan using tc_pkey on tc + Index Cond: (id = ta.id) + -> Seq Scan on tb +(6 rows) + +-- Join compound expression +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id + AND ta.id = tb.id +); + QUERY PLAN +------------------------------------ + Hash Right Semi Join + Hash Cond: (tc.id = ta.id) + -> Hash Join + Hash Cond: (tb.id = tc.id) + -> Seq Scan on tb + -> Hash + -> Seq Scan on tc + -> Hash + -> Seq Scan on ta +(9 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta ta1 +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON tb.aval = tc.aid + AND tb.aval = ta1.id +); + QUERY PLAN +---------------------------------------------------- + Hash Join + Hash Cond: (ta1.id = tb.aval) + -> Seq Scan on ta ta1 + -> Hash + -> HashAggregate + Group Key: tb.aval + -> Merge Join + Merge Cond: (tb.aval = tc.aid) + -> Sort + Sort Key: tb.aval + -> Seq Scan on tb + -> Sort + Sort Key: tc.aid + -> Seq Scan on tc +(14 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +JOIN tb ON true +WHERE EXISTS ( + SELECT 1 + FROM tb tb1 + JOIN tc ON ta.id = tb.id +); + QUERY PLAN +------------------------------------ + Nested Loop Semi Join + -> Hash Join + Hash Cond: (ta.id = tb.id) + -> Seq Scan on ta + -> Hash + -> Seq Scan on tb + -> Nested Loop + -> Seq Scan on tb tb1 + -> Materialize + -> Seq Scan on tc +(10 rows) + +-- Compound expression with const type or other type of expressions +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id + AND ta.id = 1 +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Index Only Scan using ta_pkey on ta + Index Cond: (id = 1) + -> Nested Loop + -> Index Only Scan using tc_pkey on tc + Index Cond: (id = 1) + -> Seq Scan on tb +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id + AND tb.id = 1 +); + QUERY PLAN +------------------------------------------------- + Hash Right Semi Join + Hash Cond: (tc.id = ta.id) + -> Nested Loop + -> Index Only Scan using tb_pkey on tb + Index Cond: (id = 1) + -> Seq Scan on tc + -> Hash + -> Seq Scan on ta +(8 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + RIGHT JOIN tc ON ta.id = tc.id + WHERE ta.val = 1 +); + QUERY PLAN +---------------------------------------------- + Seq Scan on ta + Filter: EXISTS(SubPlan 1) + SubPlan 1 + -> Result + One-Time Filter: (ta.val = 1) + -> Nested Loop Left Join + Join Filter: (ta.id = tc.id) + -> Seq Scan on tc + -> Materialize + -> Seq Scan on tb +(10 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND tb.aval = ANY ('{1}'::int[]) +); + QUERY PLAN +------------------------------------------------------------------------- + Hash Join + Hash Cond: (ta.id = tb.id) + -> Seq Scan on ta + -> Hash + -> HashAggregate + Group Key: tb.id + -> Nested Loop + -> Seq Scan on tc + -> Materialize + -> Seq Scan on tb + Filter: (aval = ANY ('{1}'::integer[])) +(11 rows) + +-- Exists SubLink expression within expression +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta ta1 +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tb ON ta.id = ta1.id + AND ta1.val = 1 + WHERE EXISTS ( + SELECT 1 + FROM ta ta2 + WHERE ta2.id = ta1.id + ) +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta ta2 + Filter: (val = 1) + -> Nested Loop + -> Index Only Scan using ta_pkey on ta + Index Cond: (id = ta2.id) + -> Seq Scan on tb +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta ta1 +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tb ON ta.val = ta1.id + AND ta1.id = 1 + WHERE EXISTS ( + SELECT 1 + FROM ta ta2 + WHERE ta2.id = ta.id + ) +); + QUERY PLAN +----------------------------------------------- + Nested Loop Semi Join + -> Index Only Scan using ta_pkey on ta ta1 + Index Cond: (id = 1) + -> Nested Loop + -> Seq Scan on tb + -> Materialize + -> Seq Scan on ta ta2 + Filter: (val = 1) +(8 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE ta.id = tb.id + AND EXISTS ( + SELECT 1 + FROM tc + WHERE tc.id = tb.id + AND tc.aid + tb.aval > 0 + ) +); + QUERY PLAN +----------------------------------------------------- + Hash Semi Join + Hash Cond: (ta.id = tc.id) + -> Seq Scan on ta + -> Hash + -> Hash Join + Hash Cond: (tb.id = tc.id) + Join Filter: ((tc.aid + tb.aval) > 0) + -> Seq Scan on tb + -> Hash + -> Seq Scan on tc +(10 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE ta.id = tb.id + AND EXISTS ( + SELECT 1 + FROM tc + WHERE tc.id = tb.id + AND tc.aid + ta.val > 0 + ) +); + QUERY PLAN +---------------------------------------- + Hash Join + Hash Cond: (ta.id = tb.id) + Join Filter: EXISTS(SubPlan 1) + -> Seq Scan on ta + -> Hash + -> Seq Scan on tb + SubPlan 1 + -> Index Scan using tc_pkey on tc + Index Cond: (id = tb.id) + Filter: ((aid + ta.val) > 0) +(10 rows) + +-- Check with NULL and NOT NULL expressions +ALTER TABLE ta ADD COLUMN is_active bool; +UPDATE ta SET is_active = true; +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND COALESCE(ta.is_active, true) +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + Filter: COALESCE(is_active, true) + -> Nested Loop + -> Index Only Scan using tb_pkey on tb + Index Cond: (id = ta.id) + -> Seq Scan on tc +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM tb +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tc ON ta.id = tb.id + AND COALESCE(ta.is_active, true) +); + QUERY PLAN +------------------------------------------------------------------- + Hash Join + Hash Cond: (tb.id = ta.id) + -> Seq Scan on tb + -> Hash + -> HashAggregate + Group Key: ta.id + -> Nested Loop + -> Seq Scan on tc + -> Materialize + -> Seq Scan on ta + Filter: COALESCE(is_active, true) +(11 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND CASE + WHEN ta.is_active THEN true + ELSE false + END = true +); + QUERY PLAN +-------------------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + Filter: CASE WHEN is_active THEN true ELSE false END + -> Nested Loop + -> Index Only Scan using tb_pkey on tb + Index Cond: (id = ta.id) + -> Seq Scan on tc +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM tb +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tc ON ta.id = tb.id + AND CASE + WHEN ta.is_active THEN true + ELSE false + END = true +); + QUERY PLAN +-------------------------------------------------------------------------------------- + Hash Join + Hash Cond: (tb.id = ta.id) + -> Seq Scan on tb + -> Hash + -> HashAggregate + Group Key: ta.id + -> Nested Loop + -> Seq Scan on tc + -> Materialize + -> Seq Scan on ta + Filter: CASE WHEN is_active THEN true ELSE false END +(11 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND ta.is_active +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + Filter: is_active + -> Nested Loop + -> Index Only Scan using tb_pkey on tb + Index Cond: (id = ta.id) + -> Seq Scan on tc +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND ta.is_active IS NOT NULL +); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on ta + Filter: (is_active IS NOT NULL) + -> Nested Loop + -> Index Only Scan using tb_pkey on tb + Index Cond: (id = ta.id) + -> Seq Scan on tc +(7 rows) + +-- Disabled pull up because it is applcapable for INNER JOIN connection +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + RIGHT JOIN tc ON ta.id = tc.id +); + QUERY PLAN +---------------------------------------- + Seq Scan on ta + Filter: EXISTS(SubPlan 1) + SubPlan 1 + -> Nested Loop Left Join + Join Filter: (ta.id = tc.id) + -> Seq Scan on tc + -> Materialize + -> Seq Scan on tb +(8 rows) + +-- Disable pull-up due to lack of the outer var +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON tc.id = tb.id +); + QUERY PLAN +--------------------------------------------------- + Result + One-Time Filter: (InitPlan 1).col1 + InitPlan 1 + -> Nested Loop + -> Seq Scan on tb + -> Index Only Scan using tc_pkey on tc + Index Cond: (id = tb.id) + -> Seq Scan on ta +(8 rows) + +CREATE TABLE td (id int, tc_id bytea, val int); +INSERT INTO td +SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val +FROM generate_series(1, 25) AS g(id) +UNION ALL +SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val +FROM generate_series(26, 50) AS g(id) +UNION ALL +SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val +FROM generate_series(51, 75) AS g(id) +UNION ALL +SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val +FROM generate_series(76, 100) AS g(id); +CREATE TABLE te (id int, tc_id bytea, val int); +INSERT INTO te SELECT * FROM td; +EXPLAIN (COSTS OFF) +SELECT t1.* +FROM ta t1 +WHERE EXISTS ( + SELECT 1 + FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL + JOIN tb t2 ON t2.id = t1.id + WHERE EXISTS ( + SELECT 1 + FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL + JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea) + WHERE EXISTS ( + SELECT 1 + FROM te t4 + WHERE t4.tc_id = t3.tc_id + AND t4.val = t2.aval + ) = EXISTS ( + SELECT 1 + FROM tc t5 + WHERE t5.id = t3.id + ) + ) +); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------- + Merge Semi Join + Merge Cond: (t1.id = t2.id) + -> Index Scan using ta_pkey on ta t1 + -> Nested Loop Semi Join + Join Filter: ((ANY ((t3.tc_id = (hashed SubPlan 2).col1) AND (t2.aval = (hashed SubPlan 2).col2))) = (ANY (t3.id = (hashed SubPlan 4).col1))) + -> Index Scan using tb_pkey on tb t2 + -> Materialize + -> Seq Scan on td t3 + Filter: (tc_id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[])) + SubPlan 2 + -> Seq Scan on te t4 + SubPlan 4 + -> Seq Scan on tc t5 +(13 rows) + +DROP TABLE td, te; +CREATE TABLE tst1 (id int, mes varchar(20)); +CREATE TABLE tst2 (id int, tst1_id int, type_id int); +CREATE TABLE tst3 (id bytea); +CREATE TABLE tst4 (id int, tst3_id bytea, type_id int); +CREATE TABLE tst5 (id int, tst3_id bytea, type_id int); +INSERT INTO tst1 VALUES (1, 'test1'); +INSERT INTO tst1 VALUES (2, 'test2'); +INSERT INTO tst1 VALUES (3, 'test3'); +INSERT INTO tst2 VALUES (1, 2, 7); +INSERT INTO tst2 VALUES (1, 2, 6); +INSERT INTO tst2 VALUES (2, 3, 7); +INSERT INTO tst3 +SELECT ('Test' || g.id)::bytea AS id + FROM generate_series(1, 5) AS g(id); +INSERT INTO tst4 +(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id + FROM generate_series(1, 25) AS g(id)) +UNION ALL +(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id + FROM generate_series(26, 50) AS g(id)) +UNION ALL +(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id + FROM generate_series(51, 75) AS g(id)) +UNION ALL +(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id + FROM generate_series(76, 100) AS g(id)); +INSERT INTO tst5 SELECT * FROM tst4; +VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5; +-- Case with two exists in OpExpr, in the first one t3.id is the reference to the parent query +-- and t2.type-id is the reference to grandparent query +EXPLAIN (COSTS OFF) +SELECT t1.* +FROM tst1 t1 +WHERE EXISTS ( + SELECT 1 + FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL + JOIN tst2 t2 ON t2.tst1_id = t1.id + WHERE EXISTS ( + SELECT 1 + FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL + JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea) + WHERE EXISTS ( + SELECT 1 + FROM tst4 t4 + WHERE t4.tst3_id = t3.id + AND t4.type_id = t2.type_id + ) = EXISTS ( + SELECT 1 + FROM tst5 t5 + WHERE t5.tst3_id = t3.id + ) + ) +); + QUERY PLAN +--------------------------------------------------------------------------------------- + Hash Right Semi Join + Hash Cond: (t2.tst1_id = t1.id) + -> Nested Loop Semi Join + Join Filter: (EXISTS(SubPlan 1) = EXISTS(SubPlan 3)) + -> Seq Scan on tst2 t2 + -> Materialize + -> Seq Scan on tst3 t3 + Filter: (id = ANY ('{"\\x5465737431","\\x5465737432"}'::bytea[])) + SubPlan 1 + -> Seq Scan on tst4 t4 + Filter: ((tst3_id = t3.id) AND (type_id = t2.type_id)) + SubPlan 3 + -> Seq Scan on tst5 t5 + Filter: (tst3_id = t3.id) + -> Hash + -> Seq Scan on tst1 t1 +(16 rows) + +DROP TABLE tst1, tst2, tst3, tst4, tst5; -- Test case for sublinks pushed down into subselects via join alias expansion -- select diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index fec38ef85a6..84f0f2101bb 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -440,6 +440,373 @@ where exists ( rollback; -- +-- Test case for exist sublink where we can consider some undependent expression +-- with outer link +-- + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON 1 = 1 + WHERE ta.id = tc.id +); + +-- Join compound expression +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id + AND ta.id = tb.id +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta ta1 +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON tb.aval = tc.aid + AND tb.aval = ta1.id +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +JOIN tb ON true +WHERE EXISTS ( + SELECT 1 + FROM tb tb1 + JOIN tc ON ta.id = tb.id +); + +-- Compound expression with const type or other type of expressions +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id + AND ta.id = 1 +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tc.id + AND tb.id = 1 +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + RIGHT JOIN tc ON ta.id = tc.id + WHERE ta.val = 1 +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND tb.aval = ANY ('{1}'::int[]) +); + +-- Exists SubLink expression within expression +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta ta1 +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tb ON ta.id = ta1.id + AND ta1.val = 1 + WHERE EXISTS ( + SELECT 1 + FROM ta ta2 + WHERE ta2.id = ta1.id + ) +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta ta1 +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tb ON ta.val = ta1.id + AND ta1.id = 1 + WHERE EXISTS ( + SELECT 1 + FROM ta ta2 + WHERE ta2.id = ta.id + ) +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE ta.id = tb.id + AND EXISTS ( + SELECT 1 + FROM tc + WHERE tc.id = tb.id + AND tc.aid + tb.aval > 0 + ) +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + WHERE ta.id = tb.id + AND EXISTS ( + SELECT 1 + FROM tc + WHERE tc.id = tb.id + AND tc.aid + ta.val > 0 + ) +); + +-- Check with NULL and NOT NULL expressions +ALTER TABLE ta ADD COLUMN is_active bool; +UPDATE ta SET is_active = true; + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND COALESCE(ta.is_active, true) +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM tb +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tc ON ta.id = tb.id + AND COALESCE(ta.is_active, true) +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND CASE + WHEN ta.is_active THEN true + ELSE false + END = true +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM tb +WHERE EXISTS ( + SELECT 1 + FROM ta + JOIN tc ON ta.id = tb.id + AND CASE + WHEN ta.is_active THEN true + ELSE false + END = true +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND ta.is_active +); + +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON ta.id = tb.id + AND ta.is_active IS NOT NULL +); + + +-- Disabled pull up because it is applcapable for INNER JOIN connection +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + RIGHT JOIN tc ON ta.id = tc.id +); + +-- Disable pull-up due to lack of the outer var +EXPLAIN (COSTS OFF) +SELECT 1 +FROM ta +WHERE EXISTS ( + SELECT 1 + FROM tb + JOIN tc ON tc.id = tb.id +); + +CREATE TABLE td (id int, tc_id bytea, val int); + +INSERT INTO td +SELECT g.id, 'Test1'::bytea AS tc_id, 6 AS val +FROM generate_series(1, 25) AS g(id) + +UNION ALL + +SELECT g.id, 'Test2'::bytea AS tc_id, 7 AS val +FROM generate_series(26, 50) AS g(id) + +UNION ALL + +SELECT g.id, 'Test4'::bytea AS tc_id, 6 AS val +FROM generate_series(51, 75) AS g(id) + +UNION ALL + +SELECT g.id, 'Test5'::bytea AS tc_id, 7 AS val +FROM generate_series(76, 100) AS g(id); + +CREATE TABLE te (id int, tc_id bytea, val int); +INSERT INTO te SELECT * FROM td; + +EXPLAIN (COSTS OFF) +SELECT t1.* +FROM ta t1 +WHERE EXISTS ( + SELECT 1 + FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL + JOIN tb t2 ON t2.id = t1.id + WHERE EXISTS ( + SELECT 1 + FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL + JOIN td t3 ON t3.tc_id IN ('Test1'::bytea, 'Test2'::bytea) + WHERE EXISTS ( + SELECT 1 + FROM te t4 + WHERE t4.tc_id = t3.tc_id + AND t4.val = t2.aval + ) = EXISTS ( + SELECT 1 + FROM tc t5 + WHERE t5.id = t3.id + ) + ) +); + +DROP TABLE td, te; + +CREATE TABLE tst1 (id int, mes varchar(20)); +CREATE TABLE tst2 (id int, tst1_id int, type_id int); +CREATE TABLE tst3 (id bytea); +CREATE TABLE tst4 (id int, tst3_id bytea, type_id int); +CREATE TABLE tst5 (id int, tst3_id bytea, type_id int); + +INSERT INTO tst1 VALUES (1, 'test1'); +INSERT INTO tst1 VALUES (2, 'test2'); +INSERT INTO tst1 VALUES (3, 'test3'); +INSERT INTO tst2 VALUES (1, 2, 7); +INSERT INTO tst2 VALUES (1, 2, 6); +INSERT INTO tst2 VALUES (2, 3, 7); + +INSERT INTO tst3 +SELECT ('Test' || g.id)::bytea AS id + FROM generate_series(1, 5) AS g(id); + +INSERT INTO tst4 +(SELECT g.id, 'Test1'::bytea AS tst3_id, 6 AS type_id + FROM generate_series(1, 25) AS g(id)) +UNION ALL +(SELECT g.id, 'Test2'::bytea AS tst3_id, 7 AS type_id + FROM generate_series(26, 50) AS g(id)) +UNION ALL +(SELECT g.id, 'Test4'::bytea AS tst3_id, 6 AS type_id + FROM generate_series(51, 75) AS g(id)) +UNION ALL +(SELECT g.id, 'Test5'::bytea AS tst3_id, 7 AS type_id + FROM generate_series(76, 100) AS g(id)); +INSERT INTO tst5 SELECT * FROM tst4; + +VACUUM (ANALYZE) tst1, tst2, tst3, tst4, tst5; + +-- Case with two exists in OpExpr, in the first one t3.id is the reference to the parent query +-- and t2.type-id is the reference to grandparent query +EXPLAIN (COSTS OFF) +SELECT t1.* +FROM tst1 t1 +WHERE EXISTS ( + SELECT 1 + FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL + JOIN tst2 t2 ON t2.tst1_id = t1.id + WHERE EXISTS ( + SELECT 1 + FROM (SELECT 1 AS SDBL_DUMMY) SDBL_DUAL + JOIN tst3 t3 ON t3.id IN ('Test1'::bytea, 'Test2'::bytea) + WHERE EXISTS ( + SELECT 1 + FROM tst4 t4 + WHERE t4.tst3_id = t3.id + AND t4.type_id = t2.type_id + ) = EXISTS ( + SELECT 1 + FROM tst5 t5 + WHERE t5.tst3_id = t3.id + ) + ) +); + +DROP TABLE tst1, tst2, tst3, tst4, tst5; + -- Test case for sublinks pushed down into subselects via join alias expansion --