Fix mis-handling of outer join quals generated by EquivalenceClasses.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 23 Feb 2023 16:05:58 +0000 (11:05 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 23 Feb 2023 16:05:58 +0000 (11:05 -0500)
It's possible, in admittedly-rather-contrived cases, for an eclass
to generate a derived "join" qual that constrains the post-outer-join
value(s) of some RHS variable(s) without mentioning the LHS at all.
While the mechanisms were set up to work for this, we fell foul of
the "get_common_eclass_indexes" filter installed by commit 3373c7155:
it could decide that such an eclass wasn't relevant to the join, so
that the required qual clause wouldn't get emitted there or anywhere
else.

To fix, apply get_common_eclass_indexes only at inner joins, where
its rule is still valid.  At an outer join, fall back to examining all
eclasses that mention either input (or the OJ relid, though it should
be impossible for an eclass to mention that without mentioning either
input).  Perhaps we can improve on that later, but the cost/benefit of
adding more complexity to skip some irrelevant eclasses is dubious.

To allow cheaply distinguishing outer from inner joins, pass the
ojrelid to generate_join_implied_equalities as a separate argument.
This also allows cleaning up some sloppiness that had crept into
the definition of its join_relids argument, and it allows accurate
calculation of nominal_join_relids for a child outer join.  (The
latter oversight seems not to have been a live bug, but it certainly
could have caused problems in future.)

Also fix what might be a live bug in check_index_predicates: it was
being sloppy about what it passed to generate_join_implied_equalities.

Per report from Richard Guo.

Discussion: https://postgr.es/m/CAMbWs4-DsTBfOvXuw64GdFss2=M5cwtEhY=0DCS7t2gT7P6hSA@mail.gmail.com

src/backend/optimizer/path/equivclass.c
src/backend/optimizer/path/indxpath.c
src/backend/optimizer/plan/analyzejoins.c
src/backend/optimizer/util/relnode.c
src/include/optimizer/paths.h
src/test/regress/expected/join.out
src/test/regress/sql/join.sql

index faabe4d06521f107288be4226cae879c3931354f..9949d5b1d3b3ca75d7aff4ceb58aa1e714070779 100644 (file)
@@ -1366,15 +1366,19 @@ generate_base_implied_equalities_broken(PlannerInfo *root,
  * commutative duplicates, i.e. if the algorithm selects "a.x = b.y" but
  * we already have "b.y = a.x", we return the existing clause.
  *
- * join_relids should always equal bms_union(outer_relids, inner_rel->relids).
- * We could simplify this function's API by computing it internally, but in
- * most current uses, the caller has the value at hand anyway.
+ * If we are considering an outer join, ojrelid is the associated OJ relid,
+ * otherwise it's zero.
+ *
+ * join_relids should always equal bms_union(outer_relids, inner_rel->relids)
+ * plus ojrelid if that's not zero.  We could simplify this function's API by
+ * computing it internally, but most callers have the value at hand anyway.
  */
 List *
 generate_join_implied_equalities(PlannerInfo *root,
                                 Relids join_relids,
                                 Relids outer_relids,
-                                RelOptInfo *inner_rel)
+                                RelOptInfo *inner_rel,
+                                Index ojrelid)
 {
    List       *result = NIL;
    Relids      inner_relids = inner_rel->relids;
@@ -1392,6 +1396,8 @@ generate_join_implied_equalities(PlannerInfo *root,
        nominal_inner_relids = inner_rel->top_parent_relids;
        /* ECs will be marked with the parent's relid, not the child's */
        nominal_join_relids = bms_union(outer_relids, nominal_inner_relids);
+       if (ojrelid != 0)
+           nominal_join_relids = bms_add_member(nominal_join_relids, ojrelid);
    }
    else
    {
@@ -1400,10 +1406,23 @@ generate_join_implied_equalities(PlannerInfo *root,
    }
 
    /*
-    * Get all eclasses that mention both inner and outer sides of the join
+    * Examine all potentially-relevant eclasses.
+    *
+    * If we are considering an outer join, we must include "join" clauses
+    * that mention either input rel plus the outer join's relid; these
+    * represent post-join filter clauses that have to be applied at this
+    * join.  We don't have infrastructure that would let us identify such
+    * eclasses cheaply, so just fall back to considering all eclasses
+    * mentioning anything in nominal_join_relids.
+    *
+    * At inner joins, we can be smarter: only consider eclasses mentioning
+    * both input rels.
     */
-   matching_ecs = get_common_eclass_indexes(root, nominal_inner_relids,
-                                            outer_relids);
+   if (ojrelid != 0)
+       matching_ecs = get_eclass_indexes_for_relids(root, nominal_join_relids);
+   else
+       matching_ecs = get_common_eclass_indexes(root, nominal_inner_relids,
+                                                outer_relids);
 
    i = -1;
    while ((i = bms_next_member(matching_ecs, i)) >= 0)
@@ -1447,6 +1466,10 @@ generate_join_implied_equalities(PlannerInfo *root,
 /*
  * generate_join_implied_equalities_for_ecs
  *   As above, but consider only the listed ECs.
+ *
+ * For the sole current caller, we can assume ojrelid == 0, that is we are
+ * not interested in outer-join filter clauses.  This might need to change
+ * in future.
  */
 List *
 generate_join_implied_equalities_for_ecs(PlannerInfo *root,
@@ -2970,6 +2993,8 @@ generate_implied_equalities_for_column(PlannerInfo *root,
  * generate_join_implied_equalities().  Note it's OK to occasionally say "yes"
  * incorrectly.  Hence we don't bother with details like whether the lack of a
  * cross-type operator might prevent the clause from actually being generated.
+ * False negatives are not always fatal either: they will discourage, but not
+ * completely prevent, investigation of particular join pathways.
  */
 bool
 have_relevant_eclass_joinclause(PlannerInfo *root,
@@ -2978,7 +3003,16 @@ have_relevant_eclass_joinclause(PlannerInfo *root,
    Bitmapset  *matching_ecs;
    int         i;
 
-   /* Examine only eclasses mentioning both rel1 and rel2 */
+   /*
+    * Examine only eclasses mentioning both rel1 and rel2.
+    *
+    * Note that we do not consider the possibility of an eclass generating
+    * "join" clauses that mention just one of the rels plus an outer join
+    * that could be formed from them.  Although such clauses must be
+    * correctly enforced when we form the outer join, they don't seem like
+    * sufficient reason to prioritize this join over other ones.  The join
+    * ordering rules will force the join to be made when necessary.
+    */
    matching_ecs = get_common_eclass_indexes(root, rel1->relids,
                                             rel2->relids);
 
index 721a075201800997eda3be27271ea76db4487e1d..011a0337dade5ca530c9edc6a218ef8c4addc8b2 100644 (file)
@@ -3349,12 +3349,15 @@ check_index_predicates(PlannerInfo *root, RelOptInfo *rel)
     * relid sets for generate_join_implied_equalities is slightly tricky
     * because the rel could be a child rel rather than a true baserel, and in
     * that case we must subtract its parents' relid(s) from all_query_rels.
+    * Additionally, we mustn't consider clauses that are only computable
+    * after outer joins that can null the rel.
     */
    if (rel->reloptkind == RELOPT_OTHER_MEMBER_REL)
        otherrels = bms_difference(root->all_query_rels,
                                   find_childrel_parents(root, rel));
    else
        otherrels = bms_difference(root->all_query_rels, rel->relids);
+   otherrels = bms_del_members(otherrels, rel->nulling_relids);
 
    if (!bms_is_empty(otherrels))
        clauselist =
@@ -3363,7 +3366,8 @@ check_index_predicates(PlannerInfo *root, RelOptInfo *rel)
                                                         bms_union(rel->relids,
                                                                   otherrels),
                                                         otherrels,
-                                                        rel));
+                                                        rel,
+                                                        0));
 
    /*
     * Normally we remove quals that are implied by a partial index's
index f79bc4430c1d0928b60f9b2d71a8ef1549de1c0b..98cf3494e6fc454f36814b556aa34be1b2e431ca 100644 (file)
@@ -669,7 +669,8 @@ reduce_unique_semijoins(PlannerInfo *root)
            list_concat(generate_join_implied_equalities(root,
                                                         joinrelids,
                                                         sjinfo->min_lefthand,
-                                                        innerrel),
+                                                        innerrel,
+                                                        0),
                        innerrel->joininfo);
 
        /* Test whether the innerrel is unique for those clauses. */
index d29a25f8aa6185d5872e3557386c78b7ae5fd75c..9ad44a0508cc6e7fa9c03d01eca0e1827bd7e3a1 100644 (file)
@@ -47,7 +47,8 @@ static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
 static List *build_joinrel_restrictlist(PlannerInfo *root,
                                        RelOptInfo *joinrel,
                                        RelOptInfo *outer_rel,
-                                       RelOptInfo *inner_rel);
+                                       RelOptInfo *inner_rel,
+                                       SpecialJoinInfo *sjinfo);
 static void build_joinrel_joinlist(RelOptInfo *joinrel,
                                   RelOptInfo *outer_rel,
                                   RelOptInfo *inner_rel);
@@ -667,7 +668,8 @@ build_join_rel(PlannerInfo *root,
            *restrictlist_ptr = build_joinrel_restrictlist(root,
                                                           joinrel,
                                                           outer_rel,
-                                                          inner_rel);
+                                                          inner_rel,
+                                                          sjinfo);
        return joinrel;
    }
 
@@ -779,7 +781,8 @@ build_join_rel(PlannerInfo *root,
     * for set_joinrel_size_estimates().)
     */
    restrictlist = build_joinrel_restrictlist(root, joinrel,
-                                             outer_rel, inner_rel);
+                                             outer_rel, inner_rel,
+                                             sjinfo);
    if (restrictlist_ptr)
        *restrictlist_ptr = restrictlist;
    build_joinrel_joinlist(joinrel, outer_rel, inner_rel);
@@ -1220,6 +1223,7 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
  * 'joinrel' is a join relation node
  * 'outer_rel' and 'inner_rel' are a pair of relations that can be joined
  *     to form joinrel.
+ * 'sjinfo': join context info
  *
  * build_joinrel_restrictlist() returns a list of relevant restrictinfos,
  * whereas build_joinrel_joinlist() stores its results in the joinrel's
@@ -1234,7 +1238,8 @@ static List *
 build_joinrel_restrictlist(PlannerInfo *root,
                           RelOptInfo *joinrel,
                           RelOptInfo *outer_rel,
-                          RelOptInfo *inner_rel)
+                          RelOptInfo *inner_rel,
+                          SpecialJoinInfo *sjinfo)
 {
    List       *result;
    Relids      both_input_relids;
@@ -1260,7 +1265,8 @@ build_joinrel_restrictlist(PlannerInfo *root,
                         generate_join_implied_equalities(root,
                                                          joinrel->relids,
                                                          outer_rel->relids,
-                                                         inner_rel));
+                                                         inner_rel,
+                                                         sjinfo->ojrelid));
 
    return result;
 }
@@ -1543,7 +1549,8 @@ get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel,
                           generate_join_implied_equalities(root,
                                                            joinrelids,
                                                            required_outer,
-                                                           baserel));
+                                                           baserel,
+                                                           0));
 
    /* Compute set of serial numbers of the enforced clauses */
    pserials = NULL;
@@ -1665,7 +1672,8 @@ get_joinrel_parampathinfo(PlannerInfo *root, RelOptInfo *joinrel,
    eclauses = generate_join_implied_equalities(root,
                                                join_and_req,
                                                required_outer,
-                                               joinrel);
+                                               joinrel,
+                                               0);
    /* We only want ones that aren't movable to lower levels */
    dropped_ecs = NIL;
    foreach(lc, eclauses)
index 736d78ea4c3e36028935c6827ef5300c7162cf3e..5b9db7733d2edc275472606b68d4f081677abef6 100644 (file)
@@ -149,7 +149,8 @@ extern void generate_base_implied_equalities(PlannerInfo *root);
 extern List *generate_join_implied_equalities(PlannerInfo *root,
                                              Relids join_relids,
                                              Relids outer_relids,
-                                             RelOptInfo *inner_rel);
+                                             RelOptInfo *inner_rel,
+                                             Index ojrelid);
 extern List *generate_join_implied_equalities_for_ecs(PlannerInfo *root,
                                                      List *eclasses,
                                                      Relids join_relids,
index 5a2756b333c459185c109e71374957ac85e8c34b..e293de03c07c61ce5d9af4c0aca870d3f075d3ca 100644 (file)
@@ -4100,6 +4100,38 @@ select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
 ---------+---------+---------+----------
 (0 rows)
 
+-- related case
+explain (costs off)
+select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1,
+  lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss;
+                QUERY PLAN                 
+-------------------------------------------
+ Nested Loop
+   ->  Hash Left Join
+         Hash Cond: (t1.q2 = t2.q1)
+         Filter: (t2.q1 = t2.q2)
+         ->  Seq Scan on int8_tbl t1
+         ->  Hash
+               ->  Seq Scan on int8_tbl t2
+   ->  Seq Scan on int8_tbl t3
+(8 rows)
+
+select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1,
+  lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss;
+        q1        |        q2        |        q1        |        q2        |        q1        |        q2         
+------------------+------------------+------------------+------------------+------------------+-------------------
+              123 | 4567890123456789 | 4567890123456789 | 4567890123456789 |              123 |               456
+              123 | 4567890123456789 | 4567890123456789 | 4567890123456789 |              123 |  4567890123456789
+              123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 |               123
+              123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 |  4567890123456789
+              123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 |              123 |               456
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 |              123 |  4567890123456789
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 |               123
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 |  4567890123456789
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
+(10 rows)
+
 --
 -- check handling of join aliases when flattening multiple levels of subquery
 --
index a38034bce7472586dee01e750693e274766f6b7f..5c0328ed764d331d5a25676b0ebc2438e4a557cb 100644 (file)
@@ -1386,6 +1386,15 @@ select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
   from tenk1 a left join tenk1 b on b.thousand = a.unique1                        left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
   where a.unique2 < 10 and coalesce(b.twothousand, a.twothousand) = 44;
 
+-- related case
+
+explain (costs off)
+select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1,
+  lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss;
+
+select * from int8_tbl t1 left join int8_tbl t2 on t1.q2 = t2.q1,
+  lateral (select * from int8_tbl t3 where t2.q1 = t2.q2) ss;
+
 --
 -- check handling of join aliases when flattening multiple levels of subquery
 --