Fix join removal logic to clean up sub-RestrictInfos of OR clauses.
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 10 Feb 2023 19:52:36 +0000 (14:52 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 10 Feb 2023 19:52:36 +0000 (14:52 -0500)
analyzejoins.c took care to clean out removed relids from the
clause_relids and required_relids of RestrictInfos associated with
the doomed rel ... but it paid no attention to the fact that if such a
RestrictInfo contains an OR clause, there will be sub-RestrictInfos
containing similar fields.

I'm more than a bit surprised that this oversight hasn't caused
visible problems before.  In any case, it's certainly broken now,
so add logic to clean out the sub-RestrictInfos recursively.
We might need to back-patch this someday.

Per bug #17786 from Robins Tharakan.

Discussion: https://postgr.es/m/17786-f1ea7fbdab97daec@postgresql.org

src/backend/optimizer/plan/analyzejoins.c
src/test/regress/expected/join.out
src/test/regress/sql/join.sql

index 283539e53c404eb5748de0e244c98ec6f35739aa..7fd11df9afb2297f6dc19b7f67aca9be26eec3ed 100644 (file)
@@ -29,6 +29,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
 #include "optimizer/tlist.h"
 #include "utils/lsyscache.h"
 
@@ -36,6 +37,8 @@
 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
 static void remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
                                                                  Relids joinrelids);
+static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
+                                                                                int relid, int ojrelid);
 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
 static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
 static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
@@ -470,24 +473,12 @@ remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
                {
                        /*
                         * There might be references to relid or ojrelid in the
-                        * clause_relids as a consequence of PHVs having ph_eval_at sets
+                        * RestrictInfo, as a consequence of PHVs having ph_eval_at sets
                         * that include those.  We already checked above that any such PHV
                         * is safe, so we can just drop those references.
-                        *
-                        * The clause_relids probably aren't shared with anything else,
-                        * but let's copy them just to be sure.
                         */
-                       rinfo->clause_relids = bms_copy(rinfo->clause_relids);
-                       rinfo->clause_relids = bms_del_member(rinfo->clause_relids,
-                                                                                                 relid);
-                       rinfo->clause_relids = bms_del_member(rinfo->clause_relids,
-                                                                                                 ojrelid);
-                       /* Likewise for required_relids */
-                       rinfo->required_relids = bms_copy(rinfo->required_relids);
-                       rinfo->required_relids = bms_del_member(rinfo->required_relids,
-                                                                                                       relid);
-                       rinfo->required_relids = bms_del_member(rinfo->required_relids,
-                                                                                                       ojrelid);
+                       remove_rel_from_restrictinfo(rinfo, relid, ojrelid);
+                       /* Now throw it back into the joininfo lists */
                        distribute_restrictinfo_to_rels(root, rinfo);
                }
        }
@@ -498,6 +489,62 @@ remove_rel_from_query(PlannerInfo *root, int relid, int ojrelid,
         */
 }
 
+/*
+ * Remove any references to relid or ojrelid from the RestrictInfo.
+ *
+ * We only bother to clean out bits in clause_relids and required_relids,
+ * not nullingrel bits in contained Vars and PHVs.  (This might have to be
+ * improved sometime.)  However, if the RestrictInfo contains an OR clause
+ * we have to also clean up the sub-clauses.
+ */
+static void
+remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid)
+{
+       /*
+        * The clause_relids probably aren't shared with anything else, but let's
+        * copy them just to be sure.
+        */
+       rinfo->clause_relids = bms_copy(rinfo->clause_relids);
+       rinfo->clause_relids = bms_del_member(rinfo->clause_relids, relid);
+       rinfo->clause_relids = bms_del_member(rinfo->clause_relids, ojrelid);
+       /* Likewise for required_relids */
+       rinfo->required_relids = bms_copy(rinfo->required_relids);
+       rinfo->required_relids = bms_del_member(rinfo->required_relids, relid);
+       rinfo->required_relids = bms_del_member(rinfo->required_relids, ojrelid);
+
+       /* If it's an OR, recurse to clean up sub-clauses */
+       if (restriction_is_or_clause(rinfo))
+       {
+               ListCell   *lc;
+
+               Assert(is_orclause(rinfo->orclause));
+               foreach(lc, ((BoolExpr *) rinfo->orclause)->args)
+               {
+                       Node       *orarg = (Node *) lfirst(lc);
+
+                       /* OR arguments should be ANDs or sub-RestrictInfos */
+                       if (is_andclause(orarg))
+                       {
+                               List       *andargs = ((BoolExpr *) orarg)->args;
+                               ListCell   *lc2;
+
+                               foreach(lc2, andargs)
+                               {
+                                       RestrictInfo *rinfo2 = lfirst_node(RestrictInfo, lc2);
+
+                                       remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
+                               }
+                       }
+                       else
+                       {
+                               RestrictInfo *rinfo2 = castNode(RestrictInfo, orarg);
+
+                               remove_rel_from_restrictinfo(rinfo2, relid, ojrelid);
+                       }
+               }
+       }
+}
+
 /*
  * Remove any occurrences of the target relid from a joinlist structure.
  *
index 98d611412dfb562205d1fb26cf845bc0931a49a3..2f1f8b8dbe6730e1a1255cbedd0d1afc19a084c0 100644 (file)
@@ -5344,6 +5344,26 @@ SELECT q2 FROM
          One-Time Filter: false
 (9 rows)
 
+-- join removal bug #17786: check that OR conditions are cleaned up
+EXPLAIN (COSTS OFF)
+SELECT f1, x
+FROM int4_tbl
+     JOIN ((SELECT 42 AS x FROM int8_tbl LEFT JOIN innertab ON q1 = id) AS ss1
+           RIGHT JOIN tenk1 ON NULL)
+        ON tenk1.unique1 = ss1.x OR tenk1.unique2 = ss1.x;
+                                QUERY PLAN                                
+--------------------------------------------------------------------------
+ Nested Loop
+   ->  Seq Scan on int4_tbl
+   ->  Materialize
+         ->  Nested Loop Left Join
+               Join Filter: NULL::boolean
+               Filter: ((tenk1.unique1 = (42)) OR (tenk1.unique2 = (42)))
+               ->  Seq Scan on tenk1
+               ->  Result
+                     One-Time Filter: false
+(9 rows)
+
 rollback;
 -- another join removal bug: we must clean up correctly when removing a PHV
 begin;
index e50a769606938719791eeaf4e6aaaf416bc1b4bd..400c16958f31465576e5004f9604af852ec120e4 100644 (file)
@@ -1955,6 +1955,14 @@ SELECT q2 FROM
   RIGHT JOIN int4_tbl ON NULL
  WHERE x >= x;
 
+-- join removal bug #17786: check that OR conditions are cleaned up
+EXPLAIN (COSTS OFF)
+SELECT f1, x
+FROM int4_tbl
+     JOIN ((SELECT 42 AS x FROM int8_tbl LEFT JOIN innertab ON q1 = id) AS ss1
+           RIGHT JOIN tenk1 ON NULL)
+        ON tenk1.unique1 = ss1.x OR tenk1.unique2 = ss1.x;
+
 rollback;
 
 -- another join removal bug: we must clean up correctly when removing a PHV