Fix incorrect matching of subexpressions in outer-join plan nodes.

author Tom Lane <tgl@sss.pgh.pa.us>

Sat, 4 Apr 2015 23:55:15 +0000 (19:55 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sat, 4 Apr 2015 23:55:15 +0000 (19:55 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 4 Apr 2015 23:55:15 +0000 (19:55 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 4 Apr 2015 23:55:15 +0000 (19:55 -0400)
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c

index ec828cdd9fb808a254d9255afc7d5049376e406a..94b12ab8ca1132867cd3a1b3f830eb52942c2d22 100644 (file)
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -1283,19 +1283,13 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset)
     outer_itlist = build_tlist_index(outer_plan->targetlist);
     inner_itlist = build_tlist_index(inner_plan->targetlist);
  
-   /* All join plans have tlist, qual, and joinqual */
-   join->plan.targetlist = fix_join_expr(root,
-                                         join->plan.targetlist,
-                                         outer_itlist,
-                                         inner_itlist,
-                                         (Index) 0,
-                                         rtoffset);
-   join->plan.qual = fix_join_expr(root,
-                                   join->plan.qual,
-                                   outer_itlist,
-                                   inner_itlist,
-                                   (Index) 0,
-                                   rtoffset);
+   /*
+    * First process the joinquals (including merge or hash clauses).  These
+    * are logically below the join so they can always use all values
+    * available from the input tlists.  It's okay to also handle
+    * NestLoopParams now, because those couldn't refer to nullable
+    * subexpressions.
+    */
     join->joinqual = fix_join_expr(root,
                                    join->joinqual,
                                    outer_itlist,
@@ -1347,6 +1341,49 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset)
                                         rtoffset);
     }
  
+   /*
+    * Now we need to fix up the targetlist and qpqual, which are logically
+    * above the join.  This means they should not re-use any input expression
+    * that was computed in the nullable side of an outer join.  Vars and
+    * PlaceHolderVars are fine, so we can implement this restriction just by
+    * clearing has_non_vars in the indexed_tlist structs.
+    *
+    * XXX This is a grotty workaround for the fact that we don't clearly
+    * distinguish between a Var appearing below an outer join and the "same"
+    * Var appearing above it.  If we did, we'd not need to hack the matching
+    * rules this way.
+    */
+   switch (join->jointype)
+   {
+       case JOIN_LEFT:
+       case JOIN_SEMI:
+       case JOIN_ANTI:
+           inner_itlist->has_non_vars = false;
+           break;
+       case JOIN_RIGHT:
+           outer_itlist->has_non_vars = false;
+           break;
+       case JOIN_FULL:
+           outer_itlist->has_non_vars = false;
+           inner_itlist->has_non_vars = false;
+           break;
+       default:
+           break;
+   }
+
+   join->plan.targetlist = fix_join_expr(root,
+                                         join->plan.targetlist,
+                                         outer_itlist,
+                                         inner_itlist,
+                                         (Index) 0,
+                                         rtoffset);
+   join->plan.qual = fix_join_expr(root,
+                                   join->plan.qual,
+                                   outer_itlist,
+                                   inner_itlist,
+                                   (Index) 0,
+                                   rtoffset);
+
     pfree(outer_itlist);
     pfree(inner_itlist);
  }
@@ -1625,7 +1662,9 @@ search_indexed_tlist_for_var(Var *var, indexed_tlist *itlist,
   * If no match, return NULL.
   *
   * NOTE: it is a waste of time to call this unless itlist->has_ph_vars or
- * itlist->has_non_vars
+ * itlist->has_non_vars.  Furthermore, set_join_references() relies on being
+ * able to prevent matching of non-Vars by clearing itlist->has_non_vars,
+ * so there's a correctness reason not to call it unless that's set.
   */
  static Var *
  search_indexed_tlist_for_non_var(Node *node,
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index 046b0853012cce328b7957b9bca80388d151215c..e4f3f22e0b99d4832baf9d6b4e8ef7ac64bbdaac 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3176,6 +3176,54 @@ explain (costs off)
           Index Cond: (unique2 = 42)
  (6 rows)
  
+--
+-- test that quals attached to an outer join have correct semantics,
+-- specifically that they don't re-use expressions computed below the join;
+-- we force a mergejoin so that coalesce(b.q1, 1) appears as a join input
+--
+set enable_hashjoin to off;
+set enable_nestloop to off;
+explain (verbose, costs off)
+  select a.q2, b.q1
+    from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
+    where coalesce(b.q1, 1) > 0;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Merge Left Join
+   Output: a.q2, b.q1
+   Merge Cond: (a.q2 = (COALESCE(b.q1, '1'::bigint)))
+   Filter: (COALESCE(b.q1, '1'::bigint) > 0)
+   ->  Sort
+         Output: a.q2
+         Sort Key: a.q2
+         ->  Seq Scan on public.int8_tbl a
+               Output: a.q2
+   ->  Sort
+         Output: b.q1, (COALESCE(b.q1, '1'::bigint))
+         Sort Key: (COALESCE(b.q1, '1'::bigint))
+         ->  Seq Scan on public.int8_tbl b
+               Output: b.q1, COALESCE(b.q1, '1'::bigint)
+(14 rows)
+
+select a.q2, b.q1
+  from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
+  where coalesce(b.q1, 1) > 0;
+        q2         |        q1        
+-------------------+------------------
+ -4567890123456789 |                 
+               123 |              123
+               123 |              123
+               456 |                 
+  4567890123456789 | 4567890123456789
+  4567890123456789 | 4567890123456789
+  4567890123456789 | 4567890123456789
+  4567890123456789 | 4567890123456789
+  4567890123456789 | 4567890123456789
+  4567890123456789 | 4567890123456789
+(10 rows)
+
+reset enable_hashjoin;
+reset enable_nestloop;
  --
  -- test join removal
  --
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql

index 06a27ea151da3136cb89f5467dba12fe6c43ca12..d0cf0a041d60c59a0742819361808586eb896696 100644 (file)
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -927,6 +927,26 @@ explain (costs off)
  explain (costs off)
    select * from tenk1 a full join tenk1 b using(unique2) where unique2 = 42;
  
+--
+-- test that quals attached to an outer join have correct semantics,
+-- specifically that they don't re-use expressions computed below the join;
+-- we force a mergejoin so that coalesce(b.q1, 1) appears as a join input
+--
+
+set enable_hashjoin to off;
+set enable_nestloop to off;
+
+explain (verbose, costs off)
+  select a.q2, b.q1
+    from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
+    where coalesce(b.q1, 1) > 0;
+select a.q2, b.q1
+  from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
+  where coalesce(b.q1, 1) > 0;
+
+reset enable_hashjoin;
+reset enable_nestloop;
+
  --
  -- test join removal
  --
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 4 Apr 2015 23:55:15 +0000 (19:55 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 4 Apr 2015 23:55:15 +0000 (19:55 -0400)
src/backend/optimizer/plan/setrefs.c		patch | blob | blame | history
src/test/regress/expected/join.out		patch | blob | blame | history
src/test/regress/sql/join.sql		patch | blob | blame | history