Reduce "X = X" to "X IS NOT NULL", if it's easy to do so.
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 8 Oct 2017 16:23:32 +0000 (12:23 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 8 Oct 2017 16:23:32 +0000 (12:23 -0400)
If the operator is a strict btree equality operator, and X isn't volatile,
then the clause must yield true for any non-null value of X, or null if X
is null.  At top level of a WHERE clause, we can ignore the distinction
between false and null results, so it's valid to simplify the clause to
"X IS NOT NULL".  This is a useful improvement mainly because we'll get
a far better selectivity estimate in most cases.

Because such cases seldom arise in well-written queries, it is unappetizing
to expend a lot of planner cycles looking for them ... but it turns out
that there's a place we can shoehorn this in practically for free, because
equivclass.c already has to detect and reject candidate equivalences of the
form X = X.  That doesn't catch every place that it would be valid to
simplify to X IS NOT NULL, but it catches the typical case.  Working harder
doesn't seem justified.

Patch by me, reviewed by Petr Jelinek

Discussion: https://postgr.es/m/CAMjNa7cC4X9YR-vAJS-jSYCajhRDvJQnN7m2sLH1wLh-_Z2bsw@mail.gmail.com

src/backend/optimizer/path/equivclass.c
src/backend/optimizer/plan/initsplan.c
src/include/optimizer/paths.h
src/test/regress/expected/equivclass.out
src/test/regress/sql/equivclass.sql

index 7997f50c18d6fbf4d9db40b5dc50b49413ec1601..a225414c97017aeded3b07bd6b0da6babb1009eb 100644 (file)
@@ -27,6 +27,7 @@
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
 #include "optimizer/prep.h"
+#include "optimizer/restrictinfo.h"
 #include "optimizer/var.h"
 #include "utils/lsyscache.h"
 
@@ -71,8 +72,14 @@ static bool reconsider_full_join_clause(PlannerInfo *root,
  *   any delay by an outer join, so its two sides can be considered equal
  *   anywhere they are both computable; moreover that equality can be
  *   extended transitively.  Record this knowledge in the EquivalenceClass
- *   data structure.  Returns TRUE if successful, FALSE if not (in which
- *   case caller should treat the clause as ordinary, not an equivalence).
+ *   data structure, if applicable.  Returns TRUE if successful, FALSE if not
+ *   (in which case caller should treat the clause as ordinary, not an
+ *   equivalence).
+ *
+ * In some cases, although we cannot convert a clause into EquivalenceClass
+ * knowledge, we can still modify it to a more useful form than the original.
+ * Then, *p_restrictinfo will be replaced by a new RestrictInfo, which is what
+ * the caller should use for further processing.
  *
  * If below_outer_join is true, then the clause was found below the nullable
  * side of an outer join, so its sides might validly be both NULL rather than
@@ -104,9 +111,11 @@ static bool reconsider_full_join_clause(PlannerInfo *root,
  * memory context.
  */
 bool
-process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
+process_equivalence(PlannerInfo *root,
+                   RestrictInfo **p_restrictinfo,
                    bool below_outer_join)
 {
+   RestrictInfo *restrictinfo = *p_restrictinfo;
    Expr       *clause = restrictinfo->clause;
    Oid         opno,
                collation,
@@ -154,16 +163,45 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
                                       collation);
 
    /*
-    * Reject clauses of the form X=X.  These are not as redundant as they
-    * might seem at first glance: assuming the operator is strict, this is
-    * really an expensive way to write X IS NOT NULL.  So we must not risk
-    * just losing the clause, which would be possible if there is already a
-    * single-element EquivalenceClass containing X.  The case is not common
-    * enough to be worth contorting the EC machinery for, so just reject the
-    * clause and let it be processed as a normal restriction clause.
+    * Clauses of the form X=X cannot be translated into EquivalenceClasses.
+    * We'd either end up with a single-entry EC, losing the knowledge that
+    * the clause was present at all, or else make an EC with duplicate
+    * entries, causing other issues.
     */
    if (equal(item1, item2))
-       return false;           /* X=X is not a useful equivalence */
+   {
+       /*
+        * If the operator is strict, then the clause can be treated as just
+        * "X IS NOT NULL".  (Since we know we are considering a top-level
+        * qual, we can ignore the difference between FALSE and NULL results.)
+        * It's worth making the conversion because we'll typically get a much
+        * better selectivity estimate than we would for X=X.
+        *
+        * If the operator is not strict, we can't be sure what it will do
+        * with NULLs, so don't attempt to optimize it.
+        */
+       set_opfuncid((OpExpr *) clause);
+       if (func_strict(((OpExpr *) clause)->opfuncid))
+       {
+           NullTest   *ntest = makeNode(NullTest);
+
+           ntest->arg = item1;
+           ntest->nulltesttype = IS_NOT_NULL;
+           ntest->argisrow = false;    /* correct even if composite arg */
+           ntest->location = -1;
+
+           *p_restrictinfo =
+               make_restrictinfo((Expr *) ntest,
+                                 restrictinfo->is_pushed_down,
+                                 restrictinfo->outerjoin_delayed,
+                                 restrictinfo->pseudoconstant,
+                                 restrictinfo->security_level,
+                                 NULL,
+                                 restrictinfo->outer_relids,
+                                 restrictinfo->nullable_relids);
+       }
+       return false;
+   }
 
    /*
     * If below outer join, check for strictness, else reject.
@@ -1741,7 +1779,7 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo,
                                                   bms_copy(inner_relids),
                                                   bms_copy(inner_nullable_relids),
                                                   cur_ec->ec_min_security);
-           if (process_equivalence(root, newrinfo, true))
+           if (process_equivalence(root, &newrinfo, true))
                match = true;
        }
 
@@ -1884,7 +1922,7 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo)
                                                       bms_copy(left_relids),
                                                       bms_copy(left_nullable_relids),
                                                       cur_ec->ec_min_security);
-               if (process_equivalence(root, newrinfo, true))
+               if (process_equivalence(root, &newrinfo, true))
                    matchleft = true;
            }
            eq_op = select_equality_operator(cur_ec,
@@ -1899,7 +1937,7 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo)
                                                       bms_copy(right_relids),
                                                       bms_copy(right_nullable_relids),
                                                       cur_ec->ec_min_security);
-               if (process_equivalence(root, newrinfo, true))
+               if (process_equivalence(root, &newrinfo, true))
                    matchright = true;
            }
        }
index 9931dddba43a6675d39ad5a621335166cd8a779e..974eb58d83751bc2907556555e42be01710f40cb 100644 (file)
@@ -1964,10 +1964,11 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
        if (maybe_equivalence)
        {
            if (check_equivalence_delay(root, restrictinfo) &&
-               process_equivalence(root, restrictinfo, below_outer_join))
+               process_equivalence(root, &restrictinfo, below_outer_join))
                return;
            /* EC rejected it, so set left_ec/right_ec the hard way ... */
-           initialize_mergeclause_eclasses(root, restrictinfo);
+           if (restrictinfo->mergeopfamilies)  /* EC might have changed this */
+               initialize_mergeclause_eclasses(root, restrictinfo);
            /* ... and fall through to distribute_restrictinfo_to_rels */
        }
        else if (maybe_outer_join && restrictinfo->can_join)
index a15eee54bb8a1e655817df1e27a1ae9fbc40cf09..ea886b6501bfa6a503b7ebf6f3d78a7d2c0ec54a 100644 (file)
@@ -127,7 +127,8 @@ typedef bool (*ec_matches_callback_type) (PlannerInfo *root,
                                          EquivalenceMember *em,
                                          void *arg);
 
-extern bool process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
+extern bool process_equivalence(PlannerInfo *root,
+                   RestrictInfo **p_restrictinfo,
                    bool below_outer_join);
 extern Expr *canonicalize_ec_expression(Expr *expr,
                           Oid req_type, Oid req_collation);
index a96b2a1b07c36bfa0c9888c9e30631134fe68637..c448d85dec37ef1ce5b66f6903833f79ffaba934 100644 (file)
@@ -421,3 +421,21 @@ reset session authorization;
 revoke select on ec0 from regress_user_ectest;
 revoke select on ec1 from regress_user_ectest;
 drop user regress_user_ectest;
+-- check that X=X is converted to X IS NOT NULL when appropriate
+explain (costs off)
+  select * from tenk1 where unique1 = unique1 and unique2 = unique2;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Seq Scan on tenk1
+   Filter: ((unique1 IS NOT NULL) AND (unique2 IS NOT NULL))
+(2 rows)
+
+-- this could be converted, but isn't at present
+explain (costs off)
+  select * from tenk1 where unique1 = unique1 or unique2 = unique2;
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Seq Scan on tenk1
+   Filter: ((unique1 = unique1) OR (unique2 = unique2))
+(2 rows)
+
index 0e4aa0cd2c55c69662bcf6c9ec9beedfd04c6108..85aa65de392e46898388515209d249a45419b702 100644 (file)
@@ -254,3 +254,11 @@ revoke select on ec0 from regress_user_ectest;
 revoke select on ec1 from regress_user_ectest;
 
 drop user regress_user_ectest;
+
+-- check that X=X is converted to X IS NOT NULL when appropriate
+explain (costs off)
+  select * from tenk1 where unique1 = unique1 and unique2 = unique2;
+
+-- this could be converted, but isn't at present
+explain (costs off)
+  select * from tenk1 where unique1 = unique1 or unique2 = unique2;