Fix cost_nestloop and cost_hashjoin to model the behavior of semi and anti

author Tom Lane <tgl@sss.pgh.pa.us>

Sat, 9 May 2009 22:51:41 +0000 (22:51 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sat, 9 May 2009 22:51:41 +0000 (22:51 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 9 May 2009 22:51:41 +0000 (22:51 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 9 May 2009 22:51:41 +0000 (22:51 +0000)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 119997f3ff4d402a25ae488467b30afe609eb7df..eca0f80b8c1a1eff78adbf38a934acca8639c264 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.207 2009/04/17 15:33:33 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.208 2009/05/09 22:51:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -71,6 +71,7 @@
  #include "optimizer/pathnode.h"
  #include "optimizer/placeholder.h"
  #include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
  #include "parser/parsetree.h"
  #include "utils/lsyscache.h"
  #include "utils/selfuncs.h"
@@ -119,6 +120,11 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
                RestrictInfo *rinfo,
                PathKey *pathkey);
  static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
+static bool adjust_semi_join(PlannerInfo *root, JoinPath *path,
+                SpecialJoinInfo *sjinfo,
+                Selectivity *outer_match_frac,
+                Selectivity *match_count,
+                bool *indexed_join_quals);
  static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
                                  List *quals);
  static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
@@ -1394,11 +1400,15 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
     Path       *inner_path = path->innerjoinpath;
     Cost        startup_cost = 0;
     Cost        run_cost = 0;
+   Cost        inner_run_cost;
     Cost        cpu_per_tuple;
     QualCost    restrict_qual_cost;
     double      outer_path_rows = PATH_ROWS(outer_path);
     double      inner_path_rows = nestloop_inner_path_rows(inner_path);
     double      ntuples;
+   Selectivity outer_match_frac;
+   Selectivity match_count;
+   bool        indexed_join_quals;
  
     if (!enable_nestloop)
         startup_cost += disable_cost;
@@ -1428,13 +1438,66 @@ cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
          */
         run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
     }
-   run_cost += outer_path_rows *
-       (inner_path->total_cost - inner_path->startup_cost);
+   inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
  
-   /*
-    * Compute number of tuples processed (not number emitted!)
-    */
-   ntuples = outer_path_rows * inner_path_rows;
+   if (adjust_semi_join(root, path, sjinfo,
+                        &outer_match_frac,
+                        &match_count,
+                        &indexed_join_quals))
+   {
+       double      outer_matched_rows;
+       Selectivity inner_scan_frac;
+
+       /*
+        * SEMI or ANTI join: executor will stop after first match.
+        *
+        * For an outer-rel row that has at least one match, we can expect the
+        * inner scan to stop after a fraction 1/(match_count+1) of the inner
+        * rows, if the matches are evenly distributed.  Since they probably
+        * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
+        * that fraction.  (If we used a larger fuzz factor, we'd have to
+        * clamp inner_scan_frac to at most 1.0; but since match_count is at
+        * least 1, no such clamp is needed now.)
+        */
+       outer_matched_rows = rint(outer_path_rows * outer_match_frac);
+       inner_scan_frac = 2.0 / (match_count + 1.0);
+
+       /* Add inner run cost for outer tuples having matches */
+       run_cost += outer_matched_rows * inner_run_cost * inner_scan_frac;
+
+       /* Compute number of tuples processed (not number emitted!) */
+       ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
+
+       /*
+        * For unmatched outer-rel rows, there are two cases.  If the inner
+        * path is an indexscan using all the joinquals as indexquals, then
+        * an unmatched row results in an indexscan returning no rows, which
+        * is probably quite cheap.  We estimate this case as the same cost
+        * to return the first tuple of a nonempty scan.  Otherwise, the
+        * executor will have to scan the whole inner rel; not so cheap.
+        */
+       if (indexed_join_quals)
+       {
+           run_cost += (outer_path_rows - outer_matched_rows) *
+               inner_run_cost / inner_path_rows;
+           /* We won't be evaluating any quals at all for these rows */
+       }
+       else
+       {
+           run_cost += (outer_path_rows - outer_matched_rows) *
+               inner_run_cost;
+           ntuples += (outer_path_rows - outer_matched_rows) *
+               inner_path_rows;
+       }
+   }
+   else
+   {
+       /* Normal case; we'll scan whole input rel for each outer row */
+       run_cost += outer_path_rows * inner_run_cost;
+
+       /* Compute number of tuples processed (not number emitted!) */
+       ntuples = outer_path_rows * inner_path_rows;
+   }
  
     /* CPU costs */
     cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
@@ -1731,6 +1794,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
      * cpu_tuple_cost plus the cost of evaluating additional restriction
      * clauses that are to be applied at the join.  (This is pessimistic since
      * not all of the quals may get evaluated at each tuple.)
+    *
+    * Note: we could adjust for SEMI/ANTI joins skipping some qual evaluations
+    * here, but it's probably not worth the trouble.
      */
     startup_cost += qp_qual_cost.startup;
     cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
@@ -1824,6 +1890,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
     int         num_skew_mcvs;
     double      virtualbuckets;
     Selectivity innerbucketsize;
+   Selectivity outer_match_frac;
+   Selectivity match_count;
     ListCell   *hcl;
  
     if (!enable_hashjoin)
@@ -1838,12 +1906,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
     qp_qual_cost.startup -= hash_qual_cost.startup;
     qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
  
-   /*
-    * Get approx # tuples passing the hashquals.  We use approx_tuple_count
-    * here because we need an estimate done with JOIN_INNER semantics.
-    */
-   hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
-
     /* cost of source data */
     startup_cost += outer_path->startup_cost;
     run_cost += outer_path->total_cost - outer_path->startup_cost;
@@ -1970,18 +2032,78 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
  
     /* CPU costs */
  
-   /*
-    * The number of tuple comparisons needed is the number of outer tuples
-    * times the typical number of tuples in a hash bucket, which is the inner
-    * relation size times its bucketsize fraction.  At each one, we need to
-    * evaluate the hashjoin quals.  But actually, charging the full qual eval
-    * cost at each tuple is pessimistic, since we don't evaluate the quals
-    * unless the hash values match exactly.  For lack of a better idea, halve
-    * the cost estimate to allow for that.
-    */
-   startup_cost += hash_qual_cost.startup;
-   run_cost += hash_qual_cost.per_tuple *
-       outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
+   if (adjust_semi_join(root, &path->jpath, sjinfo,
+                        &outer_match_frac,
+                        &match_count,
+                        NULL))
+   {
+       double      outer_matched_rows;
+       Selectivity inner_scan_frac;
+
+       /*
+        * SEMI or ANTI join: executor will stop after first match.
+        *
+        * For an outer-rel row that has at least one match, we can expect the
+        * bucket scan to stop after a fraction 1/(match_count+1) of the
+        * bucket's rows, if the matches are evenly distributed.  Since they
+        * probably aren't quite evenly distributed, we apply a fuzz factor of
+        * 2.0 to that fraction.  (If we used a larger fuzz factor, we'd have
+        * to clamp inner_scan_frac to at most 1.0; but since match_count is
+        * at least 1, no such clamp is needed now.)
+        */
+       outer_matched_rows = rint(outer_path_rows * outer_match_frac);
+       inner_scan_frac = 2.0 / (match_count + 1.0);
+
+       startup_cost += hash_qual_cost.startup;
+       run_cost += hash_qual_cost.per_tuple * outer_matched_rows *
+           clamp_row_est(inner_path_rows * innerbucketsize * inner_scan_frac) * 0.5;
+
+       /*
+        * For unmatched outer-rel rows, the picture is quite a lot different.
+        * In the first place, there is no reason to assume that these rows
+        * preferentially hit heavily-populated buckets; instead assume they
+        * are uncorrelated with the inner distribution and so they see an
+        * average bucket size of inner_path_rows / virtualbuckets.  In the
+        * second place, it seems likely that they will have few if any
+        * exact hash-code matches and so very few of the tuples in the
+        * bucket will actually require eval of the hash quals.  We don't
+        * have any good way to estimate how many will, but for the moment
+        * assume that the effective cost per bucket entry is one-tenth what
+        * it is for matchable tuples.
+        */
+       run_cost += hash_qual_cost.per_tuple *
+           (outer_path_rows - outer_matched_rows) *
+           clamp_row_est(inner_path_rows / virtualbuckets) * 0.05;
+
+       /* Get # of tuples that will pass the basic join */
+       if (path->jpath.jointype == JOIN_SEMI)
+           hashjointuples = outer_matched_rows;
+       else
+           hashjointuples = outer_path_rows - outer_matched_rows;
+   }
+   else
+   {
+       /*
+        * The number of tuple comparisons needed is the number of outer
+        * tuples times the typical number of tuples in a hash bucket, which
+        * is the inner relation size times its bucketsize fraction.  At each
+        * one, we need to evaluate the hashjoin quals.  But actually,
+        * charging the full qual eval cost at each tuple is pessimistic,
+        * since we don't evaluate the quals unless the hash values match
+        * exactly.  For lack of a better idea, halve the cost estimate to
+        * allow for that.
+        */
+       startup_cost += hash_qual_cost.startup;
+       run_cost += hash_qual_cost.per_tuple * outer_path_rows *
+           clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
+
+       /*
+        * Get approx # tuples passing the hashquals.  We use
+        * approx_tuple_count here because we need an estimate done with
+        * JOIN_INNER semantics.
+        */
+       hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
+   }
  
     /*
      * For each tuple that gets through the hashjoin proper, we charge
@@ -2320,6 +2442,156 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
  }
  
  
+/*
+ * adjust_semi_join
+ *   Estimate how much of the inner input a SEMI or ANTI join
+ *   can be expected to scan.
+ *
+ * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning
+ * inner rows as soon as it finds a match to the current outer row.
+ * We should therefore adjust some of the cost components for this effect.
+ * This function computes some estimates needed for these adjustments.
+ *
+ * 'path' is already filled in except for the cost fields
+ * 'sjinfo' is extra info about the join for selectivity estimation
+ *
+ * Returns TRUE if this is a SEMI or ANTI join, FALSE if not.
+ *
+ * Output parameters (set only in TRUE-result case):
+ * *outer_match_frac is set to the fraction of the outer tuples that are
+ *     expected to have at least one match.
+ * *match_count is set to the average number of matches expected for
+ *     outer tuples that have at least one match.
+ * *indexed_join_quals is set to TRUE if all the joinquals are used as
+ *     inner index quals, FALSE if not.
+ *
+ * indexed_join_quals can be passed as NULL if that information is not
+ * relevant (it is only useful for the nestloop case).
+ */
+static bool
+adjust_semi_join(PlannerInfo *root, JoinPath *path, SpecialJoinInfo *sjinfo,
+                Selectivity *outer_match_frac,
+                Selectivity *match_count,
+                bool *indexed_join_quals)
+{
+   JoinType    jointype = path->jointype;
+   Selectivity jselec;
+   Selectivity nselec;
+   Selectivity avgmatch;
+   SpecialJoinInfo norm_sjinfo;
+   List       *joinquals;
+   ListCell   *l;
+
+   /* Fall out if it's not JOIN_SEMI or JOIN_ANTI */
+   if (jointype != JOIN_SEMI && jointype != JOIN_ANTI)
+       return false;
+
+   /*
+    * Note: it's annoying to repeat this selectivity estimation on each call,
+    * when the joinclause list will be the same for all path pairs
+    * implementing a given join.  clausesel.c will save us from the worst
+    * effects of this by caching at the RestrictInfo level; but perhaps it'd
+    * be worth finding a way to cache the results at a higher level.
+    */
+
+   /*
+    * In an ANTI join, we must ignore clauses that are "pushed down",
+    * since those won't affect the match logic.  In a SEMI join, we do not
+    * distinguish joinquals from "pushed down" quals, so just use the whole
+    * restrictinfo list.
+    */
+   if (jointype == JOIN_ANTI)
+   {
+       joinquals = NIL;
+       foreach(l, path->joinrestrictinfo)
+       {
+           RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+
+           Assert(IsA(rinfo, RestrictInfo));
+           if (!rinfo->is_pushed_down)
+               joinquals = lappend(joinquals, rinfo);
+       }
+   }
+   else
+       joinquals = path->joinrestrictinfo;
+
+   /*
+    * Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses.
+    */
+   jselec = clauselist_selectivity(root,
+                                   joinquals,
+                                   0,
+                                   jointype,
+                                   sjinfo);
+
+   /*
+    * Also get the normal inner-join selectivity of the join clauses.
+    */
+   norm_sjinfo.type = T_SpecialJoinInfo;
+   norm_sjinfo.min_lefthand = path->outerjoinpath->parent->relids;
+   norm_sjinfo.min_righthand = path->innerjoinpath->parent->relids;
+   norm_sjinfo.syn_lefthand = path->outerjoinpath->parent->relids;
+   norm_sjinfo.syn_righthand = path->innerjoinpath->parent->relids;
+   norm_sjinfo.jointype = JOIN_INNER;
+   /* we don't bother trying to make the remaining fields valid */
+   norm_sjinfo.lhs_strict = false;
+   norm_sjinfo.delay_upper_joins = false;
+   norm_sjinfo.join_quals = NIL;
+
+   nselec = clauselist_selectivity(root,
+                                   joinquals,
+                                   0,
+                                   JOIN_INNER,
+                                   &norm_sjinfo);
+
+   /* Avoid leaking a lot of ListCells */
+   if (jointype == JOIN_ANTI)
+       list_free(joinquals);
+
+   /*
+    * jselec can be interpreted as the fraction of outer-rel rows that have
+    * any matches (this is true for both SEMI and ANTI cases).  And nselec
+    * is the fraction of the Cartesian product that matches.  So, the
+    * average number of matches for each outer-rel row that has at least
+    * one match is nselec * inner_rows / jselec.
+    *
+    * Note: it is correct to use the inner rel's "rows" count here, not
+    * PATH_ROWS(), even if the inner path under consideration is an inner
+    * indexscan.  This is because we have included all the join clauses
+    * in the selectivity estimate, even ones used in an inner indexscan.
+    */
+   if (jselec > 0)             /* protect against zero divide */
+   {
+       avgmatch = nselec * path->innerjoinpath->parent->rows / jselec;
+       /* Clamp to sane range */
+       avgmatch = Max(1.0, avgmatch);
+   }
+   else
+       avgmatch = 1.0;
+
+   *outer_match_frac = jselec;
+   *match_count = avgmatch;
+
+   /*
+    * If requested, check whether the inner path uses all the joinquals
+    * as indexquals.  (If that's true, we can assume that an unmatched
+    * outer tuple is cheap to process, whereas otherwise it's probably
+    * expensive.)
+    */
+   if (indexed_join_quals)
+   {
+       List       *nrclauses;
+
+       nrclauses = select_nonredundant_join_clauses(root,
+                                                    path->joinrestrictinfo,
+                                                    path->innerjoinpath);
+       *indexed_join_quals = (nrclauses == NIL);
+   }
+
+   return true;
+}
+
+
  /*
   * approx_tuple_count
   *     Quick-and-dirty estimation of the number of join rows passing
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 1a957ac39698edfe794d143f564016f7c839cbe6..2aabd880aaf5ed577d605ff4991147202118a78c 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.258 2009/04/19 19:46:33 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.259 2009/05/09 22:51:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1562,62 +1562,16 @@ create_nestloop_plan(PlannerInfo *root,
     List       *otherclauses;
     NestLoop   *join_plan;
  
-   if (IsA(best_path->innerjoinpath, IndexPath))
-   {
-       /*
-        * An index is being used to reduce the number of tuples scanned in
-        * the inner relation.  If there are join clauses being used with the
-        * index, we may remove those join clauses from the list of clauses
-        * that have to be checked as qpquals at the join node.
-        *
-        * We can also remove any join clauses that are redundant with those
-        * being used in the index scan; this check is needed because
-        * find_eclass_clauses_for_index_join() may emit different clauses
-        * than generate_join_implied_equalities() did.
-        *
-        * We can skip this if the index path is an ordinary indexpath and not
-        * a special innerjoin path, since it then wouldn't be using any join
-        * clauses.
-        */
-       IndexPath  *innerpath = (IndexPath *) best_path->innerjoinpath;
-
-       if (innerpath->isjoininner)
-           joinrestrictclauses =
-               select_nonredundant_join_clauses(root,
-                                                joinrestrictclauses,
-                                                innerpath->indexclauses);
-   }
-   else if (IsA(best_path->innerjoinpath, BitmapHeapPath))
-   {
-       /*
-        * Same deal for bitmapped index scans.
-        *
-        * Note: both here and above, we ignore any implicit index
-        * restrictions associated with the use of partial indexes.  This is
-        * OK because we're only trying to prove we can dispense with some
-        * join quals; failing to prove that doesn't result in an incorrect
-        * plan.  It is the right way to proceed because adding more quals to
-        * the stuff we got from the original query would just make it harder
-        * to detect duplication.  (Also, to change this we'd have to be wary
-        * of UPDATE/DELETE/SELECT FOR UPDATE target relations; see notes
-        * above about EvalPlanQual.)
-        */
-       BitmapHeapPath *innerpath = (BitmapHeapPath *) best_path->innerjoinpath;
-
-       if (innerpath->isjoininner)
-       {
-           List       *bitmapclauses;
-
-           bitmapclauses =
-               make_restrictinfo_from_bitmapqual(innerpath->bitmapqual,
-                                                 true,
-                                                 false);
-           joinrestrictclauses =
-               select_nonredundant_join_clauses(root,
-                                                joinrestrictclauses,
-                                                bitmapclauses);
-       }
-   }
+   /*
+    * If the inner path is a nestloop inner indexscan, it might be using
+    * some of the join quals as index quals, in which case we don't have
+    * to check them again at the join node.  Remove any join quals that
+    * are redundant.
+    */
+   joinrestrictclauses =
+       select_nonredundant_join_clauses(root,
+                                        joinrestrictclauses,
+                                        best_path->innerjoinpath);
  
     /* Sort join qual clauses into best execution order */
     joinrestrictclauses = order_qual_clauses(root, joinrestrictclauses);
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c

index 0fcfd0ed509d724253e0e143383cf8f349bd4bcd..5b75d2de3bd80bbc4a9a9ee2b8bc0f88684de789 100644 (file)
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.58 2009/04/16 20:42:16 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.59 2009/05/09 22:51:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -35,8 +35,9 @@ static Expr *make_sub_restrictinfos(Expr *clause,
                        bool pseudoconstant,
                        Relids required_relids,
                        Relids nullable_relids);
-static bool join_clause_is_redundant(PlannerInfo *root,
-                        RestrictInfo *rinfo,
+static List *select_nonredundant_join_list(List *restrictinfo_list,
+                             List *reference_list);
+static bool join_clause_is_redundant(RestrictInfo *rinfo,
                          List *reference_list);
  
  
@@ -545,26 +546,91 @@ extract_actual_join_clauses(List *restrictinfo_list,
     }
  }
  
+
  /*
   * select_nonredundant_join_clauses
   *
   * Given a list of RestrictInfo clauses that are to be applied in a join,
- * select the ones that are not redundant with any clause in the
- * reference_list. This is used only for nestloop-with-inner-indexscan
- * joins: any clauses being checked by the index should be removed from
- * the qpquals list.
+ * select the ones that are not redundant with any clause that's enforced
+ * by the inner_path.  This is used for nestloop joins, wherein any clause
+ * being used in an inner indexscan need not be checked again at the join.
   *
   * "Redundant" means either equal() or derived from the same EquivalenceClass.
   * We have to check the latter because indxqual.c may select different derived
   * clauses than were selected by generate_join_implied_equalities().
   *
- * Note that we assume the given restrictinfo_list has already been checked
- * for local redundancies, so we don't check again.
+ * Note that we are *not* checking for local redundancies within the given
+ * restrictinfo_list; that should have been handled elsewhere.
   */
  List *
  select_nonredundant_join_clauses(PlannerInfo *root,
                                  List *restrictinfo_list,
-                                List *reference_list)
+                                Path *inner_path)
+{
+   if (IsA(inner_path, IndexPath))
+   {
+       /*
+        * Check the index quals to see if any of them are join clauses.
+        *
+        * We can skip this if the index path is an ordinary indexpath and not
+        * a special innerjoin path, since it then wouldn't be using any join
+        * clauses.
+        */
+       IndexPath  *innerpath = (IndexPath *) inner_path;
+
+       if (innerpath->isjoininner)
+           restrictinfo_list =
+               select_nonredundant_join_list(restrictinfo_list,
+                                             innerpath->indexclauses);
+   }
+   else if (IsA(inner_path, BitmapHeapPath))
+   {
+       /*
+        * Same deal for bitmapped index scans.
+        *
+        * Note: both here and above, we ignore any implicit index
+        * restrictions associated with the use of partial indexes.  This is
+        * OK because we're only trying to prove we can dispense with some
+        * join quals; failing to prove that doesn't result in an incorrect
+        * plan.  It's quite unlikely that a join qual could be proven
+        * redundant by an index predicate anyway.  (Also, if we did manage
+        * to prove it, we'd have to have a special case for update targets;
+        * see notes about EvalPlanQual testing in create_indexscan_plan().)
+        */
+       BitmapHeapPath *innerpath = (BitmapHeapPath *) inner_path;
+
+       if (innerpath->isjoininner)
+       {
+           List       *bitmapclauses;
+
+           bitmapclauses =
+               make_restrictinfo_from_bitmapqual(innerpath->bitmapqual,
+                                                 true,
+                                                 false);
+           restrictinfo_list =
+               select_nonredundant_join_list(restrictinfo_list,
+                                             bitmapclauses);
+       }
+   }
+
+   /*
+    * XXX the inner path of a nestloop could also be an append relation
+    * whose elements use join quals.  However, they might each use different
+    * quals; we could only remove join quals that are enforced by all the
+    * appendrel members.  For the moment we don't bother to try.
+    */
+
+   return restrictinfo_list;
+}
+
+/*
+ * select_nonredundant_join_list
+ *     Select the members of restrictinfo_list that are not redundant with
+ *     any member of reference_list.  See above for more info.
+ */
+static List *
+select_nonredundant_join_list(List *restrictinfo_list,
+                             List *reference_list)
  {
     List       *result = NIL;
     ListCell   *item;
@@ -574,7 +640,7 @@ select_nonredundant_join_clauses(PlannerInfo *root,
         RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
  
         /* drop it if redundant with any reference clause */
-       if (join_clause_is_redundant(root, rinfo, reference_list))
+       if (join_clause_is_redundant(rinfo, reference_list))
             continue;
  
         /* otherwise, add it to result list */
@@ -589,8 +655,7 @@ select_nonredundant_join_clauses(PlannerInfo *root,
   *     Test whether rinfo is redundant with any clause in reference_list.
   */
  static bool
-join_clause_is_redundant(PlannerInfo *root,
-                        RestrictInfo *rinfo,
+join_clause_is_redundant(RestrictInfo *rinfo,
                          List *reference_list)
  {
     ListCell   *refitem;
diff --git a/src/include/optimizer/restrictinfo.h b/src/include/optimizer/restrictinfo.h

index 52d256ed2849e296319339e598819b2e162e6eac..dcdc82ee1dc4acc9c6006a84c838b70730f94bcb 100644 (file)
--- a/src/include/optimizer/restrictinfo.h
+++ b/src/include/optimizer/restrictinfo.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/optimizer/restrictinfo.h,v 1.43 2009/04/16 20:42:16 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/restrictinfo.h,v 1.44 2009/05/09 22:51:41 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -39,6 +39,6 @@ extern void extract_actual_join_clauses(List *restrictinfo_list,
                             List **otherquals);
  extern List *select_nonredundant_join_clauses(PlannerInfo *root,
                                  List *restrictinfo_list,
-                                List *reference_list);
+                                Path *inner_path);
  
  #endif   /* RESTRICTINFO_H */
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 9 May 2009 22:51:41 +0000 (22:51 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 9 May 2009 22:51:41 +0000 (22:51 +0000)
src/backend/optimizer/path/costsize.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| blame \| history
src/backend/optimizer/util/restrictinfo.c		patch \| blob \| blame \| history
src/include/optimizer/restrictinfo.h		patch \| blob \| blame \| history