Add the ability to extract OR indexscan conditions from OR-of-AND
authorTom Lane <tgl@sss.pgh.pa.us>
Mon, 5 Jan 2004 05:07:36 +0000 (05:07 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Mon, 5 Jan 2004 05:07:36 +0000 (05:07 +0000)
join conditions in which each OR subclause includes a constraint on
the same relation.  This implements the other useful side-effect of
conversion to CNF format, without its unpleasant side-effects.  As
per pghackers discussion of a few weeks ago.

16 files changed:
src/backend/nodes/copyfuncs.c
src/backend/nodes/equalfuncs.c
src/backend/nodes/outfuncs.c
src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/indxpath.c
src/backend/optimizer/path/joinpath.c
src/backend/optimizer/path/orindxpath.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/plan/initsplan.c
src/backend/optimizer/util/pathnode.c
src/backend/optimizer/util/restrictinfo.c
src/include/nodes/relation.h
src/include/optimizer/cost.h
src/include/optimizer/paths.h
src/include/optimizer/restrictinfo.h

index 29f55e0abb8a78cd5534dbbdefbf8f2b9da36dee..75c2fa898d9f190df2f63033b776a36a985e57ba 100644 (file)
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.272 2004/01/04 03:51:52 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.273 2004/01/05 05:07:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1168,8 +1168,9 @@ _copyRestrictInfo(RestrictInfo *from)
        RestrictInfo *newnode = makeNode(RestrictInfo);
 
        COPY_NODE_FIELD(clause);
-       COPY_SCALAR_FIELD(ispusheddown);
-       COPY_SCALAR_FIELD(canjoin);
+       COPY_SCALAR_FIELD(is_pushed_down);
+       COPY_SCALAR_FIELD(valid_everywhere);
+       COPY_SCALAR_FIELD(can_join);
        COPY_BITMAPSET_FIELD(clause_relids);
        COPY_BITMAPSET_FIELD(left_relids);
        COPY_BITMAPSET_FIELD(right_relids);
index 5c7e53844022743eea617cff8e2d6cec65a1a7f5..41b292a47cc877abe4d82aa859f7b954012c5c8d 100644 (file)
@@ -18,7 +18,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.211 2003/12/30 23:53:14 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.212 2004/01/05 05:07:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -560,7 +560,8 @@ static bool
 _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
 {
        COMPARE_NODE_FIELD(clause);
-       COMPARE_SCALAR_FIELD(ispusheddown);
+       COMPARE_SCALAR_FIELD(is_pushed_down);
+       COMPARE_SCALAR_FIELD(valid_everywhere);
 
        /*
         * We ignore all the remaining fields, since they may not be set yet,
index 3b8613fd19c2f5a1faced674cdf4ea4b14c993d2..e91e8e0d170f376014e4e0798bf1797f1e571b35 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.225 2004/01/04 03:51:52 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.226 2004/01/05 05:07:35 tgl Exp $
  *
  * NOTES
  *       Every node type that can appear in stored rules' parsetrees *must*
@@ -971,7 +971,7 @@ _outIndexPath(StringInfo str, IndexPath *node)
        WRITE_NODE_FIELD(indexqual);
        WRITE_NODE_FIELD(indexjoinclauses);
        WRITE_ENUM_FIELD(indexscandir, ScanDirection);
-       WRITE_FLOAT_FIELD(rows, "%.2f");
+       WRITE_FLOAT_FIELD(rows, "%.0f");
 }
 
 static void
@@ -1073,8 +1073,9 @@ _outRestrictInfo(StringInfo str, RestrictInfo *node)
 
        /* NB: this isn't a complete set of fields */
        WRITE_NODE_FIELD(clause);
-       WRITE_BOOL_FIELD(ispusheddown);
-       WRITE_BOOL_FIELD(canjoin);
+       WRITE_BOOL_FIELD(is_pushed_down);
+       WRITE_BOOL_FIELD(valid_everywhere);
+       WRITE_BOOL_FIELD(can_join);
        WRITE_BITMAPSET_FIELD(clause_relids);
        WRITE_BITMAPSET_FIELD(left_relids);
        WRITE_BITMAPSET_FIELD(right_relids);
index 1f979dd4094a948a4fffbae522a997a884d9ca0e..2d724265f06dc698ec3a2fb0fbf9c89181aea881 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.110 2003/12/17 17:07:48 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.111 2004/01/05 05:07:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -151,6 +151,17 @@ set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte)
        /* Mark rel with estimated output rows, width, etc */
        set_baserel_size_estimates(root, rel);
 
+       /* Test any partial indexes of rel for applicability */
+       check_partial_indexes(root, rel);
+
+       /*
+        * Check to see if we can extract any restriction conditions from
+        * join quals that are OR-of-AND structures.  If so, add them to the
+        * rel's restriction list, and recompute the size estimates.
+        */
+       if (create_or_index_quals(root, rel))
+               set_baserel_size_estimates(root, rel);
+
        /*
         * Generate paths and add them to the rel's pathlist.
         *
@@ -167,8 +178,6 @@ set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte)
 
        /* Consider index paths for both simple and OR index clauses */
        create_index_paths(root, rel);
-
-       /* create_index_paths must be done before create_or_index_paths */
        create_or_index_paths(root, rel);
 
        /* Now find the cheapest of the paths for this rel */
index 94202cda4b02cca8104e8a3e85cd71fc5caa8bf3..fa7dcdc9740d8af1dfdaacda324468f9bebd19be 100644 (file)
@@ -49,7 +49,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.119 2004/01/04 03:51:52 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.120 2004/01/05 05:07:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -107,11 +107,33 @@ static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
 static bool cost_qual_eval_walker(Node *node, QualCost *total);
 static Selectivity approx_selectivity(Query *root, List *quals,
                                   JoinType jointype);
+static Selectivity join_in_selectivity(JoinPath *path, Query *root);
 static void set_rel_width(Query *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
 
 
+/*
+ * clamp_row_est
+ *             Force a row-count estimate to a sane value.
+ */
+double
+clamp_row_est(double nrows)
+{
+       /*
+        * Force estimate to be at least one row, to make explain output look
+        * better and to avoid possible divide-by-zero when interpolating
+        * costs.  Make it an integer, too.
+        */
+       if (nrows < 1.0)
+               nrows = 1.0;
+       else
+               nrows = ceil(nrows);
+
+       return nrows;
+}
+
+
 /*
  * cost_seqscan
  *       Determines and returns the cost of scanning a relation sequentially.
@@ -300,10 +322,7 @@ cost_index(Path *path, Query *root,
         *----------
         */
 
-       tuples_fetched = indexSelectivity * baserel->tuples;
-       /* Don't believe estimates less than 1... */
-       if (tuples_fetched < 1.0)
-               tuples_fetched = 1.0;
+       tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
 
        /* This part is the Mackert and Lohman formula */
 
@@ -718,7 +737,6 @@ cost_nestloop(NestPath *path, Query *root)
 {
        Path       *outer_path = path->outerjoinpath;
        Path       *inner_path = path->innerjoinpath;
-       List       *restrictlist = path->joinrestrictinfo;
        Cost            startup_cost = 0;
        Cost            run_cost = 0;
        Cost            cpu_per_tuple;
@@ -728,6 +746,15 @@ cost_nestloop(NestPath *path, Query *root)
        double          ntuples;
        Selectivity joininfactor;
 
+       /*
+        * If inner path is an indexscan, be sure to use its estimated output row
+        * count, which may be lower than the restriction-clause-only row count of
+        * its parent.  (We don't include this case in the PATH_ROWS macro because
+        * it applies *only* to a nestloop's inner relation.)
+        */
+       if (IsA(inner_path, IndexPath))
+               inner_path_rows = ((IndexPath *) inner_path)->rows;
+
        if (!enable_nestloop)
                startup_cost += disable_cost;
 
@@ -735,26 +762,12 @@ cost_nestloop(NestPath *path, Query *root)
         * If we're doing JOIN_IN then we will stop scanning inner tuples for
         * an outer tuple as soon as we have one match.  Account for the
         * effects of this by scaling down the cost estimates in proportion to
-        * the expected output size.  (This assumes that all the quals
-        * attached to the join are IN quals, which should be true.)
-        *
-        * Note: it's probably bogus to use the normal selectivity calculation
-        * here when either the outer or inner path is a UniquePath.
+        * the JOIN_IN selectivity.  (This assumes that all the quals
+        * attached to the join are IN quals, which should be true.)  This would
+        * probably be the wrong approach if an input path is a UniquePath, but
+        * we'd never have that with JOIN_IN join type.
         */
-       if (path->jointype == JOIN_IN)
-       {
-               Selectivity qual_selec = approx_selectivity(root, restrictlist,
-                                                                                                       path->jointype);
-               double          qptuples;
-
-               qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows);
-               if (qptuples > path->path.parent->rows)
-                       joininfactor = path->path.parent->rows / qptuples;
-               else
-                       joininfactor = 1.0;
-       }
-       else
-               joininfactor = 1.0;
+       joininfactor = join_in_selectivity(path, root);
 
        /* cost of source data */
 
@@ -785,23 +798,12 @@ cost_nestloop(NestPath *path, Query *root)
                (inner_path->total_cost - inner_path->startup_cost) * joininfactor;
 
        /*
-        * Compute number of tuples processed (not number emitted!). If inner
-        * path is an indexscan, be sure to use its estimated output row
-        * count, which may be lower than the restriction-clause-only row
-        * count of its parent.  (We don't include this case in the PATH_ROWS
-        * macro because it applies *only* to a nestloop's inner relation.)
-        * Note: it is correct to use the unadjusted inner_path_rows in the
-        * above calculation for joininfactor, since otherwise we'd be
-        * double-counting the selectivity of the join clause being used for
-        * the index.
+        * Compute number of tuples processed (not number emitted!)
         */
-       if (IsA(inner_path, IndexPath))
-               inner_path_rows = ((IndexPath *) inner_path)->rows;
-
-       ntuples = inner_path_rows * outer_path_rows;
+       ntuples = outer_path_rows * inner_path_rows * joininfactor;
 
        /* CPU costs */
-       cost_qual_eval(&restrict_qual_cost, restrictlist);
+       cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo);
        startup_cost += restrict_qual_cost.startup;
        cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
        run_cost += cpu_per_tuple * ntuples;
@@ -827,7 +829,6 @@ cost_mergejoin(MergePath *path, Query *root)
 {
        Path       *outer_path = path->jpath.outerjoinpath;
        Path       *inner_path = path->jpath.innerjoinpath;
-       List       *restrictlist = path->jpath.joinrestrictinfo;
        List       *mergeclauses = path->path_mergeclauses;
        List       *outersortkeys = path->outersortkeys;
        List       *innersortkeys = path->innersortkeys;
@@ -835,18 +836,15 @@ cost_mergejoin(MergePath *path, Query *root)
        Cost            run_cost = 0;
        Cost            cpu_per_tuple;
        Selectivity merge_selec;
-       Selectivity qp_selec;
        QualCost        merge_qual_cost;
        QualCost        qp_qual_cost;
        RestrictInfo *firstclause;
-       List       *qpquals;
        double          outer_path_rows = PATH_ROWS(outer_path);
        double          inner_path_rows = PATH_ROWS(inner_path);
        double          outer_rows,
                                inner_rows;
        double          mergejointuples,
                                rescannedtuples;
-       double          qptuples;
        double          rescanratio;
        Selectivity outerscansel,
                                innerscansel;
@@ -868,16 +866,12 @@ cost_mergejoin(MergePath *path, Query *root)
        merge_selec = approx_selectivity(root, mergeclauses,
                                                                         path->jpath.jointype);
        cost_qual_eval(&merge_qual_cost, mergeclauses);
-       qpquals = set_ptrDifference(restrictlist, mergeclauses);
-       qp_selec = approx_selectivity(root, qpquals,
-                                                                 path->jpath.jointype);
-       cost_qual_eval(&qp_qual_cost, qpquals);
-       freeList(qpquals);
+       cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo);
+       qp_qual_cost.startup -= merge_qual_cost.startup;
+       qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
 
        /* approx # tuples passing the merge quals */
-       mergejointuples = ceil(merge_selec * outer_path_rows * inner_path_rows);
-       /* approx # tuples passing qpquals as well */
-       qptuples = ceil(mergejointuples * qp_selec);
+       mergejointuples = clamp_row_est(merge_selec * outer_path_rows * inner_path_rows);
 
        /*
         * When there are equal merge keys in the outer relation, the
@@ -948,13 +942,8 @@ cost_mergejoin(MergePath *path, Query *root)
        }
 
        /* convert selectivity to row count; must scan at least one row */
-
-       outer_rows = ceil(outer_path_rows * outerscansel);
-       if (outer_rows < 1)
-               outer_rows = 1;
-       inner_rows = ceil(inner_path_rows * innerscansel);
-       if (inner_rows < 1)
-               inner_rows = 1;
+       outer_rows = clamp_row_est(outer_path_rows * outerscansel);
+       inner_rows = clamp_row_est(inner_path_rows * innerscansel);
 
        /*
         * Readjust scan selectivities to account for above rounding.  This is
@@ -1012,13 +1001,11 @@ cost_mergejoin(MergePath *path, Query *root)
         * for an outer tuple as soon as we have one match.  Account for the
         * effects of this by scaling down the cost estimates in proportion to
         * the expected output size.  (This assumes that all the quals
-        * attached to the join are IN quals, which should be true.)
+        * attached to the join are IN quals, which should be true.)  This would
+        * probably be the wrong approach if an input path is a UniquePath, but
+        * we'd never have that with JOIN_IN join type.
         */
-       if (path->jpath.jointype == JOIN_IN &&
-               qptuples > path->jpath.path.parent->rows)
-               joininfactor = path->jpath.path.parent->rows / qptuples;
-       else
-               joininfactor = 1.0;
+       joininfactor = join_in_selectivity(&path->jpath, root);
 
        /*
         * The number of tuple comparisons needed is approximately number of
@@ -1060,17 +1047,14 @@ cost_hashjoin(HashPath *path, Query *root)
 {
        Path       *outer_path = path->jpath.outerjoinpath;
        Path       *inner_path = path->jpath.innerjoinpath;
-       List       *restrictlist = path->jpath.joinrestrictinfo;
        List       *hashclauses = path->path_hashclauses;
        Cost            startup_cost = 0;
        Cost            run_cost = 0;
        Cost            cpu_per_tuple;
        Selectivity hash_selec;
-       Selectivity qp_selec;
        QualCost        hash_qual_cost;
        QualCost        qp_qual_cost;
        double          hashjointuples;
-       double          qptuples;
        double          outer_path_rows = PATH_ROWS(outer_path);
        double          inner_path_rows = PATH_ROWS(inner_path);
        double          outerbytes = relation_byte_size(outer_path_rows,
@@ -1084,7 +1068,6 @@ cost_hashjoin(HashPath *path, Query *root)
        Selectivity innerbucketsize;
        Selectivity joininfactor;
        List       *hcl;
-       List       *qpquals;
 
        if (!enable_hashjoin)
                startup_cost += disable_cost;
@@ -1101,16 +1084,12 @@ cost_hashjoin(HashPath *path, Query *root)
        hash_selec = approx_selectivity(root, hashclauses,
                                                                        path->jpath.jointype);
        cost_qual_eval(&hash_qual_cost, hashclauses);
-       qpquals = set_ptrDifference(restrictlist, hashclauses);
-       qp_selec = approx_selectivity(root, qpquals,
-                                                                 path->jpath.jointype);
-       cost_qual_eval(&qp_qual_cost, qpquals);
-       freeList(qpquals);
+       cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo);
+       qp_qual_cost.startup -= hash_qual_cost.startup;
+       qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
 
        /* approx # tuples passing the hash quals */
-       hashjointuples = ceil(hash_selec * outer_path_rows * inner_path_rows);
-       /* approx # tuples passing qpquals as well */
-       qptuples = ceil(hashjointuples * qp_selec);
+       hashjointuples = clamp_row_est(hash_selec * outer_path_rows * inner_path_rows);
 
        /* cost of source data */
        startup_cost += outer_path->startup_cost;
@@ -1229,13 +1208,11 @@ cost_hashjoin(HashPath *path, Query *root)
         * an outer tuple as soon as we have one match.  Account for the
         * effects of this by scaling down the cost estimates in proportion to
         * the expected output size.  (This assumes that all the quals
-        * attached to the join are IN quals, which should be true.)
+        * attached to the join are IN quals, which should be true.)  This would
+        * probably be the wrong approach if an input path is a UniquePath, but
+        * we'd never have that with JOIN_IN join type.
         */
-       if (path->jpath.jointype == JOIN_IN &&
-               qptuples > path->jpath.path.parent->rows)
-               joininfactor = path->jpath.path.parent->rows / qptuples;
-       else
-               joininfactor = 1.0;
+       joininfactor = join_in_selectivity(&path->jpath, root);
 
        /*
         * The number of tuple comparisons needed is the number of outer
@@ -1245,7 +1222,7 @@ cost_hashjoin(HashPath *path, Query *root)
         */
        startup_cost += hash_qual_cost.startup;
        run_cost += hash_qual_cost.per_tuple *
-               outer_path_rows * ceil(inner_path_rows * innerbucketsize) *
+               outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) *
                joininfactor;
 
        /*
@@ -1669,28 +1646,18 @@ approx_selectivity(Query *root, List *quals, JoinType jointype)
 void
 set_baserel_size_estimates(Query *root, RelOptInfo *rel)
 {
-       double          temp;
+       double          nrows;
 
        /* Should only be applied to base relations */
        Assert(rel->relid > 0);
 
-       temp = rel->tuples *
+       nrows = rel->tuples *
                clauselist_selectivity(root,
                                                           rel->baserestrictinfo,
                                                           0,
                                                           JOIN_INNER);
 
-       /*
-        * Force estimate to be at least one row, to make explain output look
-        * better and to avoid possible divide-by-zero when interpolating
-        * cost.  Make it an integer, too.
-        */
-       if (temp < 1.0)
-               temp = 1.0;
-       else
-               temp = ceil(temp);
-
-       rel->rows = temp;
+       rel->rows = clamp_row_est(nrows);
 
        cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo);
 
@@ -1719,7 +1686,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
  * rel1, JOIN_RIGHT).  Also, JOIN_IN should produce the same result as
  * JOIN_UNIQUE_INNER, likewise JOIN_REVERSE_IN == JOIN_UNIQUE_OUTER.
  *
- * We set the same relnode fields as set_baserel_size_estimates() does.
+ * We set only the rows field here.  The width field was already set by
+ * build_joinrel_tlist, and baserestrictcost is not used for join rels.
  */
 void
 set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
@@ -1729,7 +1697,7 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
                                                   List *restrictlist)
 {
        Selectivity selec;
-       double          temp;
+       double          nrows;
        UniquePath *upath;
 
        /*
@@ -1757,63 +1725,87 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
        switch (jointype)
        {
                case JOIN_INNER:
-                       temp = outer_rel->rows * inner_rel->rows * selec;
+                       nrows = outer_rel->rows * inner_rel->rows * selec;
                        break;
                case JOIN_LEFT:
-                       temp = outer_rel->rows * inner_rel->rows * selec;
-                       if (temp < outer_rel->rows)
-                               temp = outer_rel->rows;
+                       nrows = outer_rel->rows * inner_rel->rows * selec;
+                       if (nrows < outer_rel->rows)
+                               nrows = outer_rel->rows;
                        break;
                case JOIN_RIGHT:
-                       temp = outer_rel->rows * inner_rel->rows * selec;
-                       if (temp < inner_rel->rows)
-                               temp = inner_rel->rows;
+                       nrows = outer_rel->rows * inner_rel->rows * selec;
+                       if (nrows < inner_rel->rows)
+                               nrows = inner_rel->rows;
                        break;
                case JOIN_FULL:
-                       temp = outer_rel->rows * inner_rel->rows * selec;
-                       if (temp < outer_rel->rows)
-                               temp = outer_rel->rows;
-                       if (temp < inner_rel->rows)
-                               temp = inner_rel->rows;
+                       nrows = outer_rel->rows * inner_rel->rows * selec;
+                       if (nrows < outer_rel->rows)
+                               nrows = outer_rel->rows;
+                       if (nrows < inner_rel->rows)
+                               nrows = inner_rel->rows;
                        break;
                case JOIN_IN:
                case JOIN_UNIQUE_INNER:
                        upath = create_unique_path(root, inner_rel,
                                                                           inner_rel->cheapest_total_path);
-                       temp = outer_rel->rows * upath->rows * selec;
-                       if (temp > outer_rel->rows)
-                               temp = outer_rel->rows;
+                       nrows = outer_rel->rows * upath->rows * selec;
+                       if (nrows > outer_rel->rows)
+                               nrows = outer_rel->rows;
                        break;
                case JOIN_REVERSE_IN:
                case JOIN_UNIQUE_OUTER:
                        upath = create_unique_path(root, outer_rel,
                                                                           outer_rel->cheapest_total_path);
-                       temp = upath->rows * inner_rel->rows * selec;
-                       if (temp > inner_rel->rows)
-                               temp = inner_rel->rows;
+                       nrows = upath->rows * inner_rel->rows * selec;
+                       if (nrows > inner_rel->rows)
+                               nrows = inner_rel->rows;
                        break;
                default:
                        elog(ERROR, "unrecognized join type: %d", (int) jointype);
-                       temp = 0;                       /* keep compiler quiet */
+                       nrows = 0;                      /* keep compiler quiet */
                        break;
        }
 
-       /*
-        * Force estimate to be at least one row, to make explain output look
-        * better and to avoid possible divide-by-zero when interpolating
-        * cost.  Make it an integer, too.
-        */
-       if (temp < 1.0)
-               temp = 1.0;
-       else
-               temp = ceil(temp);
+       rel->rows = clamp_row_est(nrows);
+}
+
+/*
+ * join_in_selectivity
+ *       Determines the factor by which a JOIN_IN join's result is expected
+ *       to be smaller than an ordinary inner join.
+ *
+ * 'path' is already filled in except for the cost fields
+ */
+static Selectivity
+join_in_selectivity(JoinPath *path, Query *root)
+{
+       Selectivity selec;
+       double          nrows;
 
-       rel->rows = temp;
+       if (path->jointype != JOIN_IN)
+               return 1.0;
 
        /*
-        * We need not compute the output width here, because
-        * build_joinrel_tlist already did.
+        * Compute same result set_joinrel_size_estimates would compute
+        * for JOIN_INNER.  Note that we use the input rels' absolute size
+        * estimates, not PATH_ROWS() which might be less; if we used PATH_ROWS()
+        * we'd be double-counting the effects of any join clauses used in
+        * input scans.
         */
+       selec = clauselist_selectivity(root,
+                                                                  path->joinrestrictinfo,
+                                                                  0,
+                                                                  JOIN_INNER);
+       nrows = path->outerjoinpath->parent->rows *
+               path->innerjoinpath->parent->rows * selec;
+
+       nrows = clamp_row_est(nrows);
+
+       /* See if it's larger than the actual JOIN_IN size estimate */
+       if (nrows > path->path.parent->rows)
+               return path->path.parent->rows / nrows;
+       else
+               return 1.0;
 }
 
 /*
@@ -1823,17 +1815,11 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
  * The rel's targetlist and restrictinfo list must have been constructed
  * already.
  *
- * We set the following fields of the rel node:
- *     rows: the estimated number of output tuples (after applying
- *               restriction clauses).
- *     width: the estimated average output tuple width in bytes.
- *     baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
+ * We set the same fields as set_baserel_size_estimates.
  */
 void
 set_function_size_estimates(Query *root, RelOptInfo *rel)
 {
-       double          temp;
-
        /* Should only be applied to base relations that are functions */
        Assert(rel->relid > 0);
        Assert(rel->rtekind == RTE_FUNCTION);
@@ -1846,28 +1832,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel)
         */
        rel->tuples = 1000;
 
-       /* Now estimate number of output rows */
-       temp = rel->tuples *
-               clauselist_selectivity(root,
-                                                          rel->baserestrictinfo,
-                                                          0,
-                                                          JOIN_INNER);
-
-       /*
-        * Force estimate to be at least one row, to make explain output look
-        * better and to avoid possible divide-by-zero when interpolating
-        * cost.  Make it an integer, too.
-        */
-       if (temp < 1.0)
-               temp = 1.0;
-       else
-               temp = ceil(temp);
-
-       rel->rows = temp;
-
-       cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo);
-
-       set_rel_width(root, rel);
+       /* Now estimate number of output rows, etc */
+       set_baserel_size_estimates(root, rel);
 }
 
 
index 0fa02d4f74f8074e87824d5d5557dabb36eee609..83d129d0c9cff737b54f9960de1eea18916aa382 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.153 2004/01/04 03:51:52 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.154 2004/01/05 05:07:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -63,8 +63,7 @@ static bool match_join_clause_to_indexcol(RelOptInfo *rel, IndexOptInfo *index,
                                                                                  RestrictInfo *rinfo);
 static Oid indexable_operator(Expr *clause, Oid opclass,
                                   bool indexkey_on_left);
-static bool pred_test(List *predicate_list, List *restrictinfo_list,
-                 List *joininfo_list);
+static bool pred_test(List *predicate_list, List *restrictinfo_list);
 static bool pred_test_restrict_list(Expr *predicate, List *restrictinfo_list);
 static bool pred_test_recurse_clause(Expr *predicate, Node *clause);
 static bool pred_test_recurse_pred(Expr *predicate, Node *clause);
@@ -114,12 +113,12 @@ static Const *string_to_const(const char *str, Oid datatype);
  * and avoid repeated computation.
  *
  * 'rel' is the relation for which we want to generate index paths
+ *
+ * Note: check_partial_indexes() must have been run previously.
  */
 void
 create_index_paths(Query *root, RelOptInfo *rel)
 {
-       List       *restrictinfo_list = rel->baserestrictinfo;
-       List       *joininfo_list = rel->joininfo;
        Relids          all_join_outerrelids = NULL;
        List       *ilist;
 
@@ -132,16 +131,9 @@ create_index_paths(Query *root, RelOptInfo *rel)
                bool            index_is_ordered;
                Relids          join_outerrelids;
 
-               /*
-                * If this is a partial index, we can only use it if it passes the
-                * predicate test.
-                */
-               if (index->indpred != NIL)
-               {
-                       if (!pred_test(index->indpred, restrictinfo_list, joininfo_list))
-                               continue;
-                       index->predOK = true; /* set flag for orindxpaths.c */
-               }
+               /* Ignore partial indexes that do not match the query */
+               if (index->indpred != NIL && !index->predOK)
+                       continue;
 
                /*
                 * 1. Match the index against non-OR restriction clauses.
@@ -336,7 +328,7 @@ group_clauses_by_indexkey_for_join(Query *root,
                        RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
 
                        /* Can't use pushed-down clauses in outer join */
-                       if (isouterjoin && rinfo->ispusheddown)
+                       if (isouterjoin && rinfo->is_pushed_down)
                                continue;
 
                        if (match_clause_to_indexcol(rel,
@@ -365,7 +357,7 @@ group_clauses_by_indexkey_for_join(Query *root,
                                RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
 
                                /* Can't use pushed-down clauses in outer join */
-                               if (isouterjoin && rinfo->ispusheddown)
+                               if (isouterjoin && rinfo->is_pushed_down)
                                        continue;
 
                                if (match_join_clause_to_indexcol(rel,
@@ -736,6 +728,32 @@ indexable_operator(Expr *clause, Oid opclass, bool indexkey_on_left)
  *                             ----  ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS      ----
  ****************************************************************************/
 
+/*
+ * check_partial_indexes
+ *             Check each partial index of the relation, and mark it predOK or not
+ *             depending on whether the predicate is satisfied for this query.
+ */
+void
+check_partial_indexes(Query *root, RelOptInfo *rel)
+{
+       List       *restrictinfo_list = rel->baserestrictinfo;
+       List       *ilist;
+
+       foreach(ilist, rel->indexlist)
+       {
+               IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
+
+               /*
+                * If this is a partial index, we can only use it if it passes the
+                * predicate test.
+                */
+               if (index->indpred == NIL)
+                       continue;                       /* ignore non-partial indexes */
+
+               index->predOK = pred_test(index->indpred, restrictinfo_list);
+       }
+}
+
 /*
  * pred_test
  *       Does the "predicate inclusion test" for partial indexes.
@@ -751,7 +769,7 @@ indexable_operator(Expr *clause, Oid opclass, bool indexkey_on_left)
  *       to CNF format). --Nels, Jan '93
  */
 static bool
-pred_test(List *predicate_list, List *restrictinfo_list, List *joininfo_list)
+pred_test(List *predicate_list, List *restrictinfo_list)
 {
        List       *pred;
 
@@ -1464,8 +1482,7 @@ make_innerjoin_index_path(Query *root,
                                                           rel->relid,          /* do not use 0! */
                                                           JOIN_INNER);
        /* Like costsize.c, force estimate to be at least one row */
-       if (pathnode->rows < 1.0)
-               pathnode->rows = 1.0;
+       pathnode->rows = clamp_row_est(pathnode->rows);
 
        cost_index(&pathnode->path, root, rel, index, indexquals, true);
 
index 91cd0ab22a5f45dbdabd8025a22ce4459d7004e9..8a666e80d8920dbae0e552e39fe0920d27804d63 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.84 2003/12/30 23:53:14 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.85 2004/01/05 05:07:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -690,7 +690,7 @@ hash_inner_and_outer(Query *root,
        {
                RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
 
-               if (!restrictinfo->canjoin ||
+               if (!restrictinfo->can_join ||
                        restrictinfo->hashjoinoperator == InvalidOid)
                        continue;                       /* not hashjoinable */
 
@@ -698,7 +698,7 @@ hash_inner_and_outer(Query *root,
                 * If processing an outer join, only use its own join clauses for
                 * hashing.  For inner joins we need not be so picky.
                 */
-               if (isouterjoin && restrictinfo->ispusheddown)
+               if (isouterjoin && restrictinfo->is_pushed_down)
                        continue;
 
                /*
@@ -804,17 +804,17 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
                 */
                if (isouterjoin)
                {
-                       if (restrictinfo->ispusheddown)
+                       if (restrictinfo->is_pushed_down)
                                continue;
                        switch (jointype)
                        {
                                case JOIN_RIGHT:
-                                       if (!restrictinfo->canjoin ||
+                                       if (!restrictinfo->can_join ||
                                                restrictinfo->mergejoinoperator == InvalidOid)
                                                return NIL;             /* not mergejoinable */
                                        break;
                                case JOIN_FULL:
-                                       if (!restrictinfo->canjoin ||
+                                       if (!restrictinfo->can_join ||
                                                restrictinfo->mergejoinoperator == InvalidOid)
                                                ereport(ERROR,
                                                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -826,7 +826,7 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
                        }
                }
 
-               if (!restrictinfo->canjoin ||
+               if (!restrictinfo->can_join ||
                        restrictinfo->mergejoinoperator == InvalidOid)
                        continue;                       /* not mergejoinable */
 
index bea2a8e416134d9f38929cfb96a1443dbe581fa6..02e486ec35cf9a9fa5e0184b71502c5f6918e639 100644 (file)
@@ -8,20 +8,21 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.55 2004/01/04 00:07:32 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.56 2004/01/05 05:07:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "postgres.h"
 
+#include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/restrictinfo.h"
 
 
-static IndexPath *best_or_subclause_indices(Query *root, RelOptInfo *rel,
+static IndexPath *best_or_subclause_indexes(Query *root, RelOptInfo *rel,
                                                                                        List *subclauses);
 static bool best_or_subclause_index(Query *root,
                                                RelOptInfo *rel,
@@ -32,16 +33,166 @@ static bool best_or_subclause_index(Query *root,
                                                Cost *retTotalCost);
 
 
+/*----------
+ * create_or_index_quals
+ *       Examine join OR-of-AND quals to see if any useful restriction OR
+ *       clauses can be extracted.  If so, add them to the query.
+ *
+ * Although a join clause must reference other relations overall,
+ * an OR of ANDs clause might contain sub-clauses that reference just this
+ * relation and can be used to build a restriction clause.
+ * For example consider
+ *             WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
+ * We can transform this into
+ *             WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
+ *                     AND (a.x = 42 OR a.x = 44)
+ *                     AND (b.y = 43 OR b.z = 45);
+ * which opens the potential to build OR indexscans on a and b.  In essence
+ * this is a partial transformation to CNF (AND of ORs format).  It is not
+ * complete, however, because we do not unravel the original OR --- doing so
+ * would usually bloat the qualification expression to little gain.
+ *
+ * The added quals are partially redundant with the original OR, and therefore
+ * will cause the size of the joinrel to be underestimated when it is finally
+ * formed.  (This would be true of a full transformation to CNF as well; the
+ * fault is not really in the transformation, but in clauselist_selectivity's
+ * inability to recognize redundant conditions.)  To minimize the collateral
+ * damage, we want to minimize the number of quals added.  Therefore we do
+ * not add every possible extracted restriction condition to the query.
+ * Instead, we search for the single restriction condition that generates
+ * the most useful (cheapest) OR indexscan, and add only that condition.
+ * This is a pretty ad-hoc heuristic, but quite useful.
+ *
+ * We can then compensate for the redundancy of the added qual by poking
+ * the recorded selectivity of the original OR clause, thereby ensuring
+ * the added qual doesn't change the estimated size of the joinrel when
+ * it is finally formed.  This is a MAJOR HACK: it depends on the fact
+ * that clause selectivities are cached and on the fact that the same
+ * RestrictInfo node will appear in every joininfo list that might be used
+ * when the joinrel is formed.  And it probably isn't right in cases where
+ * the size estimation is nonlinear (i.e., outer and IN joins).  But it
+ * beats not doing anything.
+ *
+ * NOTE: one might think this messiness could be worked around by generating
+ * the indexscan path with a small path->rows value, and not touching the
+ * rel's baserestrictinfo or rel->rows.  However, that does not work.
+ * The optimizer's fundamental design assumes that every general-purpose
+ * Path for a given relation generates the same number of rows.  Without
+ * this assumption we'd not be able to optimize solely on the cost of Paths,
+ * but would have to take number of output rows into account as well.
+ * (Perhaps someday that'd be worth doing, but it's a pretty big change...)
+ *
+ * 'rel' is the relation entry for which quals are to be created
+ *
+ * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE.
+ * If no quals available, returns FALSE and doesn't change rel.
+ *
+ * Note: check_partial_indexes() must have been run previously.
+ *----------
+ */
+bool
+create_or_index_quals(Query *root, RelOptInfo *rel)
+{
+       IndexPath  *bestpath = NULL;
+       RestrictInfo *bestrinfo = NULL;
+       FastList        orclauses;
+       List       *orclause;
+       Expr       *indxqual_or_expr;
+       RestrictInfo *or_rinfo;
+       Selectivity or_selec,
+                               orig_selec;
+       List       *i;
+
+       /*
+        * We use the best_or_subclause_indexes() machinery to locate the
+        * best combination of restriction subclauses.  Note we must ignore
+        * any joinclauses that are not marked valid_everywhere, because they
+        * cannot be pushed down due to outer-join rules.
+        */
+       foreach(i, rel->joininfo)
+       {
+               JoinInfo   *joininfo = (JoinInfo *) lfirst(i);
+               List       *j;
+
+               foreach(j, joininfo->jinfo_restrictinfo)
+               {
+                       RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
+
+                       if (restriction_is_or_clause(rinfo) &&
+                               rinfo->valid_everywhere)
+                       {
+                               IndexPath  *pathnode;
+
+                               pathnode = best_or_subclause_indexes(root,
+                                                                                                        rel,
+                                                                               ((BoolExpr *) rinfo->orclause)->args);
+
+                               if (pathnode)
+                               {
+                                       if (bestpath == NULL ||
+                                               pathnode->path.total_cost < bestpath->path.total_cost)
+                                       {
+                                               bestpath = pathnode;
+                                               bestrinfo = rinfo;
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /* Fail if no suitable clauses found */
+       if (bestpath == NULL)
+               return false;
+
+       /*
+        * Build an expression representation of the indexqual, expanding
+        * the implicit OR and AND semantics of the first- and
+        * second-level lists.
+        */
+       FastListInit(&orclauses);
+       foreach(orclause, bestpath->indexqual)
+               FastAppend(&orclauses, make_ands_explicit(lfirst(orclause)));
+       indxqual_or_expr = make_orclause(FastListValue(&orclauses));
+
+       /*
+        * And add it to the rel's restriction list.
+        */
+       or_rinfo = make_restrictinfo(indxqual_or_expr, true, true);
+       rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
+
+       /*
+        * Adjust the original OR clause's cached selectivity to compensate
+        * for the selectivity of the added (but redundant) lower-level qual.
+        * This should result in the join rel getting approximately the same
+        * rows estimate as it would have gotten without all these shenanigans.
+        * (XXX major hack alert ... this depends on the assumption that the
+        * selectivity will stay cached ...)
+        */
+       or_selec = clause_selectivity(root, (Node *) or_rinfo,
+                                                                 0, JOIN_INNER);
+       if (or_selec > 0 && or_selec < 1)
+       {
+               orig_selec = clause_selectivity(root, (Node *) bestrinfo,
+                                                                               0, JOIN_INNER);
+               bestrinfo->this_selec = orig_selec / or_selec;
+               /* clamp result to sane range */
+               if (bestrinfo->this_selec > 1)
+                       bestrinfo->this_selec = 1;
+       }
+
+       /* Tell caller to recompute rel's rows estimate */
+       return true;
+}
+
 /*
  * create_or_index_paths
- *       Creates multi-scan index paths for indices that match OR clauses.
+ *       Creates multi-scan index paths for indexes that match OR clauses.
  *
  * 'rel' is the relation entry for which the paths are to be created
  *
  * Returns nothing, but adds paths to rel->pathlist via add_path().
  *
- * Note: create_index_paths() must have been run already, since it does
- * the heavy lifting to determine whether partial indexes may be used.
+ * Note: check_partial_indexes() must have been run previously.
  */
 void
 create_or_index_paths(Query *root, RelOptInfo *rel)
@@ -60,7 +211,7 @@ create_or_index_paths(Query *root, RelOptInfo *rel)
                {
                        IndexPath  *pathnode;
 
-                       pathnode = best_or_subclause_indices(root,
+                       pathnode = best_or_subclause_indexes(root,
                                                                                                 rel,
                                                           ((BoolExpr *) rinfo->orclause)->args);
 
@@ -68,49 +219,10 @@ create_or_index_paths(Query *root, RelOptInfo *rel)
                                add_path(rel, (Path *) pathnode);
                }
        }
-
-       /*
-        * Also consider join clauses that are ORs.  Although a join clause
-        * must reference other relations overall, an OR of ANDs clause might
-        * contain sub-clauses that reference just our relation and can be
-        * used to build a non-join indexscan.  For example consider
-        *              WHERE (a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45);
-        * We could build an OR indexscan on a.x using those subclauses.
-        *
-        * XXX don't enable this code quite yet.  Although the plans it creates
-        * are correct, and possibly even useful, we are totally confused about
-        * the number of rows returned, leading to poor choices of join plans
-        * above the indexscan.  Need to restructure the way join sizes are
-        * calculated before this will really work.
-        */
-#ifdef NOT_YET
-       foreach(i, rel->joininfo)
-       {
-               JoinInfo   *joininfo = (JoinInfo *) lfirst(i);
-               List       *j;
-
-               foreach(j, joininfo->jinfo_restrictinfo)
-               {
-                       RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
-
-                       if (restriction_is_or_clause(rinfo))
-                       {
-                               IndexPath  *pathnode;
-
-                               pathnode = best_or_subclause_indices(root,
-                                                                                                        rel,
-                                                                               ((BoolExpr *) rinfo->orclause)->args);
-
-                               if (pathnode)
-                                       add_path(rel, (Path *) pathnode);
-                       }
-               }
-       }
-#endif
 }
 
 /*
- * best_or_subclause_indices
+ * best_or_subclause_indexes
  *       Determine the best index to be used in conjunction with each subclause
  *       of an OR clause, and build a Path for a multi-index scan.
  *
@@ -134,7 +246,7 @@ create_or_index_paths(Query *root, RelOptInfo *rel)
  * single tuple more than once).
  */
 static IndexPath *
-best_or_subclause_indices(Query *root,
+best_or_subclause_indexes(Query *root,
                                                  RelOptInfo *rel,
                                                  List *subclauses)
 {
@@ -202,7 +314,10 @@ best_or_subclause_indices(Query *root,
        /* We don't actually care what order the index scans in. */
        pathnode->indexscandir = NoMovementScanDirection;
 
-       /* XXX this may be wrong when using join OR clauses... */
+       /*
+        * The number of rows is the same as the parent rel's estimate, since
+        * this isn't a join inner indexscan.
+        */
        pathnode->rows = rel->rows;
 
        return pathnode;
index 906e1f7f257552013f91f7103d261e268d101922..d20b967b05cebe93fe5d2450894144958a1b3c5b 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.161 2003/11/29 19:51:50 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.162 2004/01/05 05:07:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -643,6 +643,7 @@ create_unique_plan(Query *root, UniquePath *best_path)
                plan = (Plan *) make_unique(my_tlist, plan, sortList);
        }
 
+       /* Adjust output size estimate (other fields should be OK already) */
        plan->plan_rows = best_path->rows;
 
        return plan;
index bcfdb25403ce9e079b870c9204002f246641675e..c4412030cc0e8756ebdb1e2c6705b111590e0dd1 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.96 2004/01/04 03:51:52 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.97 2004/01/05 05:07:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -37,7 +37,7 @@
 static void mark_baserels_for_outer_join(Query *root, Relids rels,
                                                         Relids outerrels);
 static void distribute_qual_to_rels(Query *root, Node *clause,
-                                               bool ispusheddown,
+                                               bool is_pushed_down,
                                                bool isdeduced,
                                                Relids outerjoin_nonnullable,
                                                Relids qualscope);
@@ -356,7 +356,7 @@ mark_baserels_for_outer_join(Query *root, Relids rels, Relids outerrels)
  *       equijoined vars.
  *
  * 'clause': the qual clause to be distributed
- * 'ispusheddown': if TRUE, force the clause to be marked 'ispusheddown'
+ * 'is_pushed_down': if TRUE, force the clause to be marked 'is_pushed_down'
  *             (this indicates the clause came from a FromExpr, not a JoinExpr)
  * 'isdeduced': TRUE if the qual came from implied-equality deduction
  * 'outerjoin_nonnullable': NULL if not an outer-join qual, else the set of
@@ -365,16 +365,17 @@ mark_baserels_for_outer_join(Query *root, Relids rels, Relids outerrels)
  *
  * 'qualscope' identifies what level of JOIN the qual came from.  For a top
  * level qual (WHERE qual), qualscope lists all baserel ids and in addition
- * 'ispusheddown' will be TRUE.
+ * 'is_pushed_down' will be TRUE.
  */
 static void
 distribute_qual_to_rels(Query *root, Node *clause,
-                                               bool ispusheddown,
+                                               bool is_pushed_down,
                                                bool isdeduced,
                                                Relids outerjoin_nonnullable,
                                                Relids qualscope)
 {
        Relids          relids;
+       bool            valid_everywhere;
        bool            can_be_equijoin;
        RestrictInfo *restrictinfo;
        RelOptInfo *rel;
@@ -415,6 +416,7 @@ distribute_qual_to_rels(Query *root, Node *clause,
                 * the vars were equal).
                 */
                Assert(bms_equal(relids, qualscope));
+               valid_everywhere = true;
                can_be_equijoin = true;
        }
        else if (bms_overlap(relids, outerjoin_nonnullable))
@@ -433,6 +435,7 @@ distribute_qual_to_rels(Query *root, Node *clause,
                 * result, so we treat it the same as an ordinary inner-join qual.
                 */
                relids = qualscope;
+               valid_everywhere = false;
                can_be_equijoin = false;
        }
        else
@@ -447,18 +450,26 @@ distribute_qual_to_rels(Query *root, Node *clause,
                 * time we are called, the outerjoinset of each baserel will show
                 * exactly those outer joins that are below the qual in the join
                 * tree.
+                *
+                * We also need to determine whether the qual is "valid everywhere",
+                * which is true if the qual mentions no variables that are involved
+                * in lower-level outer joins (this may be an overly strong test).
                 */
                Relids          addrelids = NULL;
                Relids          tmprelids;
                int                     relno;
 
+               valid_everywhere = true;
                tmprelids = bms_copy(relids);
                while ((relno = bms_first_member(tmprelids)) >= 0)
                {
                        RelOptInfo *rel = find_base_rel(root, relno);
 
                        if (rel->outerjoinset != NULL)
+                       {
                                addrelids = bms_add_members(addrelids, rel->outerjoinset);
+                               valid_everywhere = false;
+                       }
                }
                bms_free(tmprelids);
 
@@ -489,13 +500,15 @@ distribute_qual_to_rels(Query *root, Node *clause,
         * same joinrel. A qual originating from WHERE is always considered
         * "pushed down".
         */
-       if (!ispusheddown)
-               ispusheddown = !bms_equal(relids, qualscope);
+       if (!is_pushed_down)
+               is_pushed_down = !bms_equal(relids, qualscope);
 
        /*
         * Build the RestrictInfo node itself.
         */
-       restrictinfo = make_restrictinfo((Expr *) clause, ispusheddown);
+       restrictinfo = make_restrictinfo((Expr *) clause,
+                                                                        is_pushed_down,
+                                                                        valid_everywhere);
 
        /*
         * Figure out where to attach it.
index 55581774f2f79280b69d68bc969fdf1577ab421c..94109ee01e35ed357186f64588bae604f8fe324a 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.96 2003/11/29 19:51:51 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.97 2004/01/05 05:07:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -373,13 +373,6 @@ create_index_path(Query *root,
         */
        pathnode->rows = rel->rows;
 
-       /*
-        * Not sure if this is necessary, but it should help if the statistics
-        * are too far off
-        */
-       if (index->indpred && index->tuples < pathnode->rows)
-               pathnode->rows = index->tuples;
-
        cost_index(&pathnode->path, root, rel, index, indexquals, false);
 
        return pathnode;
@@ -398,6 +391,7 @@ create_tidscan_path(Query *root, RelOptInfo *rel, List *tideval)
        pathnode->path.pathtype = T_TidScan;
        pathnode->path.parent = rel;
        pathnode->path.pathkeys = NIL;
+
        pathnode->tideval = tideval;
 
        cost_tidscan(&pathnode->path, root, rel, tideval);
index c72e2306aca7849279758a2e61c466beab0c2b7d..0ab4a75c5c1ddf2a497b8dd1ecc1a3819ff3d23c 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.23 2004/01/04 03:51:52 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.24 2004/01/05 05:07:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,7 +20,8 @@
 #include "optimizer/var.h"
 
 
-static Expr *make_sub_restrictinfos(Expr *clause, bool ispusheddown);
+static Expr *make_sub_restrictinfos(Expr *clause, bool is_pushed_down,
+                                                                       bool valid_everywhere);
 static bool join_clause_is_redundant(Query *root,
                                                 RestrictInfo *rinfo,
                                                 List *reference_list,
@@ -32,18 +33,22 @@ static bool join_clause_is_redundant(Query *root,
  *
  * Build a RestrictInfo node containing the given subexpression.
  *
- * The ispusheddown flag must be supplied by the caller.  We initialize
- * fields that depend only on the given subexpression, leaving others that
- * depend on context (or may never be needed at all) to be filled later.
+ * The is_pushed_down and valid_everywhere flags must be supplied by the
+ * caller.
+ *
+ * We initialize fields that depend only on the given subexpression, leaving
+ * others that depend on context (or may never be needed at all) to be filled
+ * later.
  */
 RestrictInfo *
-make_restrictinfo(Expr *clause, bool ispusheddown)
+make_restrictinfo(Expr *clause, bool is_pushed_down, bool valid_everywhere)
 {
        RestrictInfo *restrictinfo = makeNode(RestrictInfo);
 
        restrictinfo->clause = clause;
-       restrictinfo->ispusheddown = ispusheddown;
-       restrictinfo->canjoin = false;          /* may get set below */
+       restrictinfo->is_pushed_down = is_pushed_down;
+       restrictinfo->valid_everywhere = valid_everywhere;
+       restrictinfo->can_join = false;         /* may get set below */
 
        /*
         * If it's a binary opclause, set up left/right relids info.
@@ -67,7 +72,7 @@ make_restrictinfo(Expr *clause, bool ispusheddown)
                        !bms_is_empty(restrictinfo->right_relids) &&
                        !bms_overlap(restrictinfo->left_relids,
                                                 restrictinfo->right_relids))
-                       restrictinfo->canjoin = true;
+                       restrictinfo->can_join = true;
        }
        else
        {
@@ -84,7 +89,9 @@ make_restrictinfo(Expr *clause, bool ispusheddown)
         */
        if (or_clause((Node *) clause))
        {
-               restrictinfo->orclause = make_sub_restrictinfos(clause, ispusheddown);
+               restrictinfo->orclause = make_sub_restrictinfos(clause,
+                                                                                                               is_pushed_down,
+                                                                                                               valid_everywhere);
        }
        else
        {
@@ -126,7 +133,8 @@ make_restrictinfo(Expr *clause, bool ispusheddown)
  * Recursively insert sub-RestrictInfo nodes into a boolean expression.
  */
 static Expr *
-make_sub_restrictinfos(Expr *clause, bool ispusheddown)
+make_sub_restrictinfos(Expr *clause, bool is_pushed_down,
+                                          bool valid_everywhere)
 {
        if (or_clause((Node *) clause))
        {
@@ -136,7 +144,8 @@ make_sub_restrictinfos(Expr *clause, bool ispusheddown)
                foreach(temp, ((BoolExpr *) clause)->args)
                        orlist = lappend(orlist,
                                                         make_sub_restrictinfos(lfirst(temp),
-                                                                                                       ispusheddown));
+                                                                                                       is_pushed_down,
+                                                                                                       valid_everywhere));
                return make_orclause(orlist);
        }
        else if (and_clause((Node *) clause))
@@ -147,11 +156,14 @@ make_sub_restrictinfos(Expr *clause, bool ispusheddown)
                foreach(temp, ((BoolExpr *) clause)->args)
                        andlist = lappend(andlist,
                                                          make_sub_restrictinfos(lfirst(temp),
-                                                                                                        ispusheddown));
+                                                                                                        is_pushed_down,
+                                                                                                        valid_everywhere));
                return make_andclause(andlist);
        }
        else
-               return (Expr *) make_restrictinfo(clause, ispusheddown);
+               return (Expr *) make_restrictinfo(clause,
+                                                                                 is_pushed_down,
+                                                                                 valid_everywhere);
 }
 
 /*
@@ -207,7 +219,7 @@ get_actual_join_clauses(List *restrictinfo_list,
        {
                RestrictInfo *clause = (RestrictInfo *) lfirst(temp);
 
-               if (clause->ispusheddown)
+               if (clause->is_pushed_down)
                        *otherquals = lappend(*otherquals, clause->clause);
                else
                        *joinquals = lappend(*joinquals, clause->clause);
@@ -348,7 +360,7 @@ join_clause_is_redundant(Query *root,
                        if (refrinfo->mergejoinoperator != InvalidOid &&
                                rinfo->left_pathkey == refrinfo->left_pathkey &&
                                rinfo->right_pathkey == refrinfo->right_pathkey &&
-                               (rinfo->ispusheddown == refrinfo->ispusheddown ||
+                               (rinfo->is_pushed_down == refrinfo->is_pushed_down ||
                                 !IS_OUTER_JOIN(jointype)))
                        {
                                redundant = true;
index 010adcc8592a0e5a35636f54dd5b5848033201cf..14486591da9ea56754ce23747477f0bc60319d65 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.90 2004/01/04 03:51:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.91 2004/01/05 05:07:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -321,6 +321,8 @@ typedef struct Path
 {
        NodeTag         type;
 
+       NodeTag         pathtype;               /* tag identifying scan/join method */
+
        RelOptInfo *parent;                     /* the relation this path can build */
 
        /* estimated execution costs for path (see costsize.c for more info) */
@@ -329,8 +331,6 @@ typedef struct Path
        Cost            total_cost;             /* total cost (assuming all tuples
                                                                 * fetched) */
 
-       NodeTag         pathtype;               /* tag identifying scan/join method */
-
        List       *pathkeys;           /* sort ordering of path's output */
        /* pathkeys is a List of Lists of PathKeyItem nodes; see above */
 } Path;
@@ -389,6 +389,9 @@ typedef struct IndexPath
 
 /*
  * TidPath represents a scan by TID
+ *
+ * tideval is an implicitly OR'ed list of quals of the form CTID = something.
+ * Note they are bare quals, not RestrictInfos.
  */
 typedef struct TidPath
 {
@@ -570,13 +573,17 @@ typedef struct HashPath
  * When we do form the outer join's joinrel, we still need to distinguish
  * those quals that are actually in that join's JOIN/ON condition from those
  * that appeared higher in the tree and were pushed down to the join rel
- * because they used no other rels.  That's what the ispusheddown flag is for;
- * it tells us that a qual came from a point above the join of the specific
- * set of base rels that it uses (or that the JoinInfo structures claim it
- * uses).  A clause that originally came from WHERE will *always* have its
- * ispusheddown flag set; a clause that came from an INNER JOIN condition,
- * but doesn't use all the rels being joined, will also have ispusheddown set
- * because it will get attached to some lower joinrel.
+ * because they used no other rels.  That's what the is_pushed_down flag is
+ * for; it tells us that a qual came from a point above the join of the
+ * specific set of base rels that it uses (or that the JoinInfo structures
+ * claim it uses).  A clause that originally came from WHERE will *always*
+ * have its is_pushed_down flag set; a clause that came from an INNER JOIN
+ * condition, but doesn't use all the rels being joined, will also have
+ * is_pushed_down set because it will get attached to some lower joinrel.
+ *
+ * We also store a valid_everywhere flag, which says that the clause is not
+ * affected by any lower-level outer join, and therefore any conditions it
+ * asserts can be presumed true throughout the plan tree.
  *
  * In general, the referenced clause might be arbitrarily complex.     The
  * kinds of clauses we can handle as indexscan quals, mergejoin clauses,
@@ -602,7 +609,9 @@ typedef struct RestrictInfo
 
        Expr       *clause;                     /* the represented clause of WHERE or JOIN */
 
-       bool            ispusheddown;   /* TRUE if clause was pushed down in level */
+       bool            is_pushed_down; /* TRUE if clause was pushed down in level */
+
+       bool            valid_everywhere;       /* TRUE if valid on every level */
 
        /*
         * This flag is set true if the clause looks potentially useful as a
@@ -611,7 +620,7 @@ typedef struct RestrictInfo
         * (Whether the operator is actually merge or hash joinable isn't
         * checked, however.)
         */
-       bool            canjoin;
+       bool            can_join;
 
        /* The set of relids (varnos) referenced in the clause: */
        Relids          clause_relids;
index 15540a66643f5810bd4fe4d8307f93add707d1a2..002edfd91e8515e314e1f9e7ed893ec61e89c359 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.59 2004/01/04 03:51:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.60 2004/01/05 05:07:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -49,6 +49,7 @@ extern bool enable_nestloop;
 extern bool enable_mergejoin;
 extern bool enable_hashjoin;
 
+extern double clamp_row_est(double nrows);
 extern void cost_seqscan(Path *path, Query *root,
                         RelOptInfo *baserel);
 extern void cost_index(Path *path, Query *root,
index 3e60d608020c1134fabab39eacd3b1f32ce91f29..e1a46bcb3f843ec01025a209fde6f4be861953de 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.71 2004/01/04 00:07:32 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.72 2004/01/05 05:07:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,11 +43,13 @@ extern List *group_clauses_by_indexkey_for_or(RelOptInfo *rel,
                                                                                          Expr *orsubclause);
 extern List *expand_indexqual_conditions(IndexOptInfo *index,
                                                        List *clausegroups);
+extern void check_partial_indexes(Query *root, RelOptInfo *rel);
 
 /*
  * orindxpath.c
  *       additional routines for indexable OR clauses
  */
+extern bool create_or_index_quals(Query *root, RelOptInfo *rel);
 extern void create_or_index_paths(Query *root, RelOptInfo *rel);
 
 /*
index 362ed26a61b5715082e4545b576e6728a96126dd..022bfa253a39e74db6309d7f691ecf43b013395e 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/restrictinfo.h,v 1.21 2004/01/04 00:07:32 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/restrictinfo.h,v 1.22 2004/01/05 05:07:36 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -16,7 +16,8 @@
 
 #include "nodes/relation.h"
 
-extern RestrictInfo *make_restrictinfo(Expr *clause, bool ispusheddown);
+extern RestrictInfo *make_restrictinfo(Expr *clause, bool is_pushed_down,
+                                                                          bool valid_everywhere);
 extern bool restriction_is_or_clause(RestrictInfo *restrictinfo);
 extern List *get_actual_clauses(List *restrictinfo_list);
 extern void get_actual_join_clauses(List *restrictinfo_list,