Install some slightly realistic cost estimation for bitmap index scans.
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 21 Apr 2005 02:28:02 +0000 (02:28 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 21 Apr 2005 02:28:02 +0000 (02:28 +0000)
src/backend/nodes/outfuncs.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/indxpath.c
src/backend/optimizer/path/orindxpath.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/util/pathnode.c
src/include/nodes/relation.h
src/include/optimizer/cost.h

index c241b113674fe78aa780867f07dfc710682f5388..1ea59314ea2710861e15b30455b7820071609b2a 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.247 2005/04/19 22:35:14 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.248 2005/04/21 02:28:01 tgl Exp $
  *
  * NOTES
  *   Every node type that can appear in stored rules' parsetrees *must*
@@ -1024,6 +1024,8 @@ _outIndexPath(StringInfo str, IndexPath *node)
    WRITE_NODE_FIELD(indexquals);
    WRITE_BOOL_FIELD(isjoininner);
    WRITE_ENUM_FIELD(indexscandir, ScanDirection);
+   WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
+   WRITE_FLOAT_FIELD(indexselectivity, "%.4f");
    WRITE_FLOAT_FIELD(rows, "%.0f");
 }
 
index 06ebe18fe789b0a42499a887c9f27158f2137467..a33ba0f796f47624c936babf8d4d8edeb90ca08e 100644 (file)
@@ -49,7 +49,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.142 2005/04/19 22:35:15 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.143 2005/04/21 02:28:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -103,6 +103,7 @@ bool        enable_hashjoin = true;
 
 
 static bool cost_qual_eval_walker(Node *node, QualCost *total);
+static Selectivity cost_bitmap_qual(Node *bitmapqual, Cost *totalCost);
 static Selectivity approx_selectivity(Query *root, List *quals,
                   JoinType jointype);
 static Selectivity join_in_selectivity(JoinPath *path, Query *root);
@@ -126,7 +127,7 @@ clamp_row_est(double nrows)
    if (nrows < 1.0)
        nrows = 1.0;
    else
-       nrows = ceil(nrows);
+       nrows = rint(nrows);
 
    return nrows;
 }
@@ -232,6 +233,10 @@ cost_nonsequential_access(double relpages)
  * 'is_injoin' is T if we are considering using the index scan as the inside
  *     of a nestloop join (hence, some of the indexQuals are join clauses)
  *
+ * cost_index() takes an IndexPath not just a Path, because it sets a few
+ * additional fields of the IndexPath besides startup_cost and total_cost.
+ * These fields are needed if the IndexPath is used in a BitmapIndexScan.
+ *
  * NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
  * Any additional quals evaluated as qpquals may reduce the number of returned
  * tuples, but they won't reduce the number of tuples we have to fetch from
@@ -241,7 +246,7 @@ cost_nonsequential_access(double relpages)
  * it was a list of bare clause expressions.
  */
 void
-cost_index(Path *path, Query *root,
+cost_index(IndexPath *path, Query *root,
           IndexOptInfo *index,
           List *indexQuals,
           bool is_injoin)
@@ -286,6 +291,14 @@ cost_index(Path *path, Query *root,
                     PointerGetDatum(&indexSelectivity),
                     PointerGetDatum(&indexCorrelation));
 
+   /*
+    * Save amcostestimate's results for possible use by cost_bitmap_scan.
+    * We don't bother to save indexStartupCost or indexCorrelation, because
+    * a bitmap scan doesn't care about either.
+    */
+   path->indextotalcost = indexTotalCost;
+   path->indexselectivity = indexSelectivity;
+
    /* all costs for touching index itself included here */
    startup_cost += indexStartupCost;
    run_cost += indexTotalCost - indexStartupCost;
@@ -396,8 +409,8 @@ cost_index(Path *path, Query *root,
 
    run_cost += cpu_per_tuple * tuples_fetched;
 
-   path->startup_cost = startup_cost;
-   path->total_cost = startup_cost + run_cost;
+   path->path.startup_cost = startup_cost;
+   path->path.total_cost = startup_cost + run_cost;
 }
 
 /*
@@ -417,19 +430,151 @@ cost_bitmap_scan(Path *path, Query *root, RelOptInfo *baserel,
 {
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
+   Cost        indexTotalCost;
+   Selectivity indexSelectivity;
+   Cost        cpu_per_tuple;
+   Cost        cost_per_page;
+   double      tuples_fetched;
+   double      pages_fetched;
+   double      T;
 
    /* Should only be applied to base relations */
    Assert(IsA(baserel, RelOptInfo));
    Assert(baserel->relid > 0);
    Assert(baserel->rtekind == RTE_RELATION);
 
-   /* XXX lots to do here */
-   run_cost += 10;
+   if (!enable_indexscan)      /* XXX use a separate enable flag? */
+       startup_cost += disable_cost;
+
+   /*
+    * Estimate total cost of obtaining the bitmap, as well as its total
+    * selectivity.
+    */
+   indexTotalCost = 0;
+   indexSelectivity = cost_bitmap_qual(bitmapqual, &indexTotalCost);
+
+   startup_cost += indexTotalCost;
+
+   /*
+    * The number of heap pages that need to be fetched is the same as the
+    * Mackert and Lohman formula for the case T <= b (ie, no re-reads
+    * needed).
+    */
+   tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
+
+   T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
+   pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+   if (pages_fetched > T)
+       pages_fetched = T;
+
+   /*
+    * For small numbers of pages we should charge random_page_cost apiece,
+    * while if nearly all the table's pages are being read, it's more
+    * appropriate to charge 1.0 apiece.  The effect is nonlinear, too.
+    * For lack of a better idea, interpolate like this to determine the
+    * cost per page.
+    */
+   cost_per_page = random_page_cost -
+       (random_page_cost - 1.0) * sqrt(pages_fetched / T);
+
+   run_cost += pages_fetched * cost_per_page;
+
+   /*
+    * Estimate CPU costs per tuple.
+    *
+    * Often the indexquals don't need to be rechecked at each tuple ...
+    * but not always, especially not if there are enough tuples involved
+    * that the bitmaps become lossy.  For the moment, just assume they
+    * will be rechecked always.
+    */
+   startup_cost += baserel->baserestrictcost.startup;
+   cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+
+   run_cost += cpu_per_tuple * tuples_fetched;
 
    path->startup_cost = startup_cost;
    path->total_cost = startup_cost + run_cost;
 }
 
+/*
+ * cost_bitmap_qual
+ *     Recursively cost the AND/OR/IndexPath tree of a bitmap scan
+ *
+ * 'bitmapqual' is the tree node to examine.  Its execution cost is added to
+ * *totalCost (the caller must zero that first), and its estimated
+ * selectivity is returned.  Any other node type is reported via elog(ERROR).
+ */
+static Selectivity
+cost_bitmap_qual(Node *bitmapqual, Cost *totalCost)
+{
+   Selectivity result;
+   Selectivity subresult;
+   ListCell   *l;
+
+   if (and_clause(bitmapqual))
+   {
+       /*
+        * We estimate AND selectivity on the assumption that the inputs
+        * are independent.  This is probably often wrong, but we don't
+        * have the info to do better.
+        *
+        * The runtime cost of the BitmapAnd itself is estimated at 100x
+        * cpu_operator_cost for each tbm_intersect needed.  Probably too
+        * small, definitely too simplistic?
+        *
+        * This must agree with make_bitmap_and in createplan.c.
+        */
+       result = 1.0;
+       foreach(l, ((BoolExpr *) bitmapqual)->args)
+       {
+           subresult = cost_bitmap_qual((Node *) lfirst(l), totalCost);
+           result *= subresult;
+           if (l != list_head(((BoolExpr *) bitmapqual)->args))
+               *totalCost += 100.0 * cpu_operator_cost;
+       }
+   }
+   else if (or_clause(bitmapqual))
+   {
+       /*
+        * We estimate OR selectivity on the assumption that the inputs
+        * are non-overlapping, since that's often the case in "x IN (list)"
+        * type situations.  Of course, we clamp to 1.0 at the end.
+        *
+        * The runtime cost of the BitmapOr itself is estimated at 100x
+        * cpu_operator_cost for each tbm_union needed.  Probably too
+        * small, definitely too simplistic?  We are aware that the tbm_unions
+        * are optimized out when the inputs are BitmapIndexScans.
+        *
+        * This must agree with make_bitmap_or in createplan.c.
+        */
+       result = 0.0;
+       foreach(l, ((BoolExpr *) bitmapqual)->args)
+       {
+           subresult = cost_bitmap_qual((Node *) lfirst(l), totalCost);
+           result += subresult;
+           if (l != list_head(((BoolExpr *) bitmapqual)->args) &&
+               !IsA((Node *) lfirst(l), IndexPath))
+               *totalCost += 100.0 * cpu_operator_cost;
+       }
+       result = Min(result, 1.0);
+   }
+   else if (IsA(bitmapqual, IndexPath))
+   {
+       IndexPath *ipath = (IndexPath *) bitmapqual;
+
+       /* this must agree with create_bitmap_subplan in createplan.c */
+       *totalCost += ipath->indextotalcost;
+       result = ipath->indexselectivity;
+   }
+   else
+   {
+       elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
+       result = 0.0;               /* keep compiler quiet */
+   }
+
+   return result;
+}
+
 /*
  * cost_tidscan
  *   Determines and returns the cost of scanning a relation using TIDs.
index 937e2aed80eb9e000b8ea891694493ffca6035bd..e387a7bd768364d0d2b038b5240618c83231ca2a 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.174 2005/04/20 21:48:04 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.175 2005/04/21 02:28:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1710,7 +1710,7 @@ make_innerjoin_index_path(Query *root,
    /* Like costsize.c, force estimate to be at least one row */
    pathnode->rows = clamp_row_est(pathnode->rows);
 
-   cost_index(&pathnode->path, root, index, indexquals, true);
+   cost_index(pathnode, root, index, indexquals, true);
 
    return (Path *) pathnode;
 }
index 0843bb6ea88a43c4edbe41efb32117dd37f2a442..c30c26562c5aadf80ccd230b61c282d7b6231eb1 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.67 2005/03/27 06:29:36 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.68 2005/04/21 02:28:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -353,7 +353,7 @@ best_or_subclause_index(Query *root,
        IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
        List       *indexclauses;
        List       *indexquals;
-       Path        subclause_path;
+       IndexPath   subclause_path;
 
        /*
         * Ignore partial indexes that do not match the query.  If predOK
@@ -402,13 +402,13 @@ best_or_subclause_index(Query *root,
 
        cost_index(&subclause_path, root, index, indexquals, false);
 
-       if (!found || subclause_path.total_cost < *retTotalCost)
+       if (!found || subclause_path.path.total_cost < *retTotalCost)
        {
            *retIndexInfo = index;
            *retIndexClauses = flatten_clausegroups_list(indexclauses);
            *retIndexQuals = indexquals;
-           *retStartupCost = subclause_path.startup_cost;
-           *retTotalCost = subclause_path.total_cost;
+           *retStartupCost = subclause_path.path.startup_cost;
+           *retTotalCost = subclause_path.path.total_cost;
            found = true;
        }
    }
index d15f0c6dcae292bc760fe8bb53cc4cccd4e059e5..0abb900beaaee6b7ae53c7d3b1142c66b1460329 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.180 2005/04/19 22:35:16 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.181 2005/04/21 02:28:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -976,10 +976,12 @@ create_bitmap_subplan(Query *root, Node *bitmapqual)
                                      linitial(iscan->indxqualorig),
                                      linitial(iscan->indxstrategy),
                                      linitial(iscan->indxsubtype));
-       /* XXX this cost is wrong: */
-       copy_path_costsize(&bscan->scan.plan, &ipath->path);
-       /* use the indexscan-specific rows estimate, not the parent rel's */
-       bscan->scan.plan.plan_rows = ipath->rows;
+       /* this must agree with cost_bitmap_qual in costsize.c */
+       bscan->scan.plan.startup_cost = 0.0;
+       bscan->scan.plan.total_cost = ipath->indextotalcost;
+       bscan->scan.plan.plan_rows =
+           clamp_row_est(ipath->indexselectivity * ipath->path.parent->tuples);
+       bscan->scan.plan.plan_width = 0; /* meaningless */
        plan = (Plan *) bscan;
    }
    else
@@ -2068,8 +2070,9 @@ make_bitmap_and(List *bitmapplans)
    ListCell   *subnode;
 
    /*
-    * Compute cost as sum of subplan costs, plus 10x cpu_operator_cost
+    * Compute cost as sum of subplan costs, plus 100x cpu_operator_cost
     * (a pretty arbitrary amount, agreed) for each tbm_intersect needed.
+    * This must agree with cost_bitmap_qual in costsize.c.
     */
    plan->startup_cost = 0;
    plan->total_cost = 0;
@@ -2085,7 +2088,10 @@ make_bitmap_and(List *bitmapplans)
            plan->plan_rows = subplan->plan_rows;
        }
        else
+       {
+           plan->total_cost += cpu_operator_cost * 100.0;
            plan->plan_rows = Min(plan->plan_rows, subplan->plan_rows);
+       }
        plan->total_cost += subplan->total_cost;
    }
 
@@ -2106,10 +2112,12 @@ make_bitmap_or(List *bitmapplans)
    ListCell   *subnode;
 
    /*
-    * Compute cost as sum of subplan costs, plus 10x cpu_operator_cost
+    * Compute cost as sum of subplan costs, plus 100x cpu_operator_cost
     * (a pretty arbitrary amount, agreed) for each tbm_union needed.
     * We assume that tbm_union can be optimized away for BitmapIndexScan
     * subplans.
+    *
+    * This must agree with cost_bitmap_qual in costsize.c.
     */
    plan->startup_cost = 0;
    plan->total_cost = 0;
@@ -2122,7 +2130,7 @@ make_bitmap_or(List *bitmapplans)
        if (subnode == list_head(bitmapplans))  /* first node? */
            plan->startup_cost = subplan->startup_cost;
        else if (!IsA(subplan, BitmapIndexScan))
-           plan->total_cost += cpu_operator_cost * 10;
+           plan->total_cost += cpu_operator_cost * 100.0;
        plan->total_cost += subplan->total_cost;
        plan->plan_rows += subplan->plan_rows; /* ignore overlap */
    }
index ec0fc8a29ab023e5d25f32e04c2913fe0cfa9a1f..823486e2f3b83390b47e246a7bef733c37341bc9 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.116 2005/04/19 22:35:17 tgl Exp $
+ *   $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.117 2005/04/21 02:28:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -466,7 +466,7 @@ create_index_path(Query *root,
     */
    pathnode->rows = index->rel->rows;
 
-   cost_index(&pathnode->path, root, index, indexquals, false);
+   cost_index(pathnode, root, index, indexquals, false);
 
    return pathnode;
 }
index 2e4e1834fe639ca9616f58c1aef87aa7faab2ced..4ae0ae3a2c058c7362e42b31ce1106ebefe0ca23 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.105 2005/04/19 22:35:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.106 2005/04/21 02:28:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -374,6 +374,10 @@ typedef struct Path
  * NoMovementScanDirection for an indexscan, but the planner wants to
  * distinguish ordered from unordered indexes for building pathkeys.)
  *
+ * 'indextotalcost' and 'indexselectivity' are saved in the IndexPath so that
+ * we need not recompute them when considering using the same index in a
+ * bitmap index/heap scan (see BitmapHeapPath).
+ *
  * 'rows' is the estimated result tuple count for the indexscan.  This
  * is the same as path.parent->rows for a simple indexscan, but it is
  * different for a nestloop inner scan, because the additional indexquals
@@ -389,6 +393,8 @@ typedef struct IndexPath
    List       *indexquals;
    bool        isjoininner;
    ScanDirection indexscandir;
+   Cost        indextotalcost;
+   Selectivity indexselectivity;
    double      rows;           /* estimated number of result tuples */
 } IndexPath;
 
@@ -401,9 +407,12 @@ typedef struct IndexPath
  *
  * The individual indexscans are represented by IndexPath nodes, and any
  * logic on top of them is represented by regular AND and OR expressions.
- * Notice that we can use the same IndexPath node both to represent an
- * ordered index scan, and as the child of a BitmapHeapPath that represents
- * scanning the same index in an unordered way.
+ * Notice that we can use the same IndexPath node both to represent a regular
+ * IndexScan plan, and as the child of a BitmapHeapPath that represents
+ * scanning the same index using a BitmapIndexScan.  The startup_cost and
+ * total_cost figures of an IndexPath always represent the costs to use it
+ * as a regular IndexScan.  The costs of a BitmapIndexScan can be computed
+ * using the IndexPath's indextotalcost and indexselectivity.
  *
  * BitmapHeapPaths can be nestloop inner indexscans.  The isjoininner and
  * rows fields serve the same purpose as for plain IndexPaths.
index 8b1445dadf1d7d3fcb2885fcd64cefeffadf0ccf..1f7ea96ee04c4d05707af11d3b50af0ee93e2026 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.64 2005/04/19 22:35:18 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.65 2005/04/21 02:28:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -51,7 +51,7 @@ extern bool enable_hashjoin;
 
 extern double clamp_row_est(double nrows);
 extern void cost_seqscan(Path *path, Query *root, RelOptInfo *baserel);
-extern void cost_index(Path *path, Query *root, IndexOptInfo *index,
+extern void cost_index(IndexPath *path, Query *root, IndexOptInfo *index,
           List *indexQuals, bool is_injoin);
 extern void cost_bitmap_scan(Path *path, Query *root, RelOptInfo *baserel,
                             Node *bitmapqual, bool is_injoin);