First cut at making indexscan cost estimates depend on correlation

author Tom Lane <tgl@sss.pgh.pa.us>

Wed, 9 May 2001 23:13:37 +0000 (23:13 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Wed, 9 May 2001 23:13:37 +0000 (23:13 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 9 May 2001 23:13:37 +0000 (23:13 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 9 May 2001 23:13:37 +0000 (23:13 +0000)
diff --git a/doc/src/sgml/indexcost.sgml b/doc/src/sgml/indexcost.sgml

index 9c781f97fc8de18d1b9e88609acbe8b3b4ae4552..482a2e199e6370a4a19f7da96a9e9f1f3df07d37 100644 (file)
--- a/doc/src/sgml/indexcost.sgml
+++ b/doc/src/sgml/indexcost.sgml
@@ -1,5 +1,5 @@
  <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.6 2000/12/22 21:51:57 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.7 2001/05/09 23:13:34 tgl Exp $
  -->
  
   <chapter id="indexcost">
@@ -57,7 +57,8 @@ amcostestimate (Query *root,
                  List *indexQuals,
                  Cost *indexStartupCost,
                  Cost *indexTotalCost,
-                Selectivity *indexSelectivity);
+                Selectivity *indexSelectivity,
+                double *indexCorrelation);
     </programlisting>
  
     The first four parameters are inputs:
@@ -103,7 +104,7 @@ amcostestimate (Query *root,
    </para>
  
    <para>
-   The last three parameters are pass-by-reference outputs:
+   The last four parameters are pass-by-reference outputs:
  
     <variablelist>
      <varlistentry>
@@ -132,6 +133,16 @@ amcostestimate (Query *root,
        </para>
       </listitem>
      </varlistentry>
+
+    <varlistentry>
+     <term>*indexCorrelation</term>
+     <listitem>
+      <para>
+       Set to correlation coefficient between index scan order and
+       underlying table's order
+      </para>
+     </listitem>
+    </varlistentry>
     </variablelist>
    </para>
  
@@ -172,6 +183,13 @@ amcostestimate (Query *root,
     tuples that actually pass the given qual conditions.
    </para>
  
+  <para>
+   The indexCorrelation should be set to the correlation (ranging between
+   -1.0 and 1.0) between the index order and the table order.  This is used
+   to adjust the estimate for the cost of fetching tuples from the main
+   table.
+  </para>
+
    <procedure>
     <title>Cost Estimation</title>
     <para>
@@ -224,6 +242,14 @@ amcostestimate (Query *root,
       </programlisting>
      </para>
     </step>
+
+   <step>
+    <para>
+     Estimate the index correlation.  For a simple ordered index on a single
+     field, this can be retrieved from pg_statistic.  If the correlation
+     is not known, the conservative estimate is zero (no correlation).
+    </para>
+   </step>
    </procedure>
  
    <para>
@@ -237,8 +263,8 @@ amcostestimate (Query *root,
  
     <programlisting>
  prorettype = 0
-pronargs = 7
-proargtypes = 0 0 0 0 0 0 0
+pronargs = 8
+proargtypes = 0 0 0 0 0 0 0 0
     </programlisting>
  
     We use zero ("opaque") for all the arguments since none of them have types
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 7dfe834b7796f53972527c824c4e40eb56b2c668..dddca240e9586b4a044513123833a838e3838176 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -31,17 +31,18 @@
   * result by interpolating between startup_cost and total_cost.  In detail:
   *     actual_cost = startup_cost +
   *         (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows;
- * Note that a relation's rows count (and, by extension, a Plan's plan_rows)
- * are set without regard to any LIMIT, so that this equation works properly.
- * (Also, these routines guarantee not to set the rows count to zero, so there
- * will be no zero divide.)  The LIMIT is applied as a separate Plan node.
+ * Note that a base relation's rows count (and, by extension, plan_rows for
+ * plan nodes below the LIMIT node) is set without regard to any LIMIT, so
+ * that this equation works properly.  (Also, these routines guarantee not to
+ * set the rows count to zero, so there will be no zero divide.)  The LIMIT is
+ * applied as a top-level plan node.
   *
   *
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.72 2001/05/09 00:35:09 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.73 2001/05/09 23:13:34 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -205,12 +206,18 @@ cost_index(Path *path, Query *root,
  {
     Cost        startup_cost = 0;
     Cost        run_cost = 0;
-   Cost        cpu_per_tuple;
     Cost        indexStartupCost;
     Cost        indexTotalCost;
     Selectivity indexSelectivity;
+   double      indexCorrelation,
+               csquared;
+   Cost        min_IO_cost,
+               max_IO_cost;
+   Cost        cpu_per_tuple;
     double      tuples_fetched;
     double      pages_fetched;
+   double      T,
+               b;
  
     /* Should only be applied to base relations */
     Assert(IsA(baserel, RelOptInfo) &&IsA(index, IndexOptInfo));
@@ -224,38 +231,52 @@ cost_index(Path *path, Query *root,
      * Call index-access-method-specific code to estimate the processing
      * cost for scanning the index, as well as the selectivity of the
      * index (ie, the fraction of main-table tuples we will have to
-    * retrieve).
+    * retrieve) and its correlation to the main-table tuple order.
      */
-   OidFunctionCall7(index->amcostestimate,
+   OidFunctionCall8(index->amcostestimate,
                      PointerGetDatum(root),
                      PointerGetDatum(baserel),
                      PointerGetDatum(index),
                      PointerGetDatum(indexQuals),
                      PointerGetDatum(&indexStartupCost),
                      PointerGetDatum(&indexTotalCost),
-                    PointerGetDatum(&indexSelectivity));
+                    PointerGetDatum(&indexSelectivity),
+                    PointerGetDatum(&indexCorrelation));
  
     /* all costs for touching index itself included here */
     startup_cost += indexStartupCost;
     run_cost += indexTotalCost - indexStartupCost;
  
-   /*
+   /*----------
      * Estimate number of main-table tuples and pages fetched.
      *
-    * If the number of tuples is much smaller than the number of pages in
-    * the relation, each tuple will cost a separate nonsequential fetch.
-    * If it is comparable or larger, then probably we will be able to
-    * avoid some fetches.  We use a growth rate of log(#tuples/#pages +
-    * 1) --- probably totally bogus, but intuitively it gives the right
-    * shape of curve at least.
+    * When the index ordering is uncorrelated with the table ordering,
+    * we use an approximation proposed by Mackert and Lohman, "Index Scans
+    * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
+    * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
+    * The Mackert and Lohman approximation is that the number of pages
+    * fetched is
+    *  PF =
+    *      min(2TNs/(2T+Ns), T)            when T <= b
+    *      2TNs/(2T+Ns)                    when T > b and Ns <= 2Tb/(2T-b)
+    *      b + (Ns - 2Tb/(2T-b))*(T-b)/T   when T > b and Ns > 2Tb/(2T-b)
+    * where
+    *      T = # pages in table
+    *      N = # tuples in table
+    *      s = selectivity = fraction of table to be scanned
+    *      b = # buffer pages available (we include kernel space here)
      *
-    * XXX if the relation has recently been "clustered" using this index,
-    * then in fact the target tuples will be highly nonuniformly
-    * distributed, and we will be seriously overestimating the scan cost!
-    * Currently we have no way to know whether the relation has been
-    * clustered, nor how much it's been modified since the last
-    * clustering, so we ignore this effect.  Would be nice to do better
-    * someday.
+    * When the index ordering is exactly correlated with the table ordering
+    * (just after a CLUSTER, for example), the number of pages fetched should
+    * be just sT.  What's more, these will be sequential fetches, not the
+    * random fetches that occur in the uncorrelated case.  So, depending on
+    * the extent of correlation, we should estimate the actual I/O cost
+    * somewhere between s * T * 1.0 and PF * random_page_cost.  We
+    * currently interpolate linearly between these two endpoints based on
+    * the correlation squared (XXX is that appropriate?).
+    *
+    * In any case the number of tuples fetched is Ns.
+    *----------
      */
  
     tuples_fetched = indexSelectivity * baserel->tuples;
@@ -263,24 +284,56 @@ cost_index(Path *path, Query *root,
     if (tuples_fetched < 1.0)
         tuples_fetched = 1.0;
  
-   if (baserel->pages > 0)
-       pages_fetched = ceil(baserel->pages *
-                            log(tuples_fetched / baserel->pages + 1.0));
+   /* This part is the Mackert and Lohman formula */
+
+   T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
+   b = (effective_cache_size > 1) ? effective_cache_size : 1.0;
+
+   if (T <= b)
+   {
+       pages_fetched =
+           (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+       if (pages_fetched > T)
+           pages_fetched = T;
+   }
     else
-       pages_fetched = tuples_fetched;
+   {
+       double  lim;
+
+       lim = (2.0 * T * b) / (2.0 * T - b);
+       if (tuples_fetched <= lim)
+       {
+           pages_fetched =
+               (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+       }
+       else
+       {
+           pages_fetched =
+               b + (tuples_fetched - lim) * (T - b) / T;
+       }
+   }
  
     /*
-    * Now estimate one nonsequential access per page fetched, plus
-    * appropriate CPU costs per tuple.
+    * min_IO_cost corresponds to the perfectly correlated case (csquared=1),
+    * max_IO_cost to the perfectly uncorrelated case (csquared=0).  Note
+    * that we just charge random_page_cost per page in the uncorrelated
+    * case, rather than using cost_nonsequential_access, since we've already
+    * accounted for caching effects by using the Mackert model.
      */
+   min_IO_cost = ceil(indexSelectivity * T);
+   max_IO_cost = pages_fetched * random_page_cost;
  
-   /* disk costs for main table */
-   run_cost += pages_fetched * cost_nonsequential_access(baserel->pages);
+   /*
+    * Now interpolate based on estimated index order correlation
+    * to get total disk I/O cost for main table accesses.
+    */
+   csquared = indexCorrelation * indexCorrelation;
  
-   /* CPU costs */
-   cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
+   run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
  
     /*
+    * Estimate CPU costs per tuple.
+    *
      * Normally the indexquals will be removed from the list of
      * restriction clauses that we have to evaluate as qpquals, so we
      * should subtract their costs from baserestrictcost.  For a lossy
@@ -290,6 +343,8 @@ cost_index(Path *path, Query *root,
      * Rather than work out exactly how much to subtract, we don't
      * subtract anything in that case either.
      */
+   cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
+
     if (!index->lossy && !is_injoin)
         cpu_per_tuple -= cost_qual_eval(indexQuals);
  
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c

index 30f02de5c72e2c1dfb6e7b88a7dcc8d98f5e19db..9b620c80f5aeaf12858aa9e325389ec2e1ff1e9c 100644 (file)
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.31 2001/04/18 20:42:55 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.32 2001/05/09 23:13:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -28,8 +28,9 @@ typedef struct
  typedef struct
  {
     int         varno;
+   int         varattno;
     int         sublevels_up;
-} contain_whole_tuple_var_context;
+} contain_var_reference_context;
  
  typedef struct
  {
@@ -39,8 +40,8 @@ typedef struct
  
  static bool pull_varnos_walker(Node *node,
                    pull_varnos_context *context);
-static bool contain_whole_tuple_var_walker(Node *node,
-                  contain_whole_tuple_var_context *context);
+static bool contain_var_reference_walker(Node *node,
+                  contain_var_reference_context *context);
  static bool contain_var_clause_walker(Node *node, void *context);
  static bool pull_var_clause_walker(Node *node,
                        pull_var_clause_context *context);
@@ -129,10 +130,10 @@ pull_varnos_walker(Node *node, pull_varnos_context *context)
  
  
  /*
- *     contain_whole_tuple_var
+ *     contain_var_reference
   *
- *     Detect whether a parsetree contains any references to the whole
- *     tuple of a given rtable entry (ie, a Var with varattno = 0).
+ *     Detect whether a parsetree contains any references to a specified
+ *     attribute of a specified rtable entry.
   *
   * NOTE: this is used on not-yet-planned expressions.  It may therefore find
   * bare SubLinks, and if so it needs to recurse into them to look for uplevel
@@ -140,11 +141,12 @@ pull_varnos_walker(Node *node, pull_varnos_context *context)
   * SubPlan, we only need to look at the parameters passed to the subplan.
   */
  bool
-contain_whole_tuple_var(Node *node, int varno, int levelsup)
+contain_var_reference(Node *node, int varno, int varattno, int levelsup)
  {
-   contain_whole_tuple_var_context context;
+   contain_var_reference_context context;
  
     context.varno = varno;
+   context.varattno = varattno;
     context.sublevels_up = levelsup;
  
     /*
@@ -154,15 +156,15 @@ contain_whole_tuple_var(Node *node, int varno, int levelsup)
      */
     if (node && IsA(node, Query))
         return query_tree_walker((Query *) node,
-                                contain_whole_tuple_var_walker,
+                                contain_var_reference_walker,
                                  (void *) &context, true);
     else
-       return contain_whole_tuple_var_walker(node, &context);
+       return contain_var_reference_walker(node, &context);
  }
  
  static bool
-contain_whole_tuple_var_walker(Node *node,
-                              contain_whole_tuple_var_context *context)
+contain_var_reference_walker(Node *node,
+                            contain_var_reference_context *context)
  {
     if (node == NULL)
         return false;
@@ -171,8 +173,8 @@ contain_whole_tuple_var_walker(Node *node,
         Var        *var = (Var *) node;
  
         if (var->varno == context->varno &&
-           var->varlevelsup == context->sublevels_up &&
-           var->varattno == InvalidAttrNumber)
+           var->varattno == context->varattno &&
+           var->varlevelsup == context->sublevels_up)
             return true;
         return false;
     }
@@ -187,11 +189,11 @@ contain_whole_tuple_var_walker(Node *node,
          */
         Expr       *expr = (Expr *) node;
  
-       if (contain_whole_tuple_var_walker((Node *) ((SubPlan *) expr->oper)->sublink->oper,
-                                          context))
+       if (contain_var_reference_walker((Node *) ((SubPlan *) expr->oper)->sublink->oper,
+                                        context))
             return true;
-       if (contain_whole_tuple_var_walker((Node *) expr->args,
-                                          context))
+       if (contain_var_reference_walker((Node *) expr->args,
+                                        context))
             return true;
         return false;
     }
@@ -202,16 +204,29 @@ contain_whole_tuple_var_walker(Node *node,
  
         context->sublevels_up++;
         result = query_tree_walker((Query *) node,
-                                  contain_whole_tuple_var_walker,
+                                  contain_var_reference_walker,
                                    (void *) context, true);
         context->sublevels_up--;
         return result;
     }
-   return expression_tree_walker(node, contain_whole_tuple_var_walker,
+   return expression_tree_walker(node, contain_var_reference_walker,
                                   (void *) context);
  }
  
  
+/*
+ *     contain_whole_tuple_var
+ *
+ *     Detect whether a parsetree contains any references to the whole
+ *     tuple of a given rtable entry (ie, a Var with varattno = 0).
+ */
+bool
+contain_whole_tuple_var(Node *node, int varno, int levelsup)
+{
+   return contain_var_reference(node, varno, InvalidAttrNumber, levelsup);
+}
+
+
  /*
   * contain_var_clause
   *   Recursively scan a clause to discover whether it contains any Var nodes
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 41ba82db7b574d6ba6d095a25092376d04702250..d7633dc47dd6a21040762a19772c2e7a98ff6ca6 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.88 2001/05/07 00:43:23 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.89 2001/05/09 23:13:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -39,6 +39,7 @@
  #include "optimizer/cost.h"
  #include "parser/parse_func.h"
  #include "parser/parse_oper.h"
+#include "parser/parsetree.h"
  #include "utils/builtins.h"
  #include "utils/date.h"
  #include "utils/int8.h"
@@ -818,7 +819,6 @@ eqjoinsel(PG_FUNCTION_ARGS)
  {
  #ifdef NOT_USED                    /* see neqjoinsel() before removing me! */
     Oid         opid = PG_GETARG_OID(0);
-
  #endif
     Oid         relid1 = PG_GETARG_OID(1);
     AttrNumber  attno1 = PG_GETARG_INT16(2);
@@ -2230,16 +2230,14 @@ string_to_datum(const char *str, Oid datatype)
   *-------------------------------------------------------------------------
   */
  
-static Datum
-genericcostestimate(PG_FUNCTION_ARGS)
+static void
+genericcostestimate(Query *root, RelOptInfo *rel,
+                   IndexOptInfo *index, List *indexQuals,
+                   Cost *indexStartupCost,
+                   Cost *indexTotalCost,
+                   Selectivity *indexSelectivity,
+                   double *indexCorrelation)
  {
-   Query      *root = (Query *) PG_GETARG_POINTER(0);
-   RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
-   IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
-   List       *indexQuals = (List *) PG_GETARG_POINTER(3);
-   Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
-   Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
-   Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
     double      numIndexTuples;
     double      numIndexPages;
  
@@ -2275,33 +2273,134 @@ genericcostestimate(PG_FUNCTION_ARGS)
     *indexTotalCost = numIndexPages +
         (cpu_index_tuple_cost + cost_qual_eval(indexQuals)) * numIndexTuples;
  
-   PG_RETURN_VOID();
+   /*
+    * Generic assumption about index correlation: there isn't any.
+    */
+   *indexCorrelation = 0.0;
  }
  
-/*
- * For first cut, just use generic function for all index types.
- */
  
  Datum
  btcostestimate(PG_FUNCTION_ARGS)
  {
-   return genericcostestimate(fcinfo);
+   Query      *root = (Query *) PG_GETARG_POINTER(0);
+   RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
+   IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
+   List       *indexQuals = (List *) PG_GETARG_POINTER(3);
+   Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
+   Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
+   Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
+   double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
+
+   genericcostestimate(root, rel, index, indexQuals,
+                       indexStartupCost, indexTotalCost,
+                       indexSelectivity, indexCorrelation);
+
+   /*
+    * If it's a functional index, leave the default zero-correlation
+    * estimate in place.  If not, and if we can get an estimate for
+    * the first variable's ordering correlation C from pg_statistic,
+    * estimate the index correlation as C / number-of-columns.
+    * (The idea here is that multiple columns dilute the importance
+    * of the first column's ordering, but don't negate it entirely.)
+    */
+   if (index->indproc == InvalidOid)
+   {
+       Oid         relid;
+       HeapTuple   tuple;
+
+       relid = getrelid(lfirsti(rel->relids), root->rtable);
+       Assert(relid != InvalidOid);
+       tuple = SearchSysCache(STATRELATT,
+                              ObjectIdGetDatum(relid),
+                              Int16GetDatum(index->indexkeys[0]),
+                              0, 0);
+       if (HeapTupleIsValid(tuple))
+       {
+           Oid     typid;
+           int32   typmod;
+           float4 *numbers;
+           int     nnumbers;
+
+           get_atttypetypmod(relid, index->indexkeys[0],
+                             &typid, &typmod);
+           if (get_attstatsslot(tuple, typid, typmod,
+                                STATISTIC_KIND_CORRELATION,
+                                index->ordering[0],
+                                NULL, NULL, &numbers, &nnumbers))
+           {
+               double  varCorrelation;
+               int     nKeys;
+
+               Assert(nnumbers == 1);
+               varCorrelation = numbers[0];
+               for (nKeys = 1; index->indexkeys[nKeys] != 0; nKeys++)
+                   /*skip*/;
+
+               *indexCorrelation = varCorrelation / nKeys;
+
+               free_attstatsslot(typid, NULL, 0, numbers, nnumbers);
+           }
+           ReleaseSysCache(tuple);
+       }
+   }
+
+   PG_RETURN_VOID();
  }
  
  Datum
  rtcostestimate(PG_FUNCTION_ARGS)
  {
-   return genericcostestimate(fcinfo);
+   Query      *root = (Query *) PG_GETARG_POINTER(0);
+   RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
+   IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
+   List       *indexQuals = (List *) PG_GETARG_POINTER(3);
+   Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
+   Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
+   Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
+   double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
+
+   genericcostestimate(root, rel, index, indexQuals,
+                       indexStartupCost, indexTotalCost,
+                       indexSelectivity, indexCorrelation);
+
+   PG_RETURN_VOID();
  }
  
  Datum
  hashcostestimate(PG_FUNCTION_ARGS)
  {
-   return genericcostestimate(fcinfo);
+   Query      *root = (Query *) PG_GETARG_POINTER(0);
+   RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
+   IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
+   List       *indexQuals = (List *) PG_GETARG_POINTER(3);
+   Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
+   Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
+   Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
+   double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
+
+   genericcostestimate(root, rel, index, indexQuals,
+                       indexStartupCost, indexTotalCost,
+                       indexSelectivity, indexCorrelation);
+
+   PG_RETURN_VOID();
  }
  
  Datum
  gistcostestimate(PG_FUNCTION_ARGS)
  {
-   return genericcostestimate(fcinfo);
+   Query      *root = (Query *) PG_GETARG_POINTER(0);
+   RelOptInfo *rel = (RelOptInfo *) PG_GETARG_POINTER(1);
+   IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(2);
+   List       *indexQuals = (List *) PG_GETARG_POINTER(3);
+   Cost       *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
+   Cost       *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
+   Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
+   double     *indexCorrelation = (double *) PG_GETARG_POINTER(7);
+
+   genericcostestimate(root, rel, index, indexQuals,
+                       indexStartupCost, indexTotalCost,
+                       indexSelectivity, indexCorrelation);
+
+   PG_RETURN_VOID();
  }
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c

index ee15a940cc5b41e3448b78a814159fe17880e21e..573c21afd8d24e3882c997b3792a153ab6e2e51d 100644 (file)
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.54 2001/05/09 00:35:09 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.55 2001/05/09 23:13:35 tgl Exp $
   *
   * NOTES
   *   Eventually, the index information should go through here, too.
@@ -185,6 +185,36 @@ get_atttypmod(Oid relid, AttrNumber attnum)
         return -1;
  }
  
+/*
+ * get_atttypetypmod
+ *
+ *     A two-fer: given the relation id and the attribute number,
+ *     fetch both type OID and atttypmod in a single cache lookup.
+ *
+ * Unlike the otherwise-similar get_atttype/get_atttypmod, this routine
+ * raises an error if it can't obtain the information.
+ */
+void
+get_atttypetypmod(Oid relid, AttrNumber attnum,
+                 Oid *typid, int32 *typmod)
+{
+   HeapTuple   tp;
+   Form_pg_attribute att_tup;
+
+   tp = SearchSysCache(ATTNUM,
+                       ObjectIdGetDatum(relid),
+                       Int16GetDatum(attnum),
+                       0, 0);
+   if (!HeapTupleIsValid(tp))
+       elog(ERROR, "cache lookup failed for relation %u attribute %d",
+            relid, attnum);
+   att_tup = (Form_pg_attribute) GETSTRUCT(tp);
+
+   *typid = att_tup->atttypid;
+   *typmod = att_tup->atttypmod;
+   ReleaseSysCache(tp);
+}
+
  /*             ---------- INDEX CACHE ----------                        */
  
  /*     watch this space...
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index d03d472043f4ec55025cb824ba1e172486fac796..7d7acf96f73752d498fa018f8eec90b94aa24192 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: pg_proc.h,v 1.184 2001/03/22 04:00:39 momjian Exp $
+ * $Id: pg_proc.h,v 1.185 2001/05/09 23:13:35 tgl Exp $
   *
   * NOTES
   *   The script catalog/genbki.sh reads this file and generates .bki
@@ -210,9 +210,9 @@ DESCR("not equal");
  DATA(insert OID =  89 (  version          PGUID 12 f t f t 0 f 25 "" 100 0 0 100 pgsql_version - ));
  DESCR("PostgreSQL version string");
  
-DATA(insert OID = 1265 (  rtcostestimate   PGUID 12 f t f t 7 f 0 "0 0 0 0 0 0 0" 100 0 0 100  rtcostestimate - ));
+DATA(insert OID = 1265 (  rtcostestimate   PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100  rtcostestimate - ));
  DESCR("r-tree cost estimator");
-DATA(insert OID = 1268 (  btcostestimate   PGUID 12 f t f t 7 f 0 "0 0 0 0 0 0 0" 100 0 0 100  btcostestimate - ));
+DATA(insert OID = 1268 (  btcostestimate   PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100  btcostestimate - ));
  DESCR("btree cost estimator");
  
  /* OIDS 100 - 199 */
@@ -789,7 +789,7 @@ DESCR("convert name to char()");
  DATA(insert OID =  409 (  name            PGUID 12 f t t t 1 f 19 "1042" 100 0 0 100   bpchar_name - ));
  DESCR("convert char() to name");
  
-DATA(insert OID =  438 (  hashcostestimate PGUID 12 f t f t 7 f 0 "0 0 0 0 0 0 0" 100 0 0 100  hashcostestimate - ));
+DATA(insert OID =  438 (  hashcostestimate PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100  hashcostestimate - ));
  DESCR("hash index cost estimator");
  
  DATA(insert OID = 440 (  hashgettuple     PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100  hashgettuple - ));
@@ -1014,7 +1014,7 @@ DESCR("larger of two");
  DATA(insert OID = 771 (  int2smaller      PGUID 12 f t t t 2 f 21 "21 21" 100 0 0 100  int2smaller - ));
  DESCR("smaller of two");
  
-DATA(insert OID = 772 (  gistcostestimate  PGUID 12 f t f t 7 f 0 "0 0 0 0 0 0 0" 100 0 0 100  gistcostestimate - ));
+DATA(insert OID = 772 (  gistcostestimate  PGUID 12 f t f t 8 f 0 "0 0 0 0 0 0 0 0" 100 0 0 100  gistcostestimate - ));
  DESCR("gist cost estimator");
  DATA(insert OID = 774 (  gistgettuple     PGUID 12 f t f t 2 f 23 "0 0" 100 0 0 100  gistgettuple - ));
  DESCR("gist(internal)");
diff --git a/src/include/optimizer/var.h b/src/include/optimizer/var.h

index 45048133eb03002bb378fa3c90f6b7b8a5e7f0e8..4cad677c7ce4efa36082dc5d907d2b531a632d0b 100644 (file)
--- a/src/include/optimizer/var.h
+++ b/src/include/optimizer/var.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: var.h,v 1.13 2001/04/18 20:42:55 tgl Exp $
+ * $Id: var.h,v 1.14 2001/05/09 23:13:36 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -17,6 +17,8 @@
  #include "nodes/primnodes.h"
  
  extern List *pull_varnos(Node *node);
+extern bool contain_var_reference(Node *node, int varno, int varattno,
+                                 int levelsup);
  extern bool contain_whole_tuple_var(Node *node, int varno, int levelsup);
  extern bool contain_var_clause(Node *node);
  extern List *pull_var_clause(Node *node, bool includeUpperVars);
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h

index 3f18a4aea63aeb0711ec4b7f5728ff0ea41967ea..719f68a873f3ec1cd8579f5887025992ae47a545 100644 (file)
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -6,7 +6,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: lsyscache.h,v 1.32 2001/05/09 00:35:09 tgl Exp $
+ * $Id: lsyscache.h,v 1.33 2001/05/09 23:13:37 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -21,6 +21,8 @@ extern AttrNumber get_attnum(Oid relid, char *attname);
  extern Oid get_atttype(Oid relid, AttrNumber attnum);
  extern bool get_attisset(Oid relid, char *attname);
  extern int32 get_atttypmod(Oid relid, AttrNumber attnum);
+extern void get_atttypetypmod(Oid relid, AttrNumber attnum,
+                             Oid *typid, int32 *typmod);
  extern RegProcedure get_opcode(Oid opno);
  extern char *get_opname(Oid opno);
  extern bool op_mergejoinable(Oid opno, Oid ltype, Oid rtype,
author	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 9 May 2001 23:13:37 +0000 (23:13 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 9 May 2001 23:13:37 +0000 (23:13 +0000)
doc/src/sgml/indexcost.sgml		patch \| blob \| blame \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| blame \| history
src/backend/optimizer/util/var.c		patch \| blob \| blame \| history
src/backend/utils/adt/selfuncs.c		patch \| blob \| blame \| history
src/backend/utils/cache/lsyscache.c		patch \| blob \| blame \| history
src/include/catalog/pg_proc.h		patch \| blob \| blame \| history
src/include/optimizer/var.h		patch \| blob \| blame \| history
src/include/utils/lsyscache.h		patch \| blob \| blame \| history