Marginal tweaks to make sure that roundoff error won't cause us to make
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 15 Feb 2003 21:39:58 +0000 (21:39 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 15 Feb 2003 21:39:58 +0000 (21:39 +0000)
a bad choice between sorted and hashed aggregation.

src/backend/optimizer/path/costsize.c
src/backend/optimizer/plan/planner.c

index 40621f97624378e70f12106f75e3eb469979fa74..54e47e242431891dee536e5fe4696ff163ce2260 100644 (file)
@@ -49,7 +49,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.105 2003/02/08 20:20:54 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.106 2003/02/15 21:39:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -601,6 +601,15 @@ cost_agg(Path *path, Query *root,
     *
     * We will produce a single output tuple if not grouping,
     * and a tuple per group otherwise.
+    *
+    * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
+    * same total CPU cost, but AGG_SORTED has lower startup cost.  If the
+    * input path is already sorted appropriately, AGG_SORTED should be
+    * preferred (since it has no risk of memory overflow).  This will happen
+    * as long as the computed total costs are indeed exactly equal --- but
+    * if there's roundoff error we might do the wrong thing.  So be sure
+    * that the computations below form the same intermediate values in the
+    * same order.
     */
    if (aggstrategy == AGG_PLAIN)
    {
@@ -614,15 +623,17 @@ cost_agg(Path *path, Query *root,
        /* Here we are able to deliver output on-the-fly */
        startup_cost = input_startup_cost;
        total_cost = input_total_cost;
-       total_cost += cpu_operator_cost * (input_tuples + numGroups) * numAggs;
+       /* calcs phrased this way to match HASHED case, see note above */
        total_cost += cpu_operator_cost * input_tuples * numGroupCols;
+       total_cost += cpu_operator_cost * input_tuples * numAggs;
+       total_cost += cpu_operator_cost * numGroups * numAggs;
    }
    else
    {
        /* must be AGG_HASHED */
        startup_cost = input_total_cost;
-       startup_cost += cpu_operator_cost * input_tuples * numAggs;
        startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
+       startup_cost += cpu_operator_cost * input_tuples * numAggs;
        total_cost = startup_cost;
        total_cost += cpu_operator_cost * numGroups * numAggs;
    }
index 2b46b4b740102b229051a26c17071cb993422366..76862c769cb78eb9f84b6deb036f6714cbccd395 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.147 2003/02/15 20:12:40 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.148 2003/02/15 21:39:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1003,7 +1003,7 @@ grouping_planner(Query *parse, double tuple_fraction)
                        tuple_fraction /= dNumGroups;
 
                    if (compare_fractional_path_costs(&hashed_p, &sorted_p,
-                                                     tuple_fraction) <= 0)
+                                                     tuple_fraction) < 0)
                    {
                        /* Hashed is cheaper, so use it */
                        use_hashed_grouping = true;