Teach planner to account for HAVING quals in aggregation plan nodes.
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 2 Nov 2017 15:24:12 +0000 (11:24 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 2 Nov 2017 15:24:12 +0000 (11:24 -0400)
For some reason, we have never accounted for either the evaluation cost
or the selectivity of filter conditions attached to Agg and Group nodes
(which, in practice, are always conditions from a HAVING clause).

Applying our regular selectivity logic to post-grouping conditions is a
bit bogus, but it's surely better than taking the selectivity as 1.0.
Perhaps someday the extended-statistics mechanism can be taught to provide
statistics that would help us in getting non-default estimates here.

Per a gripe from Benjamin Coutu.  This is surely a bug fix, but I'm
hesitant to back-patch because of the prospect of destabilizing existing
plan choices.  Given that it took us this long to notice the bug, it's
probably not hurting too many people in the field.

Discussion: https://postgr.es/m/20968.1509486337@sss.pgh.pa.us

src/backend/optimizer/path/costsize.c
src/backend/optimizer/prep/prepunion.c
src/backend/optimizer/util/pathnode.c
src/include/optimizer/cost.h

index ce32b8a4b902f958b85409bc71923496edf5f850..98fb16e85a0a7cf197a4d6146bf33d22868f890f 100644 (file)
@@ -1874,6 +1874,7 @@ void
 cost_agg(Path *path, PlannerInfo *root,
         AggStrategy aggstrategy, const AggClauseCosts *aggcosts,
         int numGroupCols, double numGroups,
+        List *quals,
         Cost input_startup_cost, Cost input_total_cost,
         double input_tuples)
 {
@@ -1955,6 +1956,26 @@ cost_agg(Path *path, PlannerInfo *root,
        output_tuples = numGroups;
    }
 
+   /*
+    * If there are quals (HAVING quals), account for their cost and
+    * selectivity.
+    */
+   if (quals)
+   {
+       QualCost    qual_cost;
+
+       cost_qual_eval(&qual_cost, quals, root);
+       startup_cost += qual_cost.startup;
+       total_cost += qual_cost.startup + output_tuples * qual_cost.per_tuple;
+
+       output_tuples = clamp_row_est(output_tuples *
+                                     clauselist_selectivity(root,
+                                                            quals,
+                                                            0,
+                                                            JOIN_INNER,
+                                                            NULL));
+   }
+
    path->rows = output_tuples;
    path->startup_cost = startup_cost;
    path->total_cost = total_cost;
@@ -2040,12 +2061,15 @@ cost_windowagg(Path *path, PlannerInfo *root,
 void
 cost_group(Path *path, PlannerInfo *root,
           int numGroupCols, double numGroups,
+          List *quals,
           Cost input_startup_cost, Cost input_total_cost,
           double input_tuples)
 {
+   double      output_tuples;
    Cost        startup_cost;
    Cost        total_cost;
 
+   output_tuples = numGroups;
    startup_cost = input_startup_cost;
    total_cost = input_total_cost;
 
@@ -2055,7 +2079,27 @@ cost_group(Path *path, PlannerInfo *root,
     */
    total_cost += cpu_operator_cost * input_tuples * numGroupCols;
 
-   path->rows = numGroups;
+   /*
+    * If there are quals (HAVING quals), account for their cost and
+    * selectivity.
+    */
+   if (quals)
+   {
+       QualCost    qual_cost;
+
+       cost_qual_eval(&qual_cost, quals, root);
+       startup_cost += qual_cost.startup;
+       total_cost += qual_cost.startup + output_tuples * qual_cost.per_tuple;
+
+       output_tuples = clamp_row_est(output_tuples *
+                                     clauselist_selectivity(root,
+                                                            quals,
+                                                            0,
+                                                            JOIN_INNER,
+                                                            NULL));
+   }
+
+   path->rows = output_tuples;
    path->startup_cost = startup_cost;
    path->total_cost = total_cost;
 }
index 1c84a2cb2807e172a2b1745d7a88ed3dfd538295..f620243ab440e8c3af0a7a2d16dde420010aaffb 100644 (file)
@@ -977,6 +977,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
     */
    cost_agg(&hashed_p, root, AGG_HASHED, NULL,
             numGroupCols, dNumGroups,
+            NIL,
             input_path->startup_cost, input_path->total_cost,
             input_path->rows);
 
@@ -991,6 +992,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
              input_path->rows, input_path->pathtarget->width,
              0.0, work_mem, -1.0);
    cost_group(&sorted_p, root, numGroupCols, dNumGroups,
+              NIL,
               sorted_p.startup_cost, sorted_p.total_cost,
               input_path->rows);
 
index 2d491eb0ba9c129b83b5d38ee0b1bfa9b276192c..36ec025b05ba5b894d3ac3149e5fa141fbc2f52d 100644 (file)
@@ -1374,6 +1374,11 @@ create_result_path(PlannerInfo *root, RelOptInfo *rel,
    pathnode->path.startup_cost = target->cost.startup;
    pathnode->path.total_cost = target->cost.startup +
        cpu_tuple_cost + target->cost.per_tuple;
+
+   /*
+    * Add cost of qual, if any --- but we ignore its selectivity, since our
+    * rowcount estimate should be 1 no matter what the qual is.
+    */
    if (resconstantqual)
    {
        QualCost    qual_cost;
@@ -1596,6 +1601,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
            cost_agg(&agg_path, root,
                     AGG_HASHED, NULL,
                     numCols, pathnode->path.rows,
+                    NIL,
                     subpath->startup_cost,
                     subpath->total_cost,
                     rel->rows);
@@ -2592,6 +2598,7 @@ create_group_path(PlannerInfo *root,
    cost_group(&pathnode->path, root,
               list_length(groupClause),
               numGroups,
+              qual,
               subpath->startup_cost, subpath->total_cost,
               subpath->rows);
 
@@ -2709,6 +2716,7 @@ create_agg_path(PlannerInfo *root,
    cost_agg(&pathnode->path, root,
             aggstrategy, aggcosts,
             list_length(groupClause), numGroups,
+            qual,
             subpath->startup_cost, subpath->total_cost,
             subpath->rows);
 
@@ -2817,6 +2825,7 @@ create_groupingsets_path(PlannerInfo *root,
                     agg_costs,
                     numGroupCols,
                     rollup->numGroups,
+                    having_qual,
                     subpath->startup_cost,
                     subpath->total_cost,
                     subpath->rows);
@@ -2840,6 +2849,7 @@ create_groupingsets_path(PlannerInfo *root,
                         agg_costs,
                         numGroupCols,
                         rollup->numGroups,
+                        having_qual,
                         0.0, 0.0,
                         subpath->rows);
                if (!rollup->is_hashed)
@@ -2863,6 +2873,7 @@ create_groupingsets_path(PlannerInfo *root,
                         agg_costs,
                         numGroupCols,
                         rollup->numGroups,
+                        having_qual,
                         sort_path.startup_cost,
                         sort_path.total_cost,
                         sort_path.rows);
@@ -2932,6 +2943,19 @@ create_minmaxagg_path(PlannerInfo *root,
    pathnode->path.total_cost = initplan_cost + target->cost.startup +
        target->cost.per_tuple + cpu_tuple_cost;
 
+   /*
+    * Add cost of qual, if any --- but we ignore its selectivity, since our
+    * rowcount estimate should be 1 no matter what the qual is.
+    */
+   if (quals)
+   {
+       QualCost    qual_cost;
+
+       cost_qual_eval(&qual_cost, quals, root);
+       pathnode->path.startup_cost += qual_cost.startup;
+       pathnode->path.total_cost += qual_cost.startup + qual_cost.per_tuple;
+   }
+
    return pathnode;
 }
 
@@ -3781,6 +3805,7 @@ reparameterize_pathlist_by_child(PlannerInfo *root,
    {
        Path       *path = reparameterize_path_by_child(root, lfirst(lc),
                                                        child_rel);
+
        if (path == NULL)
        {
            list_free(result);
index 306d923a22acf49051086fd3032c5bac1a59cc0f..6c2317df3977f322848add722cb8eceacd99959a 100644 (file)
@@ -116,6 +116,7 @@ extern void cost_material(Path *path,
 extern void cost_agg(Path *path, PlannerInfo *root,
         AggStrategy aggstrategy, const AggClauseCosts *aggcosts,
         int numGroupCols, double numGroups,
+        List *quals,
         Cost input_startup_cost, Cost input_total_cost,
         double input_tuples);
 extern void cost_windowagg(Path *path, PlannerInfo *root,
@@ -124,6 +125,7 @@ extern void cost_windowagg(Path *path, PlannerInfo *root,
               double input_tuples);
 extern void cost_group(Path *path, PlannerInfo *root,
           int numGroupCols, double numGroups,
+          List *quals,
           Cost input_startup_cost, Cost input_total_cost,
           double input_tuples);
 extern void initial_cost_nestloop(PlannerInfo *root,