Account for optimized MinMax aggregates during SS_finalize_plan.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 13 Jul 2023 20:50:13 +0000 (16:50 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 14 Jul 2023 15:41:20 +0000 (11:41 -0400)
We are capable of optimizing MIN() and MAX() aggregates on indexed
columns into subqueries that exploit the index, rather than the normal
thing of scanning the whole table.  When we do this, we replace the
Aggref node(s) with Params referencing subquery outputs.  Such Params
really ought to be included in the per-plan-node extParam/allParam
sets computed by SS_finalize_plan.  However, we've never done so
up to now because of an ancient implementation choice to perform
that substitution during set_plan_references, which runs after
SS_finalize_plan, so that SS_finalize_plan never sees these Params.

This seems like clearly a bug, yet there have been no field reports
of problems that could trace to it.  This may be because the types
of Plan nodes that could contain Aggrefs do not have any of the
rescan optimizations that are controlled by extParam/allParam.
Nonetheless it seems certain to bite us someday, so let's fix it
in a self-contained patch that can be back-patched if we find a
case in which there's a live bug pre-v17.

The cleanest fix would be to perform a separate tree walk to do
these substitutions before SS_finalize_plan runs.  That seems
unattractive, first because a whole-tree mutation pass is expensive,
and second because we lack infrastructure for visiting expression
subtrees in a Plan tree, so that we'd need a new function knowing
as much as SS_finalize_plan knows about that.  I also considered
swapping the order of SS_finalize_plan and set_plan_references,
but that fell foul of various assumptions that seem tricky to fix.
So the approach adopted here is to teach SS_finalize_plan itself
to check for such Aggrefs.  I refactored things a bit in setrefs.c
to avoid having three copies of the code that does that.

Given the lack of any currently-known bug, no test case here.

Discussion: https://postgr.es/m/2391880.1689025003@sss.pgh.pa.us

src/backend/optimizer/plan/setrefs.c
src/backend/optimizer/plan/subselect.c
src/include/optimizer/planmain.h

index c63758cb2b728769a1f161e53c6aa249f13aae41..16e5537f7f9f9262e1d901eeec3491d845310a46 100644 (file)
@@ -2181,22 +2181,14 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context)
        if (IsA(node, Aggref))
        {
                Aggref     *aggref = (Aggref *) node;
+               Param      *aggparam;
 
                /* See if the Aggref should be replaced by a Param */
-               if (context->root->minmax_aggs != NIL &&
-                       list_length(aggref->args) == 1)
+               aggparam = find_minmax_agg_replacement_param(context->root, aggref);
+               if (aggparam != NULL)
                {
-                       TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args);
-                       ListCell   *lc;
-
-                       foreach(lc, context->root->minmax_aggs)
-                       {
-                               MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
-
-                               if (mminfo->aggfnoid == aggref->aggfnoid &&
-                                       equal(mminfo->target, curTarget->expr))
-                                       return (Node *) copyObject(mminfo->param);
-                       }
+                       /* Make a copy of the Param for paranoia's sake */
+                       return (Node *) copyObject(aggparam);
                }
                /* If no match, just fall through to process it normally */
        }
@@ -3225,22 +3217,14 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context)
        if (IsA(node, Aggref))
        {
                Aggref     *aggref = (Aggref *) node;
+               Param      *aggparam;
 
                /* See if the Aggref should be replaced by a Param */
-               if (context->root->minmax_aggs != NIL &&
-                       list_length(aggref->args) == 1)
+               aggparam = find_minmax_agg_replacement_param(context->root, aggref);
+               if (aggparam != NULL)
                {
-                       TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args);
-                       ListCell   *lc;
-
-                       foreach(lc, context->root->minmax_aggs)
-                       {
-                               MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
-
-                               if (mminfo->aggfnoid == aggref->aggfnoid &&
-                                       equal(mminfo->target, curTarget->expr))
-                                       return (Node *) copyObject(mminfo->param);
-                       }
+                       /* Make a copy of the Param for paranoia's sake */
+                       return (Node *) copyObject(aggparam);
                }
                /* If no match, just fall through to process it normally */
        }
@@ -3395,6 +3379,38 @@ set_windowagg_runcondition_references(PlannerInfo *root,
        return newlist;
 }
 
+/*
+ * find_minmax_agg_replacement_param
+ *             If the given Aggref is one that we are optimizing into a subquery
+ *             (cf. planagg.c), then return the Param that should replace it.
+ *             Else return NULL.
+ *
+ * This is exported so that SS_finalize_plan can use it before setrefs.c runs.
+ * Note that it will not find anything until we have built a Plan from a
+ * MinMaxAggPath, as root->minmax_aggs will never be filled otherwise.
+ */
+Param *
+find_minmax_agg_replacement_param(PlannerInfo *root, Aggref *aggref)
+{
+       if (root->minmax_aggs != NIL &&
+               list_length(aggref->args) == 1)
+       {
+               TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args);
+               ListCell   *lc;
+
+               foreach(lc, root->minmax_aggs)
+               {
+                       MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
+
+                       if (mminfo->aggfnoid == aggref->aggfnoid &&
+                               equal(mminfo->target, curTarget->expr))
+                               return mminfo->param;
+               }
+       }
+       return NULL;
+}
+
+
 /*****************************************************************************
  *                                     QUERY DEPENDENCY MANAGEMENT
  *****************************************************************************/
index da2d8abe015aec34c2110fd0459bd6956a4a1318..250ba8edcb80ba8457d8ee5c231f6b294cced63e 100644 (file)
@@ -2835,8 +2835,8 @@ finalize_plan(PlannerInfo *root, Plan *plan,
 }
 
 /*
- * finalize_primnode: add IDs of all PARAM_EXEC params appearing in the given
- * expression tree to the result set.
+ * finalize_primnode: add IDs of all PARAM_EXEC params that appear (or will
+ * appear) in the given expression tree to the result set.
  */
 static bool
 finalize_primnode(Node *node, finalize_primnode_context *context)
@@ -2853,7 +2853,26 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
                }
                return false;                   /* no more to do here */
        }
-       if (IsA(node, SubPlan))
+       else if (IsA(node, Aggref))
+       {
+               /*
+                * Check to see if the aggregate will be replaced by a Param
+                * referencing a subquery output during setrefs.c.  If so, we must
+                * account for that Param here.  (For various reasons, it's not
+                * convenient to perform that substitution earlier than setrefs.c, nor
+                * to perform this processing after setrefs.c.  Thus we need a wart
+                * here.)
+                */
+               Aggref     *aggref = (Aggref *) node;
+               Param      *aggparam;
+
+               aggparam = find_minmax_agg_replacement_param(context->root, aggref);
+               if (aggparam != NULL)
+                       context->paramids = bms_add_member(context->paramids,
+                                                                                          aggparam->paramid);
+               /* Fall through to examine the agg's arguments */
+       }
+       else if (IsA(node, SubPlan))
        {
                SubPlan    *subplan = (SubPlan *) node;
                Plan       *plan = planner_subplan_get_plan(context->root, subplan);
index 5fc900737d8ff946f9290df2670bba1d0db27718..31c188176b7bd975731886b5f943f07f14c28925 100644 (file)
@@ -110,6 +110,8 @@ extern bool innerrel_is_unique(PlannerInfo *root,
  */
 extern Plan *set_plan_references(PlannerInfo *root, Plan *plan);
 extern bool trivial_subqueryscan(SubqueryScan *plan);
+extern Param *find_minmax_agg_replacement_param(PlannerInfo *root,
+                                                                                               Aggref *aggref);
 extern void record_plan_function_dependency(PlannerInfo *root, Oid funcid);
 extern void record_plan_type_dependency(PlannerInfo *root, Oid typid);
 extern bool extract_query_dependencies_walker(Node *node, PlannerInfo *context);