Fix planner failure in some cases of sorting by an aggregate.
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 20 Apr 2021 15:32:02 +0000 (11:32 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 20 Apr 2021 15:32:02 +0000 (11:32 -0400)
An oversight introduced by the incremental-sort patches caused
"could not find pathkey item to sort" errors in some situations
where a sort key involves an aggregate or window function.

The basic problem here is that find_em_expr_usable_for_sorting_rel
isn't properly modeling what prepare_sort_from_pathkeys will do
later.  Rather than hoping we can keep those functions in sync,
let's refactor so that they actually share the code for
identifying a suitable sort expression.

With this refactoring, tlist.c's tlist_member_ignore_relabel
is unused.  I removed it in HEAD but left it in place in v13,
in case any extensions are using it.

Per report from Luc Vlaming.  Back-patch to v13 where the
problem arose.

James Coleman and Tom Lane

Discussion: https://postgr.es/m/91f3ec99-85a4-fa55-ea74-33f85a5c651f@swarm64.com

src/backend/optimizer/path/equivclass.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/util/tlist.c
src/include/optimizer/paths.h
src/include/optimizer/tlist.h
src/test/regress/expected/incremental_sort.out
src/test/regress/sql/incremental_sort.sql

index 0188c1e9a1894b9689138cc82e1e33ee1a894db5..6e87fba2aa3234b444658f5be4dc2db5cf6a9135 100644 (file)
@@ -35,6 +35,7 @@
 static EquivalenceMember *add_eq_member(EquivalenceClass *ec,
                                                                                Expr *expr, Relids relids, Relids nullable_relids,
                                                                                bool is_child, Oid datatype);
+static bool is_exprlist_member(Expr *node, List *exprs);
 static void generate_base_implied_equalities_const(PlannerInfo *root,
                                                                                                   EquivalenceClass *ec);
 static void generate_base_implied_equalities_no_const(PlannerInfo *root,
@@ -769,6 +770,167 @@ get_eclass_for_sort_expr(PlannerInfo *root,
        return newec;
 }
 
+/*
+ * find_ec_member_matching_expr
+ *             Locate an EquivalenceClass member matching the given expr, if any;
+ *             return NULL if no match.
+ *
+ * "Matching" is defined as "equal after stripping RelabelTypes".
+ * This is used for identifying sort expressions, and we need to allow
+ * binary-compatible relabeling for some cases involving binary-compatible
+ * sort operators.
+ *
+ * Child EC members are ignored unless they belong to given 'relids'.
+ */
+EquivalenceMember *
+find_ec_member_matching_expr(EquivalenceClass *ec,
+                                                        Expr *expr,
+                                                        Relids relids)
+{
+       ListCell   *lc;
+
+       /* We ignore binary-compatible relabeling on both ends */
+       while (expr && IsA(expr, RelabelType))
+               expr = ((RelabelType *) expr)->arg;
+
+       foreach(lc, ec->ec_members)
+       {
+               EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
+               Expr       *emexpr;
+
+               /*
+                * We shouldn't be trying to sort by an equivalence class that
+                * contains a constant, so no need to consider such cases any further.
+                */
+               if (em->em_is_const)
+                       continue;
+
+               /*
+                * Ignore child members unless they belong to the requested rel.
+                */
+               if (em->em_is_child &&
+                       !bms_is_subset(em->em_relids, relids))
+                       continue;
+
+               /*
+                * Match if same expression (after stripping relabel).
+                */
+               emexpr = em->em_expr;
+               while (emexpr && IsA(emexpr, RelabelType))
+                       emexpr = ((RelabelType *) emexpr)->arg;
+
+               if (equal(emexpr, expr))
+                       return em;
+       }
+
+       return NULL;
+}
+
+/*
+ * find_computable_ec_member
+ *             Locate an EquivalenceClass member that can be computed from the
+ *             expressions appearing in "exprs"; return NULL if no match.
+ *
+ * "exprs" can be either a list of bare expression trees, or a list of
+ * TargetEntry nodes.  Either way, it should contain Vars and possibly
+ * Aggrefs and WindowFuncs, which are matched to the corresponding elements
+ * of the EquivalenceClass's expressions.
+ *
+ * Unlike find_ec_member_matching_expr, there's no special provision here
+ * for binary-compatible relabeling.  This is intentional: if we have to
+ * compute an expression in this way, setrefs.c is going to insist on exact
+ * matches of Vars to the source tlist.
+ *
+ * Child EC members are ignored unless they belong to given 'relids'.
+ * Also, non-parallel-safe expressions are ignored if 'require_parallel_safe'.
+ *
+ * Note: some callers pass root == NULL for notational reasons.  This is OK
+ * when require_parallel_safe is false.
+ */
+EquivalenceMember *
+find_computable_ec_member(PlannerInfo *root,
+                                                 EquivalenceClass *ec,
+                                                 List *exprs,
+                                                 Relids relids,
+                                                 bool require_parallel_safe)
+{
+       ListCell   *lc;
+
+       foreach(lc, ec->ec_members)
+       {
+               EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
+               List       *exprvars;
+               ListCell   *lc2;
+
+               /*
+                * We shouldn't be trying to sort by an equivalence class that
+                * contains a constant, so no need to consider such cases any further.
+                */
+               if (em->em_is_const)
+                       continue;
+
+               /*
+                * Ignore child members unless they belong to the requested rel.
+                */
+               if (em->em_is_child &&
+                       !bms_is_subset(em->em_relids, relids))
+                       continue;
+
+               /*
+                * Match if all Vars and quasi-Vars are available in "exprs".
+                */
+               exprvars = pull_var_clause((Node *) em->em_expr,
+                                                                  PVC_INCLUDE_AGGREGATES |
+                                                                  PVC_INCLUDE_WINDOWFUNCS |
+                                                                  PVC_INCLUDE_PLACEHOLDERS);
+               foreach(lc2, exprvars)
+               {
+                       if (!is_exprlist_member(lfirst(lc2), exprs))
+                               break;
+               }
+               list_free(exprvars);
+               if (lc2)
+                       continue;                       /* we hit a non-available Var */
+
+               /*
+                * If requested, reject expressions that are not parallel-safe.  We
+                * check this last because it's a rather expensive test.
+                */
+               if (require_parallel_safe &&
+                       !is_parallel_safe(root, (Node *) em->em_expr))
+                       continue;
+
+               return em;                              /* found usable expression */
+       }
+
+       return NULL;
+}
+
+/*
+ * is_exprlist_member
+ *       Subroutine for find_computable_ec_member: is "node" in "exprs"?
+ *
+ * Per the requirements of that function, "exprs" might or might not have
+ * TargetEntry superstructure.
+ */
+static bool
+is_exprlist_member(Expr *node, List *exprs)
+{
+       ListCell   *lc;
+
+       foreach(lc, exprs)
+       {
+               Expr       *expr = (Expr *) lfirst(lc);
+
+               if (expr && IsA(expr, TargetEntry))
+                       expr = ((TargetEntry *) expr)->expr;
+
+               if (equal(node, expr))
+                       return true;
+       }
+       return false;
+}
+
 /*
  * Find an equivalence class member expression, all of whose Vars, come from
  * the indicated relation.
@@ -799,71 +961,78 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
 }
 
 /*
- * Find an equivalence class member expression that can be safely used to build
- * a sort node using the provided relation. The rules are a subset of those
- * applied in prepare_sort_from_pathkeys since that function deals with sorts
- * that must be delayed until the last stages of query execution, while here
- * we only care about proactive sorts.
+ * Find an equivalence class member expression that can be used to build
+ * a sort node using the provided relation; return NULL if no candidate.
+ *
+ * To succeed, we must find an EC member that prepare_sort_from_pathkeys knows
+ * how to sort on, given the rel's reltarget as input.  There are also a few
+ * additional constraints based on the fact that the desired sort will be done
+ * within the scan/join part of the plan.  Also, non-parallel-safe expressions
+ * are ignored if 'require_parallel_safe'.
  */
 Expr *
 find_em_expr_usable_for_sorting_rel(PlannerInfo *root, EquivalenceClass *ec,
                                                                        RelOptInfo *rel, bool require_parallel_safe)
 {
-       ListCell   *lc_em;
+       PathTarget *target = rel->reltarget;
+       EquivalenceMember *em;
+       ListCell   *lc;
 
        /*
-        * If there is more than one equivalence member matching these
-        * requirements we'll be content to choose any one of them.
+        * Reject volatile ECs immediately; such sorts must always be postponed.
         */
-       foreach(lc_em, ec->ec_members)
-       {
-               EquivalenceMember *em = lfirst(lc_em);
-               Expr       *em_expr = em->em_expr;
+       if (ec->ec_has_volatile)
+               return NULL;
 
-               /*
-                * We shouldn't be trying to sort by an equivalence class that
-                * contains a constant, so no need to consider such cases any further.
-                */
-               if (em->em_is_const)
-                       continue;
+       /*
+        * Try to find an EM directly matching some reltarget member.
+        */
+       foreach(lc, target->exprs)
+       {
+               Expr       *targetexpr = (Expr *) lfirst(lc);
 
-               /*
-                * Any Vars in the equivalence class member need to come from this
-                * relation. This is a superset of prepare_sort_from_pathkeys ignoring
-                * child members unless they belong to the rel being sorted.
-                */
-               if (!bms_is_subset(em->em_relids, rel->relids))
+               em = find_ec_member_matching_expr(ec, targetexpr, rel->relids);
+               if (!em)
                        continue;
 
                /*
-                * If requested, reject expressions that are not parallel-safe.
+                * Reject expressions involving set-returning functions, as those
+                * can't be computed early either.  (Note: this test and the following
+                * one are effectively checking properties of targetexpr, so there's
+                * no point in asking whether some other EC member would be better.)
                 */
-               if (require_parallel_safe && !is_parallel_safe(root, (Node *) em_expr))
+               if (IS_SRF_CALL((Node *) em->em_expr))
                        continue;
 
                /*
-                * Disallow SRFs so that all of them can be evaluated at the correct
-                * time as determined by make_sort_input_target.
+                * If requested, reject expressions that are not parallel-safe.  We
+                * check this last because it's a rather expensive test.
                 */
-               if (IS_SRF_CALL((Node *) em_expr))
+               if (require_parallel_safe &&
+                       !is_parallel_safe(root, (Node *) em->em_expr))
                        continue;
 
-               /*
-                * As long as the expression isn't volatile then
-                * prepare_sort_from_pathkeys is able to generate a new target entry,
-                * so there's no need to verify that one already exists.
-                *
-                * While prepare_sort_from_pathkeys has to be concerned about matching
-                * up a volatile expression to the proper tlist entry, it doesn't seem
-                * valuable here to expend the work trying to find a match in the
-                * target's exprs since such a sort will have to be postponed anyway.
-                */
-               if (!ec->ec_has_volatile)
-                       return em->em_expr;
+               return em->em_expr;
        }
 
-       /* We didn't find any suitable equivalence class expression */
-       return NULL;
+       /*
+        * Try to find a expression computable from the reltarget.
+        */
+       em = find_computable_ec_member(root, ec, target->exprs, rel->relids,
+                                                                  require_parallel_safe);
+       if (!em)
+               return NULL;
+
+       /*
+        * Reject expressions involving set-returning functions, as those can't be
+        * computed early either.  (There's no point in looking for another EC
+        * member in this case; since SRFs can't appear in WHERE, they cannot
+        * belong to multi-member ECs.)
+        */
+       if (IS_SRF_CALL((Node *) em->em_expr))
+               return NULL;
+
+       return em->em_expr;
 }
 
 /*
index 22f10fa339b837462fcfe069061c7afbafa8fdff..a9aff248314709f95eabd255c5c24b66d3a6743f 100644 (file)
@@ -269,9 +269,6 @@ static Plan *prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
                                                                                Oid **p_sortOperators,
                                                                                Oid **p_collations,
                                                                                bool **p_nullsFirst);
-static EquivalenceMember *find_ec_member_for_tle(EquivalenceClass *ec,
-                                                                                                TargetEntry *tle,
-                                                                                                Relids relids);
 static Sort *make_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
                                                                         Relids relids);
 static IncrementalSort *make_incrementalsort_from_pathkeys(Plan *lefttree,
@@ -2110,7 +2107,7 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags)
                                                                  flags | CP_SMALL_TLIST);
 
        /*
-        * make_sort_from_pathkeys() indirectly calls find_ec_member_for_tle(),
+        * make_sort_from_pathkeys indirectly calls find_ec_member_matching_expr,
         * which will ignore any child EC members that don't belong to the given
         * relids. Thus, if this sort path is based on a child relation, we must
         * pass its relids.
@@ -6017,9 +6014,6 @@ make_incrementalsort(Plan *lefttree, int numCols, int nPresortedCols,
  *
  * Returns the node which is to be the input to the Sort (either lefttree,
  * or a Result stacked atop lefttree).
- *
- * Note: Restrictions on what expressions are safely sortable may also need to
- * be added to find_em_expr_usable_for_sorting_rel.
  */
 static Plan *
 prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
@@ -6089,7 +6083,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
                        tle = get_tle_by_resno(tlist, reqColIdx[numsortkeys]);
                        if (tle)
                        {
-                               em = find_ec_member_for_tle(ec, tle, relids);
+                               em = find_ec_member_matching_expr(ec, tle->expr, relids);
                                if (em)
                                {
                                        /* found expr at right place in tlist */
@@ -6120,7 +6114,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
                        foreach(j, tlist)
                        {
                                tle = (TargetEntry *) lfirst(j);
-                               em = find_ec_member_for_tle(ec, tle, relids);
+                               em = find_ec_member_matching_expr(ec, tle->expr, relids);
                                if (em)
                                {
                                        /* found expr already in tlist */
@@ -6134,56 +6128,12 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
                if (!tle)
                {
                        /*
-                        * No matching tlist item; look for a computable expression. Note
-                        * that we treat Aggrefs as if they were variables; this is
-                        * necessary when attempting to sort the output from an Agg node
-                        * for use in a WindowFunc (since grouping_planner will have
-                        * treated the Aggrefs as variables, too).  Likewise, if we find a
-                        * WindowFunc in a sort expression, treat it as a variable.
+                        * No matching tlist item; look for a computable expression.
                         */
-                       Expr       *sortexpr = NULL;
-
-                       foreach(j, ec->ec_members)
-                       {
-                               EquivalenceMember *em = (EquivalenceMember *) lfirst(j);
-                               List       *exprvars;
-                               ListCell   *k;
-
-                               /*
-                                * We shouldn't be trying to sort by an equivalence class that
-                                * contains a constant, so no need to consider such cases any
-                                * further.
-                                */
-                               if (em->em_is_const)
-                                       continue;
-
-                               /*
-                                * Ignore child members unless they belong to the rel being
-                                * sorted.
-                                */
-                               if (em->em_is_child &&
-                                       !bms_is_subset(em->em_relids, relids))
-                                       continue;
-
-                               sortexpr = em->em_expr;
-                               exprvars = pull_var_clause((Node *) sortexpr,
-                                                                                  PVC_INCLUDE_AGGREGATES |
-                                                                                  PVC_INCLUDE_WINDOWFUNCS |
-                                                                                  PVC_INCLUDE_PLACEHOLDERS);
-                               foreach(k, exprvars)
-                               {
-                                       if (!tlist_member_ignore_relabel(lfirst(k), tlist))
-                                               break;
-                               }
-                               list_free(exprvars);
-                               if (!k)
-                               {
-                                       pk_datatype = em->em_datatype;
-                                       break;          /* found usable expression */
-                               }
-                       }
-                       if (!j)
+                       em = find_computable_ec_member(NULL, ec, tlist, relids, false);
+                       if (!em)
                                elog(ERROR, "could not find pathkey item to sort");
+                       pk_datatype = em->em_datatype;
 
                        /*
                         * Do we need to insert a Result node?
@@ -6203,7 +6153,7 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
                        /*
                         * Add resjunk entry to input's tlist
                         */
-                       tle = makeTargetEntry(sortexpr,
+                       tle = makeTargetEntry(copyObject(em->em_expr),
                                                                  list_length(tlist) + 1,
                                                                  NULL,
                                                                  true);
@@ -6242,56 +6192,6 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys,
        return lefttree;
 }
 
-/*
- * find_ec_member_for_tle
- *             Locate an EquivalenceClass member matching the given TLE, if any
- *
- * Child EC members are ignored unless they belong to given 'relids'.
- */
-static EquivalenceMember *
-find_ec_member_for_tle(EquivalenceClass *ec,
-                                          TargetEntry *tle,
-                                          Relids relids)
-{
-       Expr       *tlexpr;
-       ListCell   *lc;
-
-       /* We ignore binary-compatible relabeling on both ends */
-       tlexpr = tle->expr;
-       while (tlexpr && IsA(tlexpr, RelabelType))
-               tlexpr = ((RelabelType *) tlexpr)->arg;
-
-       foreach(lc, ec->ec_members)
-       {
-               EquivalenceMember *em = (EquivalenceMember *) lfirst(lc);
-               Expr       *emexpr;
-
-               /*
-                * We shouldn't be trying to sort by an equivalence class that
-                * contains a constant, so no need to consider such cases any further.
-                */
-               if (em->em_is_const)
-                       continue;
-
-               /*
-                * Ignore child members unless they belong to the rel being sorted.
-                */
-               if (em->em_is_child &&
-                       !bms_is_subset(em->em_relids, relids))
-                       continue;
-
-               /* Match if same expression (after stripping relabel) */
-               emexpr = em->em_expr;
-               while (emexpr && IsA(emexpr, RelabelType))
-                       emexpr = ((RelabelType *) emexpr)->arg;
-
-               if (equal(emexpr, tlexpr))
-                       return em;
-       }
-
-       return NULL;
-}
-
 /*
  * make_sort_from_pathkeys
  *       Create sort plan to sort according to given pathkeys
@@ -6753,7 +6653,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
                        foreach(j, plan->targetlist)
                        {
                                tle = (TargetEntry *) lfirst(j);
-                               em = find_ec_member_for_tle(ec, tle, NULL);
+                               em = find_ec_member_matching_expr(ec, tle->expr, NULL);
                                if (em)
                                {
                                        /* found expr already in tlist */
index 8a26288070d4878c97c628878b0fc29cc256def5..311579d05905192d242e06548f2332fe88d197a5 100644 (file)
@@ -79,34 +79,6 @@ tlist_member(Expr *node, List *targetlist)
        return NULL;
 }
 
-/*
- * tlist_member_ignore_relabel
- *       Same as above, except that we ignore top-level RelabelType nodes
- *       while checking for a match.  This is needed for some scenarios
- *       involving binary-compatible sort operations.
- */
-TargetEntry *
-tlist_member_ignore_relabel(Expr *node, List *targetlist)
-{
-       ListCell   *temp;
-
-       while (node && IsA(node, RelabelType))
-               node = ((RelabelType *) node)->arg;
-
-       foreach(temp, targetlist)
-       {
-               TargetEntry *tlentry = (TargetEntry *) lfirst(temp);
-               Expr       *tlexpr = tlentry->expr;
-
-               while (tlexpr && IsA(tlexpr, RelabelType))
-                       tlexpr = ((RelabelType *) tlexpr)->arg;
-
-               if (equal(node, tlexpr))
-                       return tlentry;
-       }
-       return NULL;
-}
-
 /*
  * tlist_member_match_var
  *       Same as above, except that we match the provided Var on the basis
index 035d3e1206984fa2d89db7d6afb33c2821c6143a..888e85ff5b3fa40d8b3b746dd64edcf91df026b9 100644 (file)
@@ -135,6 +135,14 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
                                                                                                  Index sortref,
                                                                                                  Relids rel,
                                                                                                  bool create_it);
+extern EquivalenceMember *find_ec_member_matching_expr(EquivalenceClass *ec,
+                                                                                                          Expr *expr,
+                                                                                                          Relids relids);
+extern EquivalenceMember *find_computable_ec_member(PlannerInfo *root,
+                                                                                                       EquivalenceClass *ec,
+                                                                                                       List *exprs,
+                                                                                                       Relids relids,
+                                                                                                       bool require_parallel_safe);
 extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
 extern Expr *find_em_expr_usable_for_sorting_rel(PlannerInfo *root,
                                                                                                 EquivalenceClass *ec,
index e081ef2d5e43970aee7cbe1adaaf4631bcb53dae..d62a09665a4e8520b5b1b5357c13422a64a464fe 100644 (file)
@@ -18,7 +18,6 @@
 
 
 extern TargetEntry *tlist_member(Expr *node, List *targetlist);
-extern TargetEntry *tlist_member_ignore_relabel(Expr *node, List *targetlist);
 
 extern List *add_to_flat_tlist(List *tlist, List *exprs);
 
index a417b566d95abc0e138bba459575f70fd1ea18d8..545e301e4827298308adc3e90f5ea63ec714cf13 100644 (file)
@@ -1579,6 +1579,32 @@ order by 1, 2;
    ->  Function Scan on generate_series
 (7 rows)
 
+-- Parallel sort with an aggregate that can be safely generated in parallel,
+-- but we can't sort by partial aggregate values.
+explain (costs off) select count(*)
+from tenk1 t1
+join tenk1 t2 on t1.unique1 = t2.unique2
+join tenk1 t3 on t2.unique1 = t3.unique1
+order by count(*);
+                                          QUERY PLAN                                           
+-----------------------------------------------------------------------------------------------
+ Sort
+   Sort Key: (count(*))
+   ->  Finalize Aggregate
+         ->  Gather
+               Workers Planned: 2
+               ->  Partial Aggregate
+                     ->  Parallel Hash Join
+                           Hash Cond: (t2.unique1 = t3.unique1)
+                           ->  Parallel Hash Join
+                                 Hash Cond: (t1.unique1 = t2.unique2)
+                                 ->  Parallel Index Only Scan using tenk1_unique1 on tenk1 t1
+                                 ->  Parallel Hash
+                                       ->  Parallel Index Scan using tenk1_unique2 on tenk1 t2
+                           ->  Parallel Hash
+                                 ->  Parallel Index Only Scan using tenk1_unique1 on tenk1 t3
+(15 rows)
+
 -- Parallel sort but with expression (correlated subquery) that
 -- is prohibited in parallel plans.
 explain (costs off) select distinct
index 81429164d45df62258f7a9ee3d2f14fa3ab3f211..d8768a6b54d7804b2e959e61d44a7ba5a3b05df8 100644 (file)
@@ -257,6 +257,13 @@ from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
 explain (costs off) select sub.unique1, md5(stringu1)
 from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
 order by 1, 2;
+-- Parallel sort with an aggregate that can be safely generated in parallel,
+-- but we can't sort by partial aggregate values.
+explain (costs off) select count(*)
+from tenk1 t1
+join tenk1 t2 on t1.unique1 = t2.unique2
+join tenk1 t3 on t2.unique1 = t3.unique1
+order by count(*);
 -- Parallel sort but with expression (correlated subquery) that
 -- is prohibited in parallel plans.
 explain (costs off) select distinct