Improve performance of ORDER BY / DISTINCT aggregates

author David Rowley <drowley@postgresql.org>

Tue, 2 Aug 2022 11:11:45 +0000 (23:11 +1200)

committer David Rowley <drowley@postgresql.org>

Tue, 2 Aug 2022 11:11:45 +0000 (23:11 +1200)
author David Rowley <drowley@postgresql.org>
Tue, 2 Aug 2022 11:11:45 +0000 (23:11 +1200)
committer David Rowley <drowley@postgresql.org>
Tue, 2 Aug 2022 11:11:45 +0000 (23:11 +1200)
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out

index ade797159dc7a213655e59f54bf22aebf0257dc4..52d0c3f3ed25cd828b862b6678c8613e0eb3da4d 100644 (file)
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -3295,15 +3295,18 @@ create operator class my_op_class for type int using btree family my_op_family a
  -- extension yet.
  explain (verbose, costs off)
  select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
-                                         QUERY PLAN                                         
---------------------------------------------------------------------------------------------
+                                            QUERY PLAN                                            
+--------------------------------------------------------------------------------------------------
   GroupAggregate
     Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
     Group Key: ft2.c2
-   ->  Foreign Scan on public.ft2
-         Output: c1, c2
-         Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(6 rows)
+   ->  Sort
+         Output: c2, c1
+         Sort Key: ft2.c1 USING <^
+         ->  Foreign Scan on public.ft2
+               Output: c2, c1
+               Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
+(9 rows)
  
  -- This should not be pushed either.
  explain (verbose, costs off)
@@ -3329,6 +3332,7 @@ alter extension postgres_fdw add operator public.=^(int, int);
  alter extension postgres_fdw add operator public.>^(int, int);
  alter server loopback options (set extensions 'postgres_fdw');
  -- Now this will be pushed as sort operator is part of the extension.
+alter server loopback options (add fdw_tuple_cost '0.5');
  explain (verbose, costs off)
  select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
                                                                             QUERY PLAN                                                                           
@@ -3345,6 +3349,7 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
   {6,16,26,36,46,56,66,76,86,96}
  (1 row)
  
+alter server loopback options (drop fdw_tuple_cost);
  -- This should be pushed too.
  explain (verbose, costs off)
  select * from ft2 order by c1 using operator(public.<^);
@@ -3366,15 +3371,18 @@ alter server loopback options (set extensions 'postgres_fdw');
  -- This will not be pushed as sort operator is now removed from the extension.
  explain (verbose, costs off)
  select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
-                                         QUERY PLAN                                         
---------------------------------------------------------------------------------------------
+                                            QUERY PLAN                                            
+--------------------------------------------------------------------------------------------------
   GroupAggregate
     Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
     Group Key: ft2.c2
-   ->  Foreign Scan on public.ft2
-         Output: c1, c2
-         Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(6 rows)
+   ->  Sort
+         Output: c2, c1
+         Sort Key: ft2.c1 USING <^
+         ->  Foreign Scan on public.ft2
+               Output: c2, c1
+               Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
+(9 rows)
  
  -- Cleanup
  drop operator class my_op_class using btree;
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql

index b7817c5a415daa6e5373c68e18d2e8a2cb7b455f..2a250ee6324e019ff737ea4cf348be64a4ec980c 100644 (file)
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -943,9 +943,11 @@ alter extension postgres_fdw add operator public.>^(int, int);
  alter server loopback options (set extensions 'postgres_fdw');
  
  -- Now this will be pushed as sort operator is part of the extension.
+alter server loopback options (add fdw_tuple_cost '0.5');
  explain (verbose, costs off)
  select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
  select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
+alter server loopback options (drop fdw_tuple_cost);
  
  -- This should be pushed too.
  explain (verbose, costs off)
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c

index c8d7145fe3840cc404eee70ef91febe39d90d3f3..d0a57c7aaee7f2dfebdc004f411a43561c2d9797 100644 (file)
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -3666,13 +3666,17 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
                 scratch.resnull = &state->resnull;
             }
             argno++;
+
+           Assert(pertrans->numInputs == argno);
         }
-       else if (pertrans->numSortCols == 0)
+       else if (!pertrans->aggsortrequired)
         {
             ListCell   *arg;
  
             /*
-            * Normal transition function without ORDER BY / DISTINCT.
+            * Normal transition function without ORDER BY / DISTINCT or with
+            * ORDER BY / DISTINCT but the planner has given us pre-sorted
+            * input.
              */
             strictargs = trans_fcinfo->args + 1;
  
@@ -3680,6 +3684,13 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
             {
                 TargetEntry *source_tle = (TargetEntry *) lfirst(arg);
  
+               /*
+                * Don't initialize args for any ORDER BY clause that might
+                * exist in a presorted aggregate.
+                */
+               if (argno == pertrans->numTransInputs)
+                   break;
+
                 /*
                  * Start from 1, since the 0th arg will be the transition
                  * value
@@ -3689,11 +3700,13 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
                                 &trans_fcinfo->args[argno + 1].isnull);
                 argno++;
             }
+           Assert(pertrans->numTransInputs == argno);
         }
         else if (pertrans->numInputs == 1)
         {
             /*
-            * DISTINCT and/or ORDER BY case, with a single column sorted on.
+            * Non-presorted DISTINCT and/or ORDER BY case, with a single
+            * column sorted on.
              */
             TargetEntry *source_tle =
             (TargetEntry *) linitial(pertrans->aggref->args);
@@ -3705,11 +3718,14 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
                             &state->resnull);
             strictnulls = &state->resnull;
             argno++;
+
+           Assert(pertrans->numInputs == argno);
         }
         else
         {
             /*
-            * DISTINCT and/or ORDER BY case, with multiple columns sorted on.
+            * Non-presorted DISTINCT and/or ORDER BY case, with multiple
+            * columns sorted on.
              */
             Datum      *values = pertrans->sortslot->tts_values;
             bool       *nulls = pertrans->sortslot->tts_isnull;
@@ -3725,8 +3741,8 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
                                 &values[argno], &nulls[argno]);
                 argno++;
             }
+           Assert(pertrans->numInputs == argno);
         }
-       Assert(pertrans->numInputs == argno);
  
         /*
          * For a strict transfn, nothing happens when there's a NULL input; we
@@ -3748,6 +3764,21 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
                                          state->steps_len - 1);
         }
  
+       /* Handle DISTINCT aggregates which have pre-sorted input */
+       if (pertrans->numDistinctCols > 0 && !pertrans->aggsortrequired)
+       {
+           if (pertrans->numDistinctCols > 1)
+               scratch.opcode = EEOP_AGG_PRESORTED_DISTINCT_MULTI;
+           else
+               scratch.opcode = EEOP_AGG_PRESORTED_DISTINCT_SINGLE;
+
+           scratch.d.agg_presorted_distinctcheck.pertrans = pertrans;
+           scratch.d.agg_presorted_distinctcheck.jumpdistinct = -1;    /* adjust later */
+           ExprEvalPushStep(state, &scratch);
+           adjust_bailout = lappend_int(adjust_bailout,
+                                        state->steps_len - 1);
+       }
+
         /*
          * Call transition function (once for each concurrently evaluated
          * grouping set). Do so for both sort and hash based computations, as
@@ -3808,6 +3839,12 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
                 Assert(as->d.agg_deserialize.jumpnull == -1);
                 as->d.agg_deserialize.jumpnull = state->steps_len;
             }
+           else if (as->opcode == EEOP_AGG_PRESORTED_DISTINCT_SINGLE ||
+                    as->opcode == EEOP_AGG_PRESORTED_DISTINCT_MULTI)
+           {
+               Assert(as->d.agg_presorted_distinctcheck.jumpdistinct == -1);
+               as->d.agg_presorted_distinctcheck.jumpdistinct = state->steps_len;
+           }
             else
                 Assert(false);
         }
@@ -3857,7 +3894,8 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
     /*
      * Determine appropriate transition implementation.
      *
-    * For non-ordered aggregates:
+    * For non-ordered aggregates and ORDER BY / DISTINCT aggregates with
+    * presorted input:
      *
      * If the initial value for the transition state doesn't exist in the
      * pg_aggregate table then we will let the first non-NULL value returned
@@ -3887,7 +3925,7 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
      * process_ordered_aggregate_{single, multi} and
      * advance_transition_function.
      */
-   if (pertrans->numSortCols == 0)
+   if (!pertrans->aggsortrequired)
     {
         if (pertrans->transtypeByVal)
         {
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c

index 723770fda0ed3b4d0c90d56cd035fbb253126af0..636794ca6f164145d1e46957135624b2b50f9bf8 100644 (file)
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -502,6 +502,8 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
         &&CASE_EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF,
         &&CASE_EEOP_AGG_PLAIN_TRANS_STRICT_BYREF,
         &&CASE_EEOP_AGG_PLAIN_TRANS_BYREF,
+       &&CASE_EEOP_AGG_PRESORTED_DISTINCT_SINGLE,
+       &&CASE_EEOP_AGG_PRESORTED_DISTINCT_MULTI,
         &&CASE_EEOP_AGG_ORDERED_TRANS_DATUM,
         &&CASE_EEOP_AGG_ORDERED_TRANS_TUPLE,
         &&CASE_EEOP_LAST
@@ -1786,6 +1788,28 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
             EEO_NEXT();
         }
  
+       EEO_CASE(EEOP_AGG_PRESORTED_DISTINCT_SINGLE)
+       {
+           AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+           AggState   *aggstate = castNode(AggState, state->parent);
+
+           if (ExecEvalPreOrderedDistinctSingle(aggstate, pertrans))
+               EEO_NEXT();
+           else
+               EEO_JUMP(op->d.agg_presorted_distinctcheck.jumpdistinct);
+       }
+
+       EEO_CASE(EEOP_AGG_PRESORTED_DISTINCT_MULTI)
+       {
+           AggState   *aggstate = castNode(AggState, state->parent);
+           AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+
+           if (ExecEvalPreOrderedDistinctMulti(aggstate, pertrans))
+               EEO_NEXT();
+           else
+               EEO_JUMP(op->d.agg_presorted_distinctcheck.jumpdistinct);
+       }
+
         /* process single-column ordered aggregate datum */
         EEO_CASE(EEOP_AGG_ORDERED_TRANS_DATUM)
         {
@@ -4402,6 +4426,84 @@ ExecAggTransReparent(AggState *aggstate, AggStatePerTrans pertrans,
     return newValue;
  }
  
+/*
+ * ExecEvalPreOrderedDistinctSingle
+ *     Returns true when the aggregate transition value Datum is distinct
+ *     from the previous input Datum and returns false when the input Datum
+ *     matches the previous input Datum.
+ */
+bool
+ExecEvalPreOrderedDistinctSingle(AggState *aggstate, AggStatePerTrans pertrans)
+{
+   Datum       value = pertrans->transfn_fcinfo->args[1].value;
+   bool        isnull = pertrans->transfn_fcinfo->args[1].isnull;
+
+   if (!pertrans->haslast ||
+       pertrans->lastisnull != isnull ||
+       !DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne,
+                                       pertrans->aggCollation,
+                                       pertrans->lastdatum, value)))
+   {
+       if (pertrans->haslast && !pertrans->inputtypeByVal)
+           pfree(DatumGetPointer(pertrans->lastdatum));
+
+       pertrans->haslast = true;
+       if (!isnull)
+       {
+           MemoryContext oldContext;
+
+           oldContext = MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
+
+           pertrans->lastdatum = datumCopy(value, pertrans->inputtypeByVal,
+                                           pertrans->inputtypeLen);
+
+           MemoryContextSwitchTo(oldContext);
+       }
+       else
+           pertrans->lastdatum = (Datum) 0;
+       pertrans->lastisnull = isnull;
+       return true;
+   }
+
+   return false;
+}
+
+/*
+ * ExecEvalPreOrderedDistinctMulti
+ *     Returns true when the aggregate input is distinct from the previous
+ *     input and returns false when the input matches the previous input.
+ */
+bool
+ExecEvalPreOrderedDistinctMulti(AggState *aggstate, AggStatePerTrans pertrans)
+{
+   ExprContext *tmpcontext = aggstate->tmpcontext;
+
+   for (int i = 0; i < pertrans->numTransInputs; i++)
+   {
+       pertrans->sortslot->tts_values[i] = pertrans->transfn_fcinfo->args[i + 1].value;
+       pertrans->sortslot->tts_isnull[i] = pertrans->transfn_fcinfo->args[i + 1].isnull;
+   }
+
+   ExecClearTuple(pertrans->sortslot);
+   pertrans->sortslot->tts_nvalid = pertrans->numInputs;
+   ExecStoreVirtualTuple(pertrans->sortslot);
+
+   tmpcontext->ecxt_outertuple = pertrans->sortslot;
+   tmpcontext->ecxt_innertuple = pertrans->uniqslot;
+
+   if (!pertrans->haslast ||
+       !ExecQual(pertrans->equalfnMulti, tmpcontext))
+   {
+       if (pertrans->haslast)
+           ExecClearTuple(pertrans->uniqslot);
+
+       pertrans->haslast = true;
+       ExecCopySlot(pertrans->uniqslot, pertrans->sortslot);
+       return true;
+   }
+   return false;
+}
+
  /*
   * Invoke ordered transition function, with a datum argument.
   */
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c

index 2fc606cf29d1179da19c68c5d85cf2085a788a84..96d200e4461b380e11287e30bc95cfc0c2f55609 100644 (file)
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -583,7 +583,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
     /*
      * Start a fresh sort operation for each DISTINCT/ORDER BY aggregate.
      */
-   if (pertrans->numSortCols > 0)
+   if (pertrans->aggsortrequired)
     {
         /*
          * In case of rescan, maybe there could be an uncompleted sort
@@ -1309,7 +1309,7 @@ finalize_aggregates(AggState *aggstate,
  
         pergroupstate = &pergroup[transno];
  
-       if (pertrans->numSortCols > 0)
+       if (pertrans->aggsortrequired)
         {
             Assert(aggstate->aggstrategy != AGG_HASHED &&
                    aggstate->aggstrategy != AGG_MIXED);
@@ -1323,6 +1323,21 @@ finalize_aggregates(AggState *aggstate,
                                                 pertrans,
                                                 pergroupstate);
         }
+       else if (pertrans->numDistinctCols > 0 && pertrans->haslast)
+       {
+           pertrans->haslast = false;
+
+           if (pertrans->numDistinctCols == 1)
+           {
+               if (!pertrans->inputtypeByVal && !pertrans->lastisnull)
+                   pfree(DatumGetPointer(pertrans->lastdatum));
+
+               pertrans->lastisnull = false;
+               pertrans->lastdatum = (Datum) 0;
+           }
+           else
+               ExecClearTuple(pertrans->uniqslot);
+       }
     }
  
     /*
@@ -4127,6 +4142,12 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
      * stick them into arrays.  We ignore ORDER BY for an ordered-set agg,
      * however; the agg's transfn and finalfn are responsible for that.
      *
+    * When the planner has set the aggpresorted flag, the input to the
+    * aggregate is already correctly sorted.  For ORDER BY aggregates we can
+    * simply treat these as normal aggregates.  For presorted DISTINCT
+    * aggregates an extra step must be added to remove duplicate consecutive
+    * inputs.
+    *
      * Note that by construction, if there is a DISTINCT clause then the ORDER
      * BY clause is a prefix of it (see transformDistinctClause).
      */
@@ -4134,18 +4155,27 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
     {
         sortlist = NIL;
         numSortCols = numDistinctCols = 0;
+       pertrans->aggsortrequired = false;
+   }
+   else if (aggref->aggpresorted && aggref->aggdistinct == NIL)
+   {
+       sortlist = NIL;
+       numSortCols = numDistinctCols = 0;
+       pertrans->aggsortrequired = false;
     }
     else if (aggref->aggdistinct)
     {
         sortlist = aggref->aggdistinct;
         numSortCols = numDistinctCols = list_length(sortlist);
         Assert(numSortCols >= list_length(aggref->aggorder));
+       pertrans->aggsortrequired = !aggref->aggpresorted;
     }
     else
     {
         sortlist = aggref->aggorder;
         numSortCols = list_length(sortlist);
         numDistinctCols = 0;
+       pertrans->aggsortrequired = (numSortCols > 0);
     }
  
     pertrans->numSortCols = numSortCols;
diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c

index b6b6512ef1fce3d1aab50f2b032571127979de6e..bd3965143da270ff007b1af5600950469b1887bb 100644 (file)
--- a/src/backend/jit/llvm/llvmjit_expr.c
+++ b/src/backend/jit/llvm/llvmjit_expr.c
@@ -2335,6 +2335,54 @@ llvm_compile_expr(ExprState *state)
                     break;
                 }
  
+           case EEOP_AGG_PRESORTED_DISTINCT_SINGLE:
+               {
+                   AggState   *aggstate = castNode(AggState, state->parent);
+                   AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+                   int         jumpdistinct = op->d.agg_presorted_distinctcheck.jumpdistinct;
+
+                   LLVMValueRef v_fn = llvm_pg_func(mod, "ExecEvalPreOrderedDistinctSingle");
+                   LLVMValueRef v_args[2];
+                   LLVMValueRef v_ret;
+
+                   v_args[0] = l_ptr_const(aggstate, l_ptr(StructAggState));
+                   v_args[1] = l_ptr_const(pertrans, l_ptr(StructAggStatePerTransData));
+
+                   v_ret = LLVMBuildCall(b, v_fn, v_args, 2, "");
+                   v_ret = LLVMBuildZExt(b, v_ret, TypeStorageBool, "");
+
+                   LLVMBuildCondBr(b,
+                                   LLVMBuildICmp(b, LLVMIntEQ, v_ret,
+                                                 l_sbool_const(1), ""),
+                                   opblocks[opno + 1],
+                                   opblocks[jumpdistinct]);
+                   break;
+               }
+
+           case EEOP_AGG_PRESORTED_DISTINCT_MULTI:
+               {
+                   AggState   *aggstate = castNode(AggState, state->parent);
+                   AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+                   int         jumpdistinct = op->d.agg_presorted_distinctcheck.jumpdistinct;
+
+                   LLVMValueRef v_fn = llvm_pg_func(mod, "ExecEvalPreOrderedDistinctMulti");
+                   LLVMValueRef v_args[2];
+                   LLVMValueRef v_ret;
+
+                   v_args[0] = l_ptr_const(aggstate, l_ptr(StructAggState));
+                   v_args[1] = l_ptr_const(pertrans, l_ptr(StructAggStatePerTransData));
+
+                   v_ret = LLVMBuildCall(b, v_fn, v_args, 2, "");
+                   v_ret = LLVMBuildZExt(b, v_ret, TypeStorageBool, "");
+
+                   LLVMBuildCondBr(b,
+                                   LLVMBuildICmp(b, LLVMIntEQ, v_ret,
+                                                 l_sbool_const(1), ""),
+                                   opblocks[opno + 1],
+                                   opblocks[jumpdistinct]);
+                   break;
+               }
+
             case EEOP_AGG_ORDERED_TRANS_DATUM:
                 build_EvalXFunc(b, mod, "ExecEvalAggOrderedTransDatum",
                                 v_state, op, v_econtext);
diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c

index b2bda868893dfdfcb768c382df99d1224e9824e3..373471ad27fee85a071207e41a46153216712793 100644 (file)
--- a/src/backend/jit/llvm/llvmjit_types.c
+++ b/src/backend/jit/llvm/llvmjit_types.c
@@ -103,6 +103,8 @@ void       *referenced_functions[] =
  {
     ExecAggInitGroup,
     ExecAggTransReparent,
+   ExecEvalPreOrderedDistinctSingle,
+   ExecEvalPreOrderedDistinctMulti,
     ExecEvalAggOrderedTransDatum,
     ExecEvalAggOrderedTransTuple,
     ExecEvalArrayCoerce,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c

index 2a1c923b4a857b2886e026c71289db822b534c88..1fa7fc99b5144cd2dea5fb05a0623e0a65d4e4bb 100644 (file)
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -103,6 +103,28 @@ make_canonical_pathkey(PlannerInfo *root,
     return pk;
  }
  
+/*
+ * append_pathkeys
+ *     Append all non-redundant PathKeys in 'source' onto 'target' and
+ *     returns the updated 'target' list.
+ */
+List *
+append_pathkeys(List *target, List *source)
+{
+   ListCell   *lc;
+
+   Assert(target != NIL);
+
+   foreach(lc, source)
+   {
+       PathKey    *pk = lfirst_node(PathKey, lc);
+
+       if (!pathkey_is_redundant(pk, target))
+           target = lappend(target, pk);
+   }
+   return target;
+}
+
  /*
   * pathkey_is_redundant
   *    Is a pathkey redundant with one already in the given list?
@@ -746,7 +768,8 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
  List *
  get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
                                 List *path_pathkeys,
-                               List *group_pathkeys, List *group_clauses)
+                               List *group_pathkeys, List *group_clauses,
+                               List *aggregate_pathkeys)
  {
     Query      *parse = root->parse;
     List       *infos = NIL;
@@ -758,7 +781,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
  
     /* always return at least the original pathkeys/clauses */
     info = makeNode(PathKeyInfo);
-   info->pathkeys = pathkeys;
+   if (aggregate_pathkeys != NIL)
+       info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+   else
+       info->pathkeys = pathkeys;
     info->clauses = clauses;
  
     infos = lappend(infos, info);
@@ -783,7 +809,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
                                       n_preordered))
     {
         info = makeNode(PathKeyInfo);
-       info->pathkeys = pathkeys;
+       if (aggregate_pathkeys != NIL)
+           info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+       else
+           info->pathkeys = pathkeys;
         info->clauses = clauses;
  
         infos = lappend(infos, info);
@@ -822,7 +851,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
          * still want to keep the keys reordered to path_pathkeys.
          */
         info = makeNode(PathKeyInfo);
-       info->pathkeys = pathkeys;
+       if (aggregate_pathkeys != NIL)
+           info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+       else
+           info->pathkeys = pathkeys;
         info->clauses = clauses;
  
         infos = lappend(infos, info);
@@ -850,7 +882,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
  
         /* keep the group keys reordered to match ordering of input path */
         info = makeNode(PathKeyInfo);
-       info->pathkeys = pathkeys;
+       if (aggregate_pathkeys != NIL)
+           info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+       else
+           info->pathkeys = pathkeys;
         info->clauses = clauses;
  
         infos = lappend(infos, info);
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c

index e0e357960f2378546bf8382fbb649bc281df2f4a..ab3f7abba19312ec989497900bf94c1f6b33dd60 100644 (file)
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -245,7 +245,7 @@ can_minmax_aggs(PlannerInfo *root, List **context)
     foreach(lc, root->agginfos)
     {
         AggInfo    *agginfo = lfirst_node(AggInfo, lc);
-       Aggref     *aggref = agginfo->representative_aggref;
+       Aggref     *aggref = linitial_node(Aggref, agginfo->aggrefs);
         Oid         aggsortop;
         TargetEntry *curTarget;
         MinMaxAggInfo *mminfo;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 06ad856eac1031693234cd0d2ae8585321acd483..64632db73cdcba13810cf0c1470fb9db3b00a044 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -24,6 +24,7 @@
  #include "access/sysattr.h"
  #include "access/table.h"
  #include "access/xact.h"
+#include "catalog/pg_aggregate.h"
  #include "catalog/pg_constraint.h"
  #include "catalog/pg_inherits.h"
  #include "catalog/pg_proc.h"
@@ -3125,6 +3126,217 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
     return result;
  }
  
+/*
+ * make_pathkeys_for_groupagg
+ *     Determine the pathkeys for the GROUP BY clause and/or any ordered
+ *     aggregates.  We expect at least one of these here.
+ *
+ * Building the pathkeys for the GROUP BY is simple.  Most of the complexity
+ * involved here comes from calculating the best pathkeys for ordered
+ * aggregates.  We define "best" as the pathkeys that suit the most number of
+ * aggregate functions.  We find these by looking at the first ORDER BY /
+ * DISTINCT aggregate and take the pathkeys for that before searching for
+ * other aggregates that require the same or a more strict variation of the
+ * same pathkeys.  We then repeat that process for any remaining aggregates
+ * with different pathkeys and if we find another set of pathkeys that suits a
+ * larger number of aggregates then we return those pathkeys instead.
+ *
+ * *number_groupby_pathkeys gets set to the number of elements in the returned
+ * list that belong to the GROUP BY clause.  Any elements above this number
+ * must belong to ORDER BY / DISTINCT aggregates.
+ *
+ * When the best pathkeys are found we also mark each Aggref that can use
+ * those pathkeys as aggpresorted = true.
+ */
+static List *
+make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
+                          int *number_groupby_pathkeys)
+{
+   List       *grouppathkeys = NIL;
+   List       *bestpathkeys;
+   Bitmapset  *bestaggs;
+   Bitmapset  *unprocessed_aggs;
+   ListCell   *lc;
+   int         i;
+
+   Assert(groupClause != NIL || root->numOrderedAggs > 0);
+
+   if (groupClause != NIL)
+   {
+       /* no pathkeys possible if there's an unsortable GROUP BY */
+       if (!grouping_is_sortable(groupClause))
+       {
+           *number_groupby_pathkeys = 0;
+           return NIL;
+       }
+
+       grouppathkeys = make_pathkeys_for_sortclauses(root, groupClause,
+                                                     tlist);
+       *number_groupby_pathkeys = list_length(grouppathkeys);
+   }
+   else
+       *number_groupby_pathkeys = 0;
+
+   /*
+    * We can't add pathkeys for ordered aggregates if there are any grouping
+    * sets.  All handling specific to ordered aggregates must be done by the
+    * executor in that case.
+    */
+   if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL)
+       return grouppathkeys;
+
+   /*
+    * Make a first pass over all AggInfos to collect a Bitmapset containing
+    * the indexes of all AggInfos to be processed below.
+    */
+   unprocessed_aggs = NULL;
+   foreach(lc, root->agginfos)
+   {
+       AggInfo    *agginfo = lfirst_node(AggInfo, lc);
+       Aggref     *aggref = linitial_node(Aggref, agginfo->aggrefs);
+
+       if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
+           continue;
+
+       /* only add aggregates with a DISTINCT or ORDER BY */
+       if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
+           unprocessed_aggs = bms_add_member(unprocessed_aggs,
+                                             foreach_current_index(lc));
+   }
+
+   /*
+    * Now process all the unprocessed_aggs to find the best set of pathkeys
+    * for the given set of aggregates.
+    *
+    * On the first outer loop here 'bestaggs' will be empty.   We'll populate
+    * this during the first loop using the pathkeys for the very first
+    * AggInfo then taking any stronger pathkeys from any other AggInfos with
+    * a more strict set of compatible pathkeys.  Once the outer loop is
+    * complete, we mark off all the aggregates with compatible pathkeys then
+    * remove those from the unprocessed_aggs and repeat the process to try to
+    * find another set of pathkeys that are suitable for a larger number of
+    * aggregates.  The outer loop will stop when there are not enough
+    * unprocessed aggregates for it to be possible to find a set of pathkeys
+    * to suit a larger number of aggregates.
+    */
+   bestpathkeys = NIL;
+   bestaggs = NULL;
+   while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
+   {
+       Bitmapset  *aggindexes = NULL;
+       List       *currpathkeys = NIL;
+
+       i = -1;
+       while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
+       {
+           AggInfo    *agginfo = list_nth_node(AggInfo, root->agginfos, i);
+           Aggref     *aggref = linitial_node(Aggref, agginfo->aggrefs);
+           List       *sortlist;
+
+           if (aggref->aggdistinct != NIL)
+               sortlist = aggref->aggdistinct;
+           else
+               sortlist = aggref->aggorder;
+
+           /*
+            * When not set yet, take the pathkeys from the first unprocessed
+            * aggregate.
+            */
+           if (currpathkeys == NIL)
+           {
+               currpathkeys = make_pathkeys_for_sortclauses(root, sortlist,
+                                                            aggref->args);
+
+               /* include the GROUP BY pathkeys, if they exist */
+               if (grouppathkeys != NIL)
+                   currpathkeys = append_pathkeys(list_copy(grouppathkeys),
+                                                  currpathkeys);
+
+               /* record that we found pathkeys for this aggregate */
+               aggindexes = bms_add_member(aggindexes, i);
+           }
+           else
+           {
+               List       *pathkeys;
+
+               /* now look for a stronger set of matching pathkeys */
+               pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
+                                                        aggref->args);
+
+               /* include the GROUP BY pathkeys, if they exist */
+               if (grouppathkeys != NIL)
+                   pathkeys = append_pathkeys(list_copy(grouppathkeys),
+                                              pathkeys);
+
+               /* are 'pathkeys' compatible or better than 'currpathkeys'? */
+               switch (compare_pathkeys(currpathkeys, pathkeys))
+               {
+                   case PATHKEYS_BETTER2:
+                       /* 'pathkeys' are stronger, use these ones instead */
+                       currpathkeys = pathkeys;
+                       /* FALLTHROUGH */
+
+                   case PATHKEYS_BETTER1:
+                       /* 'pathkeys' are less strict */
+                       /* FALLTHROUGH */
+
+                   case PATHKEYS_EQUAL:
+                       /* mark this aggregate as covered by 'currpathkeys' */
+                       aggindexes = bms_add_member(aggindexes, i);
+                       break;
+
+                   case PATHKEYS_DIFFERENT:
+                       break;
+               }
+           }
+       }
+
+       /* remove the aggregates that we've just processed */
+       unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
+
+       /*
+        * If this pass included more aggregates than the previous best then
+        * use these ones as the best set.
+        */
+       if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
+       {
+           bestaggs = aggindexes;
+           bestpathkeys = currpathkeys;
+       }
+   }
+
+   /*
+    * Now that we've found the best set of aggregates we can set the
+    * presorted flag to indicate to the executor that it needn't bother
+    * performing a sort for these Aggrefs.  We're able to do this now as
+    * there's no chance of a Hash Aggregate plan as create_grouping_paths
+    * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
+    * of ordered aggregates.
+    */
+   i = -1;
+   while ((i = bms_next_member(bestaggs, i)) >= 0)
+   {
+       AggInfo    *agginfo = list_nth_node(AggInfo, root->agginfos, i);
+
+       foreach(lc, agginfo->aggrefs)
+       {
+           Aggref     *aggref = lfirst_node(Aggref, lc);
+
+           aggref->aggpresorted = true;
+       }
+   }
+
+   /*
+    * bestpathkeys includes the GROUP BY pathkeys, so if we found any ordered
+    * aggregates, then return bestpathkeys, otherwise return the
+    * grouppathkeys.
+    */
+   if (bestpathkeys != NIL)
+       return bestpathkeys;
+
+   return grouppathkeys;
+}
+
  /*
   * Compute query_pathkeys and other pathkeys during plan generation
   */
@@ -3137,18 +3349,19 @@ standard_qp_callback(PlannerInfo *root, void *extra)
     List       *activeWindows = qp_extra->activeWindows;
  
     /*
-    * Calculate pathkeys that represent grouping/ordering requirements.  The
-    * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
-    * be, in which case we just leave their pathkeys empty.
+    * Calculate pathkeys that represent grouping/ordering and/or ordered
+    * aggregate requirements.
      */
-   if (qp_extra->groupClause &&
-       grouping_is_sortable(qp_extra->groupClause))
-       root->group_pathkeys =
-           make_pathkeys_for_sortclauses(root,
-                                         qp_extra->groupClause,
-                                         tlist);
+   if (qp_extra->groupClause != NIL || root->numOrderedAggs > 0)
+       root->group_pathkeys = make_pathkeys_for_groupagg(root,
+                                                         qp_extra->groupClause,
+                                                         tlist,
+                                                         &root->num_groupby_pathkeys);
     else
+   {
         root->group_pathkeys = NIL;
+       root->num_groupby_pathkeys = 0;
+   }
  
     /* We consider only the first (bottom) window in pathkeys logic */
     if (activeWindows != NIL)
@@ -6222,6 +6435,26 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
  
     if (can_sort)
     {
+       List       *group_pathkeys;
+       List       *orderAggPathkeys;
+       int         numAggPathkeys;
+
+       numAggPathkeys = list_length(root->group_pathkeys) -
+           root->num_groupby_pathkeys;
+
+       if (numAggPathkeys > 0)
+       {
+           group_pathkeys = list_copy_head(root->group_pathkeys,
+                                           root->num_groupby_pathkeys);
+           orderAggPathkeys = list_copy_tail(root->group_pathkeys,
+                                             root->num_groupby_pathkeys);
+       }
+       else
+       {
+           group_pathkeys = root->group_pathkeys;
+           orderAggPathkeys = NIL;
+       }
+
         /*
          * Use any available suitably-sorted path as input, and also consider
          * sorting the cheapest-total path.
@@ -6234,7 +6467,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
  
             List       *pathkey_orderings = NIL;
  
-           List       *group_pathkeys = root->group_pathkeys;
             List       *group_clauses = parse->groupClause;
  
             /* generate alternative group orderings that might be useful */
@@ -6242,7 +6474,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                                                                 path->rows,
                                                                 path->pathkeys,
                                                                 group_pathkeys,
-                                                               group_clauses);
+                                                               group_clauses,
+                                                               orderAggPathkeys);
  
             Assert(list_length(pathkey_orderings) > 0);
  
@@ -6414,7 +6647,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                                                                     path->rows,
                                                                     path->pathkeys,
                                                                     group_pathkeys,
-                                                                   group_clauses);
+                                                                   group_clauses,
+                                                                   orderAggPathkeys);
  
                 Assert(list_length(pathkey_orderings) > 0);
  
@@ -6717,6 +6951,26 @@ create_partial_grouping_paths(PlannerInfo *root,
  
     if (can_sort && cheapest_total_path != NULL)
     {
+       List       *group_pathkeys;
+       List       *orderAggPathkeys;
+       int         numAggPathkeys;
+
+       numAggPathkeys = list_length(root->group_pathkeys) -
+           root->num_groupby_pathkeys;
+
+       if (numAggPathkeys > 0)
+       {
+           group_pathkeys = list_copy_head(root->group_pathkeys,
+                                           root->num_groupby_pathkeys);
+           orderAggPathkeys = list_copy_tail(root->group_pathkeys,
+                                             root->num_groupby_pathkeys);
+       }
+       else
+       {
+           group_pathkeys = root->group_pathkeys;
+           orderAggPathkeys = NIL;
+       }
+
         /* This should have been checked previously */
         Assert(parse->hasAggs || parse->groupClause);
  
@@ -6729,10 +6983,7 @@ create_partial_grouping_paths(PlannerInfo *root,
             ListCell   *lc2;
             Path       *path = (Path *) lfirst(lc);
             Path       *path_save = path;
-
             List       *pathkey_orderings = NIL;
-
-           List       *group_pathkeys = root->group_pathkeys;
             List       *group_clauses = parse->groupClause;
  
             /* generate alternative group orderings that might be useful */
@@ -6740,7 +6991,8 @@ create_partial_grouping_paths(PlannerInfo *root,
                                                                 path->rows,
                                                                 path->pathkeys,
                                                                 group_pathkeys,
-                                                               group_clauses);
+                                                               group_clauses,
+                                                               orderAggPathkeys);
  
             Assert(list_length(pathkey_orderings) > 0);
  
@@ -6856,16 +7108,33 @@ create_partial_grouping_paths(PlannerInfo *root,
  
     if (can_sort && cheapest_partial_path != NULL)
     {
+       List       *group_pathkeys;
+       List       *orderAggPathkeys;
+       int         numAggPathkeys;
+
+       numAggPathkeys = list_length(root->group_pathkeys) -
+           root->num_groupby_pathkeys;
+
+       if (numAggPathkeys > 0)
+       {
+           group_pathkeys = list_copy_head(root->group_pathkeys,
+                                           root->num_groupby_pathkeys);
+           orderAggPathkeys = list_copy_tail(root->group_pathkeys,
+                                             root->num_groupby_pathkeys);
+       }
+       else
+       {
+           group_pathkeys = root->group_pathkeys;
+           orderAggPathkeys = NIL;
+       }
+
         /* Similar to above logic, but for partial paths. */
         foreach(lc, input_rel->partial_pathlist)
         {
             ListCell   *lc2;
             Path       *path = (Path *) lfirst(lc);
             Path       *path_original = path;
-
             List       *pathkey_orderings = NIL;
-
-           List       *group_pathkeys = root->group_pathkeys;
             List       *group_clauses = parse->groupClause;
  
             /* generate alternative group orderings that might be useful */
@@ -6873,7 +7142,8 @@ create_partial_grouping_paths(PlannerInfo *root,
                                                                 path->rows,
                                                                 path->pathkeys,
                                                                 group_pathkeys,
-                                                               group_clauses);
+                                                               group_clauses,
+                                                               orderAggPathkeys);
  
             Assert(list_length(pathkey_orderings) > 0);
  
diff --git a/src/backend/optimizer/prep/prepagg.c b/src/backend/optimizer/prep/prepagg.c

index 5b12937eada4c9c3aa57a89a6012661015245ff1..da89b55402c5aaf8ce6edc13f38f5d2ce9c4d0df 100644 (file)
--- a/src/backend/optimizer/prep/prepagg.c
+++ b/src/backend/optimizer/prep/prepagg.c
@@ -225,6 +225,7 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root)
     {
         AggInfo    *agginfo = list_nth_node(AggInfo, root->agginfos, aggno);
  
+       agginfo->aggrefs = lappend(agginfo->aggrefs, aggref);
         transno = agginfo->transno;
     }
     else
@@ -232,7 +233,7 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root)
         AggInfo    *agginfo = makeNode(AggInfo);
  
         agginfo->finalfn_oid = aggfinalfn;
-       agginfo->representative_aggref = aggref;
+       agginfo->aggrefs = list_make1(aggref);
         agginfo->shareable = shareable;
  
         aggno = list_length(root->agginfos);
@@ -386,7 +387,7 @@ find_compatible_agg(PlannerInfo *root, Aggref *newagg,
  
         aggno++;
  
-       existingRef = agginfo->representative_aggref;
+       existingRef = linitial_node(Aggref, agginfo->aggrefs);
  
         /* all of the following must be the same or it's no match */
         if (newagg->inputcollid != existingRef->inputcollid ||
@@ -648,7 +649,7 @@ get_agg_clause_costs(PlannerInfo *root, AggSplit aggsplit, AggClauseCosts *costs
     foreach(lc, root->agginfos)
     {
         AggInfo    *agginfo = lfirst_node(AggInfo, lc);
-       Aggref     *aggref = agginfo->representative_aggref;
+       Aggref     *aggref = linitial_node(Aggref, agginfo->aggrefs);
  
         /*
          * Add the appropriate component function execution costs to
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c

index 059cb7097c7637e756d361816e7c1dd1f5e68ad3..fabb5f72076cba4adef566b95cebc01585456645 100644 (file)
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -3816,6 +3816,7 @@ transformJsonAggConstructor(ParseState *pstate, JsonAggConstructor *agg_ctor,
         aggref->aggstar = false;
         aggref->aggvariadic = false;
         aggref->aggkind = AGGKIND_NORMAL;
+       aggref->aggpresorted = false;
         /* agglevelsup will be set by transformAggregateCall */
         aggref->aggsplit = AGGSPLIT_SIMPLE; /* planner might change this */
         aggref->location = agg_ctor->location;
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c

index f71a682cd657bf1039856eece4bb2b7b5730c0fa..827989f37995717cece646bcdcede9e60cd6cb10 100644 (file)
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -774,6 +774,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
         aggref->aggstar = agg_star;
         aggref->aggvariadic = func_variadic;
         aggref->aggkind = aggkind;
+       aggref->aggpresorted = false;
         /* agglevelsup will be set by transformAggregateCall */
         aggref->aggsplit = AGGSPLIT_SIMPLE; /* planner might change this */
         aggref->aggno = -1;     /* planner will set aggno and aggtransno */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 9c254dafa783ea2b25b2cedfef9864b2c239d138..e51026cb55f865343705b00fd9dfa39951ac01b4 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
   */
  
  /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 202207291
+#define CATALOG_VERSION_NO 202208021
  
  #endif
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h

index 1e3f1bbee8614f56c22766fb0106005c59e96df0..0739b389f3c705660fd7e5b098228e716c166336 100644 (file)
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -258,6 +258,8 @@ typedef enum ExprEvalOp
     EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF,
     EEOP_AGG_PLAIN_TRANS_STRICT_BYREF,
     EEOP_AGG_PLAIN_TRANS_BYREF,
+   EEOP_AGG_PRESORTED_DISTINCT_SINGLE,
+   EEOP_AGG_PRESORTED_DISTINCT_MULTI,
     EEOP_AGG_ORDERED_TRANS_DATUM,
     EEOP_AGG_ORDERED_TRANS_TUPLE,
  
@@ -659,6 +661,17 @@ typedef struct ExprEvalStep
             int         jumpnull;
         }           agg_plain_pergroup_nullcheck;
  
+       /* for EEOP_AGG_PRESORTED_DISTINCT_{SINGLE,MULTI} */
+       struct
+       {
+           AggStatePerTrans pertrans;
+           ExprContext *aggcontext;
+           int         setno;
+           int         transno;
+           int         setoff;
+           int         jumpdistinct;
+       }           agg_presorted_distinctcheck;
+
         /* for EEOP_AGG_PLAIN_TRANS_[INIT_][STRICT_]{BYVAL,BYREF} */
         /* for EEOP_AGG_ORDERED_TRANS_{DATUM,TUPLE} */
         struct
@@ -868,6 +881,10 @@ extern void ExecAggInitGroup(AggState *aggstate, AggStatePerTrans pertrans, AggS
  extern Datum ExecAggTransReparent(AggState *aggstate, AggStatePerTrans pertrans,
                                   Datum newValue, bool newValueIsNull,
                                   Datum oldValue, bool oldValueIsNull);
+extern bool ExecEvalPreOrderedDistinctSingle(AggState *aggstate,
+                                            AggStatePerTrans pertrans);
+extern bool ExecEvalPreOrderedDistinctMulti(AggState *aggstate,
+                                           AggStatePerTrans pertrans);
  extern void ExecEvalAggOrderedTransDatum(ExprState *state, ExprEvalStep *op,
                                          ExprContext *econtext);
  extern void ExecEvalAggOrderedTransTuple(ExprState *state, ExprEvalStep *op,
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h

index 4d1bd929990c9b3d70bac4a945e0097e82f8fe5a..1229c9d1ab93dd8c92fb35c76bf148804c685638 100644 (file)
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -48,6 +48,11 @@ typedef struct AggStatePerTransData
      */
     bool        aggshared;
  
+   /*
+    * True for ORDER BY and DISTINCT Aggrefs that are not aggpresorted.
+    */
+   bool        aggsortrequired;
+
     /*
      * Number of aggregated input columns.  This includes ORDER BY expressions
      * in both the plain-agg and ordered-set cases.  Ordered-set direct args
@@ -136,6 +141,9 @@ typedef struct AggStatePerTransData
     TupleTableSlot *sortslot;   /* current input tuple */
     TupleTableSlot *uniqslot;   /* used for multi-column DISTINCT */
     TupleDesc   sortdesc;       /* descriptor of input tuples */
+   Datum       lastdatum;      /* used for single-column DISTINCT */
+   bool        lastisnull;     /* used for single-column DISTINCT */
+   bool        haslast;        /* got a last value for DISTINCT check */
  
     /*
      * These values are working state that is initialized at the start of an
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h

index e2081db4edbb1227f4ae236de6d2fc2d422779e8..3b065139e68d61952eadf936319ec6609c753949 100644 (file)
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -365,6 +365,14 @@ struct PlannerInfo
  
     /* groupClause pathkeys, if any */
     List       *group_pathkeys;
+
+   /*
+    * The number of elements in the group_pathkeys list which belong to the
+    * GROUP BY clause.  Additional ones belong to ORDER BY / DISTINCT
+    * aggregates.
+    */
+   int         num_groupby_pathkeys;
+
     /* pathkeys of bottom window, if any */
     List       *window_pathkeys;
     /* distinctClause pathkeys, if any */
@@ -3134,12 +3142,12 @@ typedef struct AggInfo
     NodeTag     type;
  
     /*
-    * Link to an Aggref expr this state value is for.
+    * List of Aggref exprs that this state value is for.
      *
-    * There can be multiple identical Aggref's sharing the same per-agg. This
-    * points to the first one of them.
+    * There will always be at least one, but there can be multiple identical
+    * Aggref's sharing the same per-agg.
      */
-   Aggref     *representative_aggref;
+   List       *aggrefs;
  
     /* Transition state number for this aggregate */
     int         transno;
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h

index 1fc2fbffa33980a715c2456945e7fc043026735f..3aa96bb685566164fe97a15dd2295886d2d6f6d6 100644 (file)
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -357,6 +357,10 @@ typedef struct Param
   * replaced with a single argument representing the partial-aggregate
   * transition values.
   *
+ * aggpresorted is set by the query planner for ORDER BY and DISTINCT
+ * aggregates where the chosen plan provides presorted input for this
+ * aggregate during execution.
+ *
   * aggsplit indicates the expected partial-aggregation mode for the Aggref's
   * parent plan node.  It's always set to AGGSPLIT_SIMPLE in the parser, but
   * the planner might change it to something else.  We use this mainly as
@@ -422,6 +426,9 @@ typedef struct Aggref
     /* aggregate kind (see pg_aggregate.h) */
     char        aggkind;
  
+   /* aggregate input already sorted */
+   bool        aggpresorted pg_node_attr(equal_ignore);
+
     /* > 0 if agg belongs to outer query */
     Index       agglevelsup;
  
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h

index 54ab709c67ce5c0605237a6ff8b3d58fadb6d343..d11cdac7f8d20c648f14b88b30f7100520ae8a73 100644 (file)
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -208,7 +208,8 @@ extern int  group_keys_reorder_by_pathkeys(List *pathkeys,
                                            List **group_clauses);
  extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
                                              List *path_pathkeys,
-                                            List *group_pathkeys, List *group_clauses);
+                                            List *group_pathkeys, List *group_clauses,
+                                            List *aggregate_pathkeys);
  extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
                                             Relids required_outer,
                                             CostSelector cost_criterion,
@@ -255,6 +256,7 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root,
                                        RelOptInfo *rel,
                                        List *pathkeys);
  extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
+extern List *append_pathkeys(List *target, List *source);
  extern PathKey *make_canonical_pathkey(PlannerInfo *root,
                                        EquivalenceClass *eclass, Oid opfamily,
                                        int strategy, bool nulls_first);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out

index 601047fa3ddb2ecd2222145b71f81b805622bac0..b2198724e3c95bb5ddec4c8d3a07a059adaef3a1 100644 (file)
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1392,6 +1392,84 @@ ERROR:  column "t1.f1" must appear in the GROUP BY clause or be used in an aggre
  LINE 1: select t1.f1 from t1 left join t2 using (f1) group by f1;
                 ^
  drop table t1, t2;
+--
+-- Test planner's selection of pathkeys for ORDER BY aggregates
+--
+-- Ensure we order by four.  This suits the most aggregate functions.
+explain (costs off)
+select sum(two order by two),max(four order by four), min(four order by four)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: four
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Ensure we order by two.  It's a tie between ordering by two and four but
+-- we tiebreak on the aggregate's position.
+explain (costs off)
+select
+  sum(two order by two), max(four order by four),
+  min(four order by four), max(two order by two)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: two
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Similar to above, but tiebreak on ordering by four
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: four
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Ensure this one orders by ten since there are 3 aggregates that require ten
+-- vs two that suit two and four.
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two),
+  sum(ten order by ten), min(ten order by ten), max(ten order by ten)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: ten
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Try a case involving a GROUP BY clause where the GROUP BY column is also
+-- part of an aggregate's ORDER BY clause.  We want a sort order that works
+-- for the GROUP BY along with the first and the last aggregate.
+explain (costs off)
+select
+  sum(unique1 order by ten, two), sum(unique1 order by four),
+  sum(unique1 order by two, four)
+from tenk1
+group by ten;
+            QUERY PLAN            
+----------------------------------
+ GroupAggregate
+   Group Key: ten
+   ->  Sort
+         Sort Key: ten, two, four
+         ->  Seq Scan on tenk1
+(5 rows)
+
  --
  -- Test combinations of DISTINCT and/or ORDER BY
  --
@@ -2263,8 +2341,8 @@ NOTICE:  avg_transfn called with 3
  -- shouldn't share states due to the distinctness not matching.
  select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one);
  NOTICE:  avg_transfn called with 1
-NOTICE:  avg_transfn called with 3
  NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
  NOTICE:  avg_transfn called with 3
   my_avg | my_sum 
  --------+--------
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out

index a08a3825ff61dcf43e04b26a6296fa5645410c8e..4b41ccf1aa81ada97e2a58c4c0998db39bac413b 100644 (file)
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -367,17 +367,17 @@ SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2;
           ->  GroupAggregate
                 Group Key: pagg_tab.c
                 ->  Sort
-                     Sort Key: pagg_tab.c
+                     Sort Key: pagg_tab.c, pagg_tab.a
                       ->  Seq Scan on pagg_tab_p1 pagg_tab
           ->  GroupAggregate
                 Group Key: pagg_tab_1.c
                 ->  Sort
-                     Sort Key: pagg_tab_1.c
+                     Sort Key: pagg_tab_1.c, pagg_tab_1.a
                       ->  Seq Scan on pagg_tab_p2 pagg_tab_1
           ->  GroupAggregate
                 Group Key: pagg_tab_2.c
                 ->  Sort
-                     Sort Key: pagg_tab_2.c
+                     Sort Key: pagg_tab_2.c, pagg_tab_2.a
                       ->  Seq Scan on pagg_tab_p3 pagg_tab_2
  (18 rows)
  
@@ -948,34 +948,36 @@ SET max_parallel_workers_per_gather TO 2;
  -- is not partial agg safe.
  EXPLAIN (COSTS OFF)
  SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
-                                      QUERY PLAN                                      
---------------------------------------------------------------------------------------
- Sort
-   Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-(25 rows)
+                                         QUERY PLAN                                         
+--------------------------------------------------------------------------------------------
+ Gather Merge
+   Workers Planned: 2
+   ->  Sort
+         Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
+         ->  Parallel Append
+               ->  GroupAggregate
+                     Group Key: pagg_tab_ml.a
+                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+                     ->  Sort
+                           Sort Key: pagg_tab_ml.a, pagg_tab_ml.c
+                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+               ->  GroupAggregate
+                     Group Key: pagg_tab_ml_5.a
+                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+                     ->  Sort
+                           Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c
+                           ->  Append
+                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+               ->  GroupAggregate
+                     Group Key: pagg_tab_ml_2.a
+                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+                     ->  Sort
+                           Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c
+                           ->  Append
+                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+(27 rows)
  
  SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
   a  | sum  |  array_agg  | count 
@@ -994,32 +996,34 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
  -- Without ORDER BY clause, to test Gather at top-most path
  EXPLAIN (COSTS OFF)
  SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                             QUERY PLAN                              
----------------------------------------------------------------------
- Append
-   ->  GroupAggregate
-         Group Key: pagg_tab_ml.a
-         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-         ->  Sort
-               Sort Key: pagg_tab_ml.a
-               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-   ->  GroupAggregate
-         Group Key: pagg_tab_ml_2.a
-         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-         ->  Sort
-               Sort Key: pagg_tab_ml_2.a
-               ->  Append
-                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-   ->  GroupAggregate
-         Group Key: pagg_tab_ml_5.a
-         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-         ->  Sort
-               Sort Key: pagg_tab_ml_5.a
-               ->  Append
-                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-(23 rows)
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Gather
+   Workers Planned: 2
+   ->  Parallel Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a, pagg_tab_ml.c
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+(25 rows)
  
  -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
  -- for level 1 only. For subpartitions, GROUP BY clause does not match with
diff --git a/src/test/regress/expected/sqljson.out b/src/test/regress/expected/sqljson.out

index bdd0969a509a781a1c26fb83db6040e41de26740..748dfdb04d430e368efdb0ba2c6521bbbe615023 100644 (file)
--- a/src/test/regress/expected/sqljson.out
+++ b/src/test/regress/expected/sqljson.out
@@ -866,14 +866,14 @@ FROM
     (VALUES (NULL), (3), (1), (NULL), (NULL), (5), (2), (4), (NULL)) foo(bar);
    json_arrayagg  |  json_arrayagg  |  json_arrayagg  |  json_arrayagg  |              json_arrayagg              |              json_arrayagg              |  json_arrayagg  |                                                      json_arrayagg                                                       | json_arrayagg |            json_arrayagg             
  -----------------+-----------------+-----------------+-----------------+-----------------------------------------+-----------------------------------------+-----------------+--------------------------------------------------------------------------------------------------------------------------+---------------+--------------------------------------
- [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [null, 3, 1, null, null, 5, 2, 4, null] | [null, 3, 1, null, null, 5, 2, 4, null] | [{"bar":null}, +| [{"bar": null}, {"bar": 3}, {"bar": 1}, {"bar": null}, {"bar": null}, {"bar": 5}, {"bar": 2}, {"bar": 4}, {"bar": null}] | [{"bar":3},  +| [{"bar": 3}, {"bar": 4}, {"bar": 5}]
-                 |                 |                 |                 |                                         |                                         |  {"bar":3},    +|                                                                                                                          |  {"bar":4},  +| 
-                 |                 |                 |                 |                                         |                                         |  {"bar":1},    +|                                                                                                                          |  {"bar":5}]   | 
+ [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5, null, null, null, null] | [1, 2, 3, 4, 5, null, null, null, null] | [{"bar":1},    +| [{"bar": 1}, {"bar": 2}, {"bar": 3}, {"bar": 4}, {"bar": 5}, {"bar": null}, {"bar": null}, {"bar": null}, {"bar": null}] | [{"bar":3},  +| [{"bar": 3}, {"bar": 4}, {"bar": 5}]
+                 |                 |                 |                 |                                         |                                         |  {"bar":2},    +|                                                                                                                          |  {"bar":4},  +| 
+                 |                 |                 |                 |                                         |                                         |  {"bar":3},    +|                                                                                                                          |  {"bar":5}]   | 
+                 |                 |                 |                 |                                         |                                         |  {"bar":4},    +|                                                                                                                          |               | 
+                 |                 |                 |                 |                                         |                                         |  {"bar":5},    +|                                                                                                                          |               | 
+                 |                 |                 |                 |                                         |                                         |  {"bar":null}, +|                                                                                                                          |               | 
                   |                 |                 |                 |                                         |                                         |  {"bar":null}, +|                                                                                                                          |               | 
                   |                 |                 |                 |                                         |                                         |  {"bar":null}, +|                                                                                                                          |               | 
-                 |                 |                 |                 |                                         |                                         |  {"bar":5},    +|                                                                                                                          |               | 
-                 |                 |                 |                 |                                         |                                         |  {"bar":2},    +|                                                                                                                          |               | 
-                 |                 |                 |                 |                                         |                                         |  {"bar":4},    +|                                                                                                                          |               | 
                   |                 |                 |                 |                                         |                                         |  {"bar":null}]  |                                                                                                                          |               | 
  (1 row)
  
diff --git a/src/test/regress/expected/tuplesort.out b/src/test/regress/expected/tuplesort.out

index 418f296a3f9e40d056cbd98ee487e84b9694352a..a2efa179fc19107e029d7ce74c9c7f2ece668100 100644 (file)
--- a/src/test/regress/expected/tuplesort.out
+++ b/src/test/regress/expected/tuplesort.out
@@ -622,15 +622,18 @@ EXPLAIN (COSTS OFF) :qry;
           ->  GroupAggregate
                 Group Key: a.col12
                 Filter: (count(*) > 1)
-               ->  Merge Join
-                     Merge Cond: (a.col12 = b.col12)
-                     ->  Sort
-                           Sort Key: a.col12 DESC
-                           ->  Seq Scan on test_mark_restore a
-                     ->  Sort
-                           Sort Key: b.col12 DESC
-                           ->  Seq Scan on test_mark_restore b
-(14 rows)
+               ->  Incremental Sort
+                     Sort Key: a.col12 DESC, a.col1
+                     Presorted Key: a.col12
+                     ->  Merge Join
+                           Merge Cond: (a.col12 = b.col12)
+                           ->  Sort
+                                 Sort Key: a.col12 DESC
+                                 ->  Seq Scan on test_mark_restore a
+                           ->  Sort
+                                 Sort Key: b.col12 DESC
+                                 ->  Seq Scan on test_mark_restore b
+(17 rows)
  
  :qry;
   col12 | count | count | count | count | count 
@@ -658,15 +661,18 @@ EXPLAIN (COSTS OFF) :qry;
           ->  GroupAggregate
                 Group Key: a.col12
                 Filter: (count(*) > 1)
-               ->  Merge Join
-                     Merge Cond: (a.col12 = b.col12)
-                     ->  Sort
-                           Sort Key: a.col12 DESC
-                           ->  Seq Scan on test_mark_restore a
-                     ->  Sort
-                           Sort Key: b.col12 DESC
-                           ->  Seq Scan on test_mark_restore b
-(14 rows)
+               ->  Incremental Sort
+                     Sort Key: a.col12 DESC, a.col1
+                     Presorted Key: a.col12
+                     ->  Merge Join
+                           Merge Cond: (a.col12 = b.col12)
+                           ->  Sort
+                                 Sort Key: a.col12 DESC
+                                 ->  Seq Scan on test_mark_restore a
+                           ->  Sort
+                                 Sort Key: b.col12 DESC
+                                 ->  Seq Scan on test_mark_restore b
+(17 rows)
  
  :qry;
   col12 | count | count | count | count | count 
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql

index c6e0d7ba2b71f6ea0c7145b1063919b1804d8eff..4540a06f454e2ba3a96559da5ebc6db238357d45 100644 (file)
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -503,6 +503,49 @@ select t1.f1 from t1 left join t2 using (f1) group by f1;
  
  drop table t1, t2;
  
+--
+-- Test planner's selection of pathkeys for ORDER BY aggregates
+--
+
+-- Ensure we order by four.  This suits the most aggregate functions.
+explain (costs off)
+select sum(two order by two),max(four order by four), min(four order by four)
+from tenk1;
+
+-- Ensure we order by two.  It's a tie between ordering by two and four but
+-- we tiebreak on the aggregate's position.
+explain (costs off)
+select
+  sum(two order by two), max(four order by four),
+  min(four order by four), max(two order by two)
+from tenk1;
+
+-- Similar to above, but tiebreak on ordering by four
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two)
+from tenk1;
+
+-- Ensure this one orders by ten since there are 3 aggregates that require ten
+-- vs two that suit two and four.
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two),
+  sum(ten order by ten), min(ten order by ten), max(ten order by ten)
+from tenk1;
+
+-- Try a case involving a GROUP BY clause where the GROUP BY column is also
+-- part of an aggregate's ORDER BY clause.  We want a sort order that works
+-- for the GROUP BY along with the first and the last aggregate.
+explain (costs off)
+select
+  sum(unique1 order by ten, two), sum(unique1 order by four),
+  sum(unique1 order by two, four)
+from tenk1
+group by ten;
+
  --
  -- Test combinations of DISTINCT and/or ORDER BY
  --
author	David Rowley <drowley@postgresql.org>
	Tue, 2 Aug 2022 11:11:45 +0000 (23:11 +1200)
committer	David Rowley <drowley@postgresql.org>
	Tue, 2 Aug 2022 11:11:45 +0000 (23:11 +1200)
contrib/postgres_fdw/expected/postgres_fdw.out		patch \| blob \| blame \| history
contrib/postgres_fdw/sql/postgres_fdw.sql		patch \| blob \| blame \| history
src/backend/executor/execExpr.c		patch \| blob \| blame \| history
src/backend/executor/execExprInterp.c		patch \| blob \| blame \| history
src/backend/executor/nodeAgg.c		patch \| blob \| blame \| history
src/backend/jit/llvm/llvmjit_expr.c		patch \| blob \| blame \| history
src/backend/jit/llvm/llvmjit_types.c		patch \| blob \| blame \| history
src/backend/optimizer/path/pathkeys.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/planagg.c		patch \| blob \| blame \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| blame \| history
src/backend/optimizer/prep/prepagg.c		patch \| blob \| blame \| history
src/backend/parser/parse_expr.c		patch \| blob \| blame \| history
src/backend/parser/parse_func.c		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/include/executor/execExpr.h		patch \| blob \| blame \| history
src/include/executor/nodeAgg.h		patch \| blob \| blame \| history
src/include/nodes/pathnodes.h		patch \| blob \| blame \| history
src/include/nodes/primnodes.h		patch \| blob \| blame \| history
src/include/optimizer/paths.h		patch \| blob \| blame \| history
src/test/regress/expected/aggregates.out		patch \| blob \| blame \| history
src/test/regress/expected/partition_aggregate.out		patch \| blob \| blame \| history
src/test/regress/expected/sqljson.out		patch \| blob \| blame \| history
src/test/regress/expected/tuplesort.out		patch \| blob \| blame \| history
src/test/regress/sql/aggregates.sql		patch \| blob \| blame \| history