Improve performance of ORDER BY / DISTINCT aggregates
authorDavid Rowley <drowley@postgresql.org>
Tue, 2 Aug 2022 11:11:45 +0000 (23:11 +1200)
committerDavid Rowley <drowley@postgresql.org>
Tue, 2 Aug 2022 11:11:45 +0000 (23:11 +1200)
ORDER BY / DISTINCT aggreagtes have, since implemented in Postgres, been
executed by always performing a sort in nodeAgg.c to sort the tuples in
the current group into the correct order before calling the transition
function on the sorted tuples.  This was not great as often there might be
an index that could have provided pre-sorted input and allowed the
transition functions to be called as the rows come in, rather than having
to store them in a tuplestore in order to sort them once all the tuples
for the group have arrived.

Here we change the planner so it requests a path with a sort order which
supports the most amount of ORDER BY / DISTINCT aggregate functions and
add new code to the executor to allow it to support the processing of
ORDER BY / DISTINCT aggregates where the tuples are already sorted in the
correct order.

Since there can be many ORDER BY / DISTINCT aggregates in any given query
level, it's very possible that we can't find an order that suits all of
these aggregates.  The sort order that the planner chooses is simply the
one that suits the most aggregate functions.  We take the most strictly
sorted variation of each order and see how many aggregate functions can
use that, then we try again with the order of the remaining aggregates to
see if another order would suit more aggregate functions.  For example:

SELECT agg(a ORDER BY a),agg2(a ORDER BY a,b) ...

would request the sort order to be {a, b} because {a} is a subset of the
sort order of {a,b}, but;

SELECT agg(a ORDER BY a),agg2(a ORDER BY c) ...

would just pick a plan ordered by {a} (we give precedence to aggregates
which are earlier in the targetlist).

SELECT agg(a ORDER BY a),agg2(a ORDER BY b),agg3(a ORDER BY b) ...

would choose to order by {b} since two aggregates suit that vs just one
that requires input ordered by {a}.

Author: David Rowley
Reviewed-by: Ronan Dunklau, James Coleman, Ranier Vilela, Richard Guo, Tom Lane
Discussion: https://postgr.es/m/CAApHDvpHzfo92%3DR4W0%2BxVua3BUYCKMckWAmo-2t_KiXN-wYH%3Dw%40mail.gmail.com

24 files changed:
contrib/postgres_fdw/expected/postgres_fdw.out
contrib/postgres_fdw/sql/postgres_fdw.sql
src/backend/executor/execExpr.c
src/backend/executor/execExprInterp.c
src/backend/executor/nodeAgg.c
src/backend/jit/llvm/llvmjit_expr.c
src/backend/jit/llvm/llvmjit_types.c
src/backend/optimizer/path/pathkeys.c
src/backend/optimizer/plan/planagg.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/prep/prepagg.c
src/backend/parser/parse_expr.c
src/backend/parser/parse_func.c
src/include/catalog/catversion.h
src/include/executor/execExpr.h
src/include/executor/nodeAgg.h
src/include/nodes/pathnodes.h
src/include/nodes/primnodes.h
src/include/optimizer/paths.h
src/test/regress/expected/aggregates.out
src/test/regress/expected/partition_aggregate.out
src/test/regress/expected/sqljson.out
src/test/regress/expected/tuplesort.out
src/test/regress/sql/aggregates.sql

index ade797159dc7a213655e59f54bf22aebf0257dc4..52d0c3f3ed25cd828b862b6678c8613e0eb3da4d 100644 (file)
@@ -3295,15 +3295,18 @@ create operator class my_op_class for type int using btree family my_op_family a
 -- extension yet.
 explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
-                                         QUERY PLAN                                         
---------------------------------------------------------------------------------------------
+                                            QUERY PLAN                                            
+--------------------------------------------------------------------------------------------------
  GroupAggregate
    Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
    Group Key: ft2.c2
-   ->  Foreign Scan on public.ft2
-         Output: c1, c2
-         Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(6 rows)
+   ->  Sort
+         Output: c2, c1
+         Sort Key: ft2.c1 USING <^
+         ->  Foreign Scan on public.ft2
+               Output: c2, c1
+               Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
+(9 rows)
 
 -- This should not be pushed either.
 explain (verbose, costs off)
@@ -3329,6 +3332,7 @@ alter extension postgres_fdw add operator public.=^(int, int);
 alter extension postgres_fdw add operator public.>^(int, int);
 alter server loopback options (set extensions 'postgres_fdw');
 -- Now this will be pushed as sort operator is part of the extension.
+alter server loopback options (add fdw_tuple_cost '0.5');
 explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
                                                                            QUERY PLAN                                                                           
@@ -3345,6 +3349,7 @@ select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6
  {6,16,26,36,46,56,66,76,86,96}
 (1 row)
 
+alter server loopback options (drop fdw_tuple_cost);
 -- This should be pushed too.
 explain (verbose, costs off)
 select * from ft2 order by c1 using operator(public.<^);
@@ -3366,15 +3371,18 @@ alter server loopback options (set extensions 'postgres_fdw');
 -- This will not be pushed as sort operator is now removed from the extension.
 explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
-                                         QUERY PLAN                                         
---------------------------------------------------------------------------------------------
+                                            QUERY PLAN                                            
+--------------------------------------------------------------------------------------------------
  GroupAggregate
    Output: array_agg(c1 ORDER BY c1 USING <^ NULLS LAST), c2
    Group Key: ft2.c2
-   ->  Foreign Scan on public.ft2
-         Output: c1, c2
-         Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
-(6 rows)
+   ->  Sort
+         Output: c2, c1
+         Sort Key: ft2.c1 USING <^
+         ->  Foreign Scan on public.ft2
+               Output: c2, c1
+               Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE (("C 1" < 100)) AND ((c2 = 6))
+(9 rows)
 
 -- Cleanup
 drop operator class my_op_class using btree;
index b7817c5a415daa6e5373c68e18d2e8a2cb7b455f..2a250ee6324e019ff737ea4cf348be64a4ec980c 100644 (file)
@@ -943,9 +943,11 @@ alter extension postgres_fdw add operator public.>^(int, int);
 alter server loopback options (set extensions 'postgres_fdw');
 
 -- Now this will be pushed as sort operator is part of the extension.
+alter server loopback options (add fdw_tuple_cost '0.5');
 explain (verbose, costs off)
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
 select array_agg(c1 order by c1 using operator(public.<^)) from ft2 where c2 = 6 and c1 < 100 group by c2;
+alter server loopback options (drop fdw_tuple_cost);
 
 -- This should be pushed too.
 explain (verbose, costs off)
index c8d7145fe3840cc404eee70ef91febe39d90d3f3..d0a57c7aaee7f2dfebdc004f411a43561c2d9797 100644 (file)
@@ -3666,13 +3666,17 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
                scratch.resnull = &state->resnull;
            }
            argno++;
+
+           Assert(pertrans->numInputs == argno);
        }
-       else if (pertrans->numSortCols == 0)
+       else if (!pertrans->aggsortrequired)
        {
            ListCell   *arg;
 
            /*
-            * Normal transition function without ORDER BY / DISTINCT.
+            * Normal transition function without ORDER BY / DISTINCT or with
+            * ORDER BY / DISTINCT but the planner has given us pre-sorted
+            * input.
             */
            strictargs = trans_fcinfo->args + 1;
 
@@ -3680,6 +3684,13 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
            {
                TargetEntry *source_tle = (TargetEntry *) lfirst(arg);
 
+               /*
+                * Don't initialize args for any ORDER BY clause that might
+                * exist in a presorted aggregate.
+                */
+               if (argno == pertrans->numTransInputs)
+                   break;
+
                /*
                 * Start from 1, since the 0th arg will be the transition
                 * value
@@ -3689,11 +3700,13 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
                                &trans_fcinfo->args[argno + 1].isnull);
                argno++;
            }
+           Assert(pertrans->numTransInputs == argno);
        }
        else if (pertrans->numInputs == 1)
        {
            /*
-            * DISTINCT and/or ORDER BY case, with a single column sorted on.
+            * Non-presorted DISTINCT and/or ORDER BY case, with a single
+            * column sorted on.
             */
            TargetEntry *source_tle =
            (TargetEntry *) linitial(pertrans->aggref->args);
@@ -3705,11 +3718,14 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
                            &state->resnull);
            strictnulls = &state->resnull;
            argno++;
+
+           Assert(pertrans->numInputs == argno);
        }
        else
        {
            /*
-            * DISTINCT and/or ORDER BY case, with multiple columns sorted on.
+            * Non-presorted DISTINCT and/or ORDER BY case, with multiple
+            * columns sorted on.
             */
            Datum      *values = pertrans->sortslot->tts_values;
            bool       *nulls = pertrans->sortslot->tts_isnull;
@@ -3725,8 +3741,8 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
                                &values[argno], &nulls[argno]);
                argno++;
            }
+           Assert(pertrans->numInputs == argno);
        }
-       Assert(pertrans->numInputs == argno);
 
        /*
         * For a strict transfn, nothing happens when there's a NULL input; we
@@ -3748,6 +3764,21 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
                                         state->steps_len - 1);
        }
 
+       /* Handle DISTINCT aggregates which have pre-sorted input */
+       if (pertrans->numDistinctCols > 0 && !pertrans->aggsortrequired)
+       {
+           if (pertrans->numDistinctCols > 1)
+               scratch.opcode = EEOP_AGG_PRESORTED_DISTINCT_MULTI;
+           else
+               scratch.opcode = EEOP_AGG_PRESORTED_DISTINCT_SINGLE;
+
+           scratch.d.agg_presorted_distinctcheck.pertrans = pertrans;
+           scratch.d.agg_presorted_distinctcheck.jumpdistinct = -1;    /* adjust later */
+           ExprEvalPushStep(state, &scratch);
+           adjust_bailout = lappend_int(adjust_bailout,
+                                        state->steps_len - 1);
+       }
+
        /*
         * Call transition function (once for each concurrently evaluated
         * grouping set). Do so for both sort and hash based computations, as
@@ -3808,6 +3839,12 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
                Assert(as->d.agg_deserialize.jumpnull == -1);
                as->d.agg_deserialize.jumpnull = state->steps_len;
            }
+           else if (as->opcode == EEOP_AGG_PRESORTED_DISTINCT_SINGLE ||
+                    as->opcode == EEOP_AGG_PRESORTED_DISTINCT_MULTI)
+           {
+               Assert(as->d.agg_presorted_distinctcheck.jumpdistinct == -1);
+               as->d.agg_presorted_distinctcheck.jumpdistinct = state->steps_len;
+           }
            else
                Assert(false);
        }
@@ -3857,7 +3894,8 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
    /*
     * Determine appropriate transition implementation.
     *
-    * For non-ordered aggregates:
+    * For non-ordered aggregates and ORDER BY / DISTINCT aggregates with
+    * presorted input:
     *
     * If the initial value for the transition state doesn't exist in the
     * pg_aggregate table then we will let the first non-NULL value returned
@@ -3887,7 +3925,7 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
     * process_ordered_aggregate_{single, multi} and
     * advance_transition_function.
     */
-   if (pertrans->numSortCols == 0)
+   if (!pertrans->aggsortrequired)
    {
        if (pertrans->transtypeByVal)
        {
index 723770fda0ed3b4d0c90d56cd035fbb253126af0..636794ca6f164145d1e46957135624b2b50f9bf8 100644 (file)
@@ -502,6 +502,8 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
        &&CASE_EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF,
        &&CASE_EEOP_AGG_PLAIN_TRANS_STRICT_BYREF,
        &&CASE_EEOP_AGG_PLAIN_TRANS_BYREF,
+       &&CASE_EEOP_AGG_PRESORTED_DISTINCT_SINGLE,
+       &&CASE_EEOP_AGG_PRESORTED_DISTINCT_MULTI,
        &&CASE_EEOP_AGG_ORDERED_TRANS_DATUM,
        &&CASE_EEOP_AGG_ORDERED_TRANS_TUPLE,
        &&CASE_EEOP_LAST
@@ -1786,6 +1788,28 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
            EEO_NEXT();
        }
 
+       EEO_CASE(EEOP_AGG_PRESORTED_DISTINCT_SINGLE)
+       {
+           AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+           AggState   *aggstate = castNode(AggState, state->parent);
+
+           if (ExecEvalPreOrderedDistinctSingle(aggstate, pertrans))
+               EEO_NEXT();
+           else
+               EEO_JUMP(op->d.agg_presorted_distinctcheck.jumpdistinct);
+       }
+
+       EEO_CASE(EEOP_AGG_PRESORTED_DISTINCT_MULTI)
+       {
+           AggState   *aggstate = castNode(AggState, state->parent);
+           AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+
+           if (ExecEvalPreOrderedDistinctMulti(aggstate, pertrans))
+               EEO_NEXT();
+           else
+               EEO_JUMP(op->d.agg_presorted_distinctcheck.jumpdistinct);
+       }
+
        /* process single-column ordered aggregate datum */
        EEO_CASE(EEOP_AGG_ORDERED_TRANS_DATUM)
        {
@@ -4402,6 +4426,84 @@ ExecAggTransReparent(AggState *aggstate, AggStatePerTrans pertrans,
    return newValue;
 }
 
+/*
+ * ExecEvalPreOrderedDistinctSingle
+ *     Returns true when the aggregate transition value Datum is distinct
+ *     from the previous input Datum and returns false when the input Datum
+ *     matches the previous input Datum.
+ */
+bool
+ExecEvalPreOrderedDistinctSingle(AggState *aggstate, AggStatePerTrans pertrans)
+{
+   Datum       value = pertrans->transfn_fcinfo->args[1].value;
+   bool        isnull = pertrans->transfn_fcinfo->args[1].isnull;
+
+   if (!pertrans->haslast ||
+       pertrans->lastisnull != isnull ||
+       !DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne,
+                                       pertrans->aggCollation,
+                                       pertrans->lastdatum, value)))
+   {
+       if (pertrans->haslast && !pertrans->inputtypeByVal)
+           pfree(DatumGetPointer(pertrans->lastdatum));
+
+       pertrans->haslast = true;
+       if (!isnull)
+       {
+           MemoryContext oldContext;
+
+           oldContext = MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory);
+
+           pertrans->lastdatum = datumCopy(value, pertrans->inputtypeByVal,
+                                           pertrans->inputtypeLen);
+
+           MemoryContextSwitchTo(oldContext);
+       }
+       else
+           pertrans->lastdatum = (Datum) 0;
+       pertrans->lastisnull = isnull;
+       return true;
+   }
+
+   return false;
+}
+
+/*
+ * ExecEvalPreOrderedDistinctMulti
+ *     Returns true when the aggregate input is distinct from the previous
+ *     input and returns false when the input matches the previous input.
+ */
+bool
+ExecEvalPreOrderedDistinctMulti(AggState *aggstate, AggStatePerTrans pertrans)
+{
+   ExprContext *tmpcontext = aggstate->tmpcontext;
+
+   for (int i = 0; i < pertrans->numTransInputs; i++)
+   {
+       pertrans->sortslot->tts_values[i] = pertrans->transfn_fcinfo->args[i + 1].value;
+       pertrans->sortslot->tts_isnull[i] = pertrans->transfn_fcinfo->args[i + 1].isnull;
+   }
+
+   ExecClearTuple(pertrans->sortslot);
+   pertrans->sortslot->tts_nvalid = pertrans->numInputs;
+   ExecStoreVirtualTuple(pertrans->sortslot);
+
+   tmpcontext->ecxt_outertuple = pertrans->sortslot;
+   tmpcontext->ecxt_innertuple = pertrans->uniqslot;
+
+   if (!pertrans->haslast ||
+       !ExecQual(pertrans->equalfnMulti, tmpcontext))
+   {
+       if (pertrans->haslast)
+           ExecClearTuple(pertrans->uniqslot);
+
+       pertrans->haslast = true;
+       ExecCopySlot(pertrans->uniqslot, pertrans->sortslot);
+       return true;
+   }
+   return false;
+}
+
 /*
  * Invoke ordered transition function, with a datum argument.
  */
index 2fc606cf29d1179da19c68c5d85cf2085a788a84..96d200e4461b380e11287e30bc95cfc0c2f55609 100644 (file)
@@ -583,7 +583,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
    /*
     * Start a fresh sort operation for each DISTINCT/ORDER BY aggregate.
     */
-   if (pertrans->numSortCols > 0)
+   if (pertrans->aggsortrequired)
    {
        /*
         * In case of rescan, maybe there could be an uncompleted sort
@@ -1309,7 +1309,7 @@ finalize_aggregates(AggState *aggstate,
 
        pergroupstate = &pergroup[transno];
 
-       if (pertrans->numSortCols > 0)
+       if (pertrans->aggsortrequired)
        {
            Assert(aggstate->aggstrategy != AGG_HASHED &&
                   aggstate->aggstrategy != AGG_MIXED);
@@ -1323,6 +1323,21 @@ finalize_aggregates(AggState *aggstate,
                                                pertrans,
                                                pergroupstate);
        }
+       else if (pertrans->numDistinctCols > 0 && pertrans->haslast)
+       {
+           pertrans->haslast = false;
+
+           if (pertrans->numDistinctCols == 1)
+           {
+               if (!pertrans->inputtypeByVal && !pertrans->lastisnull)
+                   pfree(DatumGetPointer(pertrans->lastdatum));
+
+               pertrans->lastisnull = false;
+               pertrans->lastdatum = (Datum) 0;
+           }
+           else
+               ExecClearTuple(pertrans->uniqslot);
+       }
    }
 
    /*
@@ -4127,6 +4142,12 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
     * stick them into arrays.  We ignore ORDER BY for an ordered-set agg,
     * however; the agg's transfn and finalfn are responsible for that.
     *
+    * When the planner has set the aggpresorted flag, the input to the
+    * aggregate is already correctly sorted.  For ORDER BY aggregates we can
+    * simply treat these as normal aggregates.  For presorted DISTINCT
+    * aggregates an extra step must be added to remove duplicate consecutive
+    * inputs.
+    *
     * Note that by construction, if there is a DISTINCT clause then the ORDER
     * BY clause is a prefix of it (see transformDistinctClause).
     */
@@ -4134,18 +4155,27 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
    {
        sortlist = NIL;
        numSortCols = numDistinctCols = 0;
+       pertrans->aggsortrequired = false;
+   }
+   else if (aggref->aggpresorted && aggref->aggdistinct == NIL)
+   {
+       sortlist = NIL;
+       numSortCols = numDistinctCols = 0;
+       pertrans->aggsortrequired = false;
    }
    else if (aggref->aggdistinct)
    {
        sortlist = aggref->aggdistinct;
        numSortCols = numDistinctCols = list_length(sortlist);
        Assert(numSortCols >= list_length(aggref->aggorder));
+       pertrans->aggsortrequired = !aggref->aggpresorted;
    }
    else
    {
        sortlist = aggref->aggorder;
        numSortCols = list_length(sortlist);
        numDistinctCols = 0;
+       pertrans->aggsortrequired = (numSortCols > 0);
    }
 
    pertrans->numSortCols = numSortCols;
index b6b6512ef1fce3d1aab50f2b032571127979de6e..bd3965143da270ff007b1af5600950469b1887bb 100644 (file)
@@ -2335,6 +2335,54 @@ llvm_compile_expr(ExprState *state)
                    break;
                }
 
+           case EEOP_AGG_PRESORTED_DISTINCT_SINGLE:
+               {
+                   AggState   *aggstate = castNode(AggState, state->parent);
+                   AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+                   int         jumpdistinct = op->d.agg_presorted_distinctcheck.jumpdistinct;
+
+                   LLVMValueRef v_fn = llvm_pg_func(mod, "ExecEvalPreOrderedDistinctSingle");
+                   LLVMValueRef v_args[2];
+                   LLVMValueRef v_ret;
+
+                   v_args[0] = l_ptr_const(aggstate, l_ptr(StructAggState));
+                   v_args[1] = l_ptr_const(pertrans, l_ptr(StructAggStatePerTransData));
+
+                   v_ret = LLVMBuildCall(b, v_fn, v_args, 2, "");
+                   v_ret = LLVMBuildZExt(b, v_ret, TypeStorageBool, "");
+
+                   LLVMBuildCondBr(b,
+                                   LLVMBuildICmp(b, LLVMIntEQ, v_ret,
+                                                 l_sbool_const(1), ""),
+                                   opblocks[opno + 1],
+                                   opblocks[jumpdistinct]);
+                   break;
+               }
+
+           case EEOP_AGG_PRESORTED_DISTINCT_MULTI:
+               {
+                   AggState   *aggstate = castNode(AggState, state->parent);
+                   AggStatePerTrans pertrans = op->d.agg_presorted_distinctcheck.pertrans;
+                   int         jumpdistinct = op->d.agg_presorted_distinctcheck.jumpdistinct;
+
+                   LLVMValueRef v_fn = llvm_pg_func(mod, "ExecEvalPreOrderedDistinctMulti");
+                   LLVMValueRef v_args[2];
+                   LLVMValueRef v_ret;
+
+                   v_args[0] = l_ptr_const(aggstate, l_ptr(StructAggState));
+                   v_args[1] = l_ptr_const(pertrans, l_ptr(StructAggStatePerTransData));
+
+                   v_ret = LLVMBuildCall(b, v_fn, v_args, 2, "");
+                   v_ret = LLVMBuildZExt(b, v_ret, TypeStorageBool, "");
+
+                   LLVMBuildCondBr(b,
+                                   LLVMBuildICmp(b, LLVMIntEQ, v_ret,
+                                                 l_sbool_const(1), ""),
+                                   opblocks[opno + 1],
+                                   opblocks[jumpdistinct]);
+                   break;
+               }
+
            case EEOP_AGG_ORDERED_TRANS_DATUM:
                build_EvalXFunc(b, mod, "ExecEvalAggOrderedTransDatum",
                                v_state, op, v_econtext);
index b2bda868893dfdfcb768c382df99d1224e9824e3..373471ad27fee85a071207e41a46153216712793 100644 (file)
@@ -103,6 +103,8 @@ void       *referenced_functions[] =
 {
    ExecAggInitGroup,
    ExecAggTransReparent,
+   ExecEvalPreOrderedDistinctSingle,
+   ExecEvalPreOrderedDistinctMulti,
    ExecEvalAggOrderedTransDatum,
    ExecEvalAggOrderedTransTuple,
    ExecEvalArrayCoerce,
index 2a1c923b4a857b2886e026c71289db822b534c88..1fa7fc99b5144cd2dea5fb05a0623e0a65d4e4bb 100644 (file)
@@ -103,6 +103,28 @@ make_canonical_pathkey(PlannerInfo *root,
    return pk;
 }
 
+/*
+ * append_pathkeys
+ *     Append all non-redundant PathKeys in 'source' onto 'target' and
+ *     returns the updated 'target' list.
+ */
+List *
+append_pathkeys(List *target, List *source)
+{
+   ListCell   *lc;
+
+   Assert(target != NIL);
+
+   foreach(lc, source)
+   {
+       PathKey    *pk = lfirst_node(PathKey, lc);
+
+       if (!pathkey_is_redundant(pk, target))
+           target = lappend(target, pk);
+   }
+   return target;
+}
+
 /*
  * pathkey_is_redundant
  *    Is a pathkey redundant with one already in the given list?
@@ -746,7 +768,8 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows,
 List *
 get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
                                List *path_pathkeys,
-                               List *group_pathkeys, List *group_clauses)
+                               List *group_pathkeys, List *group_clauses,
+                               List *aggregate_pathkeys)
 {
    Query      *parse = root->parse;
    List       *infos = NIL;
@@ -758,7 +781,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 
    /* always return at least the original pathkeys/clauses */
    info = makeNode(PathKeyInfo);
-   info->pathkeys = pathkeys;
+   if (aggregate_pathkeys != NIL)
+       info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+   else
+       info->pathkeys = pathkeys;
    info->clauses = clauses;
 
    infos = lappend(infos, info);
@@ -783,7 +809,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
                                      n_preordered))
    {
        info = makeNode(PathKeyInfo);
-       info->pathkeys = pathkeys;
+       if (aggregate_pathkeys != NIL)
+           info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+       else
+           info->pathkeys = pathkeys;
        info->clauses = clauses;
 
        infos = lappend(infos, info);
@@ -822,7 +851,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
         * still want to keep the keys reordered to path_pathkeys.
         */
        info = makeNode(PathKeyInfo);
-       info->pathkeys = pathkeys;
+       if (aggregate_pathkeys != NIL)
+           info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+       else
+           info->pathkeys = pathkeys;
        info->clauses = clauses;
 
        infos = lappend(infos, info);
@@ -850,7 +882,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
 
        /* keep the group keys reordered to match ordering of input path */
        info = makeNode(PathKeyInfo);
-       info->pathkeys = pathkeys;
+       if (aggregate_pathkeys != NIL)
+           info->pathkeys = list_concat_copy(pathkeys, aggregate_pathkeys);
+       else
+           info->pathkeys = pathkeys;
        info->clauses = clauses;
 
        infos = lappend(infos, info);
index e0e357960f2378546bf8382fbb649bc281df2f4a..ab3f7abba19312ec989497900bf94c1f6b33dd60 100644 (file)
@@ -245,7 +245,7 @@ can_minmax_aggs(PlannerInfo *root, List **context)
    foreach(lc, root->agginfos)
    {
        AggInfo    *agginfo = lfirst_node(AggInfo, lc);
-       Aggref     *aggref = agginfo->representative_aggref;
+       Aggref     *aggref = linitial_node(Aggref, agginfo->aggrefs);
        Oid         aggsortop;
        TargetEntry *curTarget;
        MinMaxAggInfo *mminfo;
index 06ad856eac1031693234cd0d2ae8585321acd483..64632db73cdcba13810cf0c1470fb9db3b00a044 100644 (file)
@@ -24,6 +24,7 @@
 #include "access/sysattr.h"
 #include "access/table.h"
 #include "access/xact.h"
+#include "catalog/pg_aggregate.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_proc.h"
@@ -3125,6 +3126,217 @@ reorder_grouping_sets(List *groupingsets, List *sortclause)
    return result;
 }
 
+/*
+ * make_pathkeys_for_groupagg
+ *     Determine the pathkeys for the GROUP BY clause and/or any ordered
+ *     aggregates.  We expect at least one of these here.
+ *
+ * Building the pathkeys for the GROUP BY is simple.  Most of the complexity
+ * involved here comes from calculating the best pathkeys for ordered
+ * aggregates.  We define "best" as the pathkeys that suit the most number of
+ * aggregate functions.  We find these by looking at the first ORDER BY /
+ * DISTINCT aggregate and take the pathkeys for that before searching for
+ * other aggregates that require the same or a more strict variation of the
+ * same pathkeys.  We then repeat that process for any remaining aggregates
+ * with different pathkeys and if we find another set of pathkeys that suits a
+ * larger number of aggregates then we return those pathkeys instead.
+ *
+ * *number_groupby_pathkeys gets set to the number of elements in the returned
+ * list that belong to the GROUP BY clause.  Any elements above this number
+ * must belong to ORDER BY / DISTINCT aggregates.
+ *
+ * When the best pathkeys are found we also mark each Aggref that can use
+ * those pathkeys as aggpresorted = true.
+ */
+static List *
+make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
+                          int *number_groupby_pathkeys)
+{
+   List       *grouppathkeys = NIL;
+   List       *bestpathkeys;
+   Bitmapset  *bestaggs;
+   Bitmapset  *unprocessed_aggs;
+   ListCell   *lc;
+   int         i;
+
+   Assert(groupClause != NIL || root->numOrderedAggs > 0);
+
+   if (groupClause != NIL)
+   {
+       /* no pathkeys possible if there's an unsortable GROUP BY */
+       if (!grouping_is_sortable(groupClause))
+       {
+           *number_groupby_pathkeys = 0;
+           return NIL;
+       }
+
+       grouppathkeys = make_pathkeys_for_sortclauses(root, groupClause,
+                                                     tlist);
+       *number_groupby_pathkeys = list_length(grouppathkeys);
+   }
+   else
+       *number_groupby_pathkeys = 0;
+
+   /*
+    * We can't add pathkeys for ordered aggregates if there are any grouping
+    * sets.  All handling specific to ordered aggregates must be done by the
+    * executor in that case.
+    */
+   if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL)
+       return grouppathkeys;
+
+   /*
+    * Make a first pass over all AggInfos to collect a Bitmapset containing
+    * the indexes of all AggInfos to be processed below.
+    */
+   unprocessed_aggs = NULL;
+   foreach(lc, root->agginfos)
+   {
+       AggInfo    *agginfo = lfirst_node(AggInfo, lc);
+       Aggref     *aggref = linitial_node(Aggref, agginfo->aggrefs);
+
+       if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
+           continue;
+
+       /* only add aggregates with a DISTINCT or ORDER BY */
+       if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
+           unprocessed_aggs = bms_add_member(unprocessed_aggs,
+                                             foreach_current_index(lc));
+   }
+
+   /*
+    * Now process all the unprocessed_aggs to find the best set of pathkeys
+    * for the given set of aggregates.
+    *
+    * On the first outer loop here 'bestaggs' will be empty.   We'll populate
+    * this during the first loop using the pathkeys for the very first
+    * AggInfo then taking any stronger pathkeys from any other AggInfos with
+    * a more strict set of compatible pathkeys.  Once the outer loop is
+    * complete, we mark off all the aggregates with compatible pathkeys then
+    * remove those from the unprocessed_aggs and repeat the process to try to
+    * find another set of pathkeys that are suitable for a larger number of
+    * aggregates.  The outer loop will stop when there are not enough
+    * unprocessed aggregates for it to be possible to find a set of pathkeys
+    * to suit a larger number of aggregates.
+    */
+   bestpathkeys = NIL;
+   bestaggs = NULL;
+   while (bms_num_members(unprocessed_aggs) > bms_num_members(bestaggs))
+   {
+       Bitmapset  *aggindexes = NULL;
+       List       *currpathkeys = NIL;
+
+       i = -1;
+       while ((i = bms_next_member(unprocessed_aggs, i)) >= 0)
+       {
+           AggInfo    *agginfo = list_nth_node(AggInfo, root->agginfos, i);
+           Aggref     *aggref = linitial_node(Aggref, agginfo->aggrefs);
+           List       *sortlist;
+
+           if (aggref->aggdistinct != NIL)
+               sortlist = aggref->aggdistinct;
+           else
+               sortlist = aggref->aggorder;
+
+           /*
+            * When not set yet, take the pathkeys from the first unprocessed
+            * aggregate.
+            */
+           if (currpathkeys == NIL)
+           {
+               currpathkeys = make_pathkeys_for_sortclauses(root, sortlist,
+                                                            aggref->args);
+
+               /* include the GROUP BY pathkeys, if they exist */
+               if (grouppathkeys != NIL)
+                   currpathkeys = append_pathkeys(list_copy(grouppathkeys),
+                                                  currpathkeys);
+
+               /* record that we found pathkeys for this aggregate */
+               aggindexes = bms_add_member(aggindexes, i);
+           }
+           else
+           {
+               List       *pathkeys;
+
+               /* now look for a stronger set of matching pathkeys */
+               pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
+                                                        aggref->args);
+
+               /* include the GROUP BY pathkeys, if they exist */
+               if (grouppathkeys != NIL)
+                   pathkeys = append_pathkeys(list_copy(grouppathkeys),
+                                              pathkeys);
+
+               /* are 'pathkeys' compatible or better than 'currpathkeys'? */
+               switch (compare_pathkeys(currpathkeys, pathkeys))
+               {
+                   case PATHKEYS_BETTER2:
+                       /* 'pathkeys' are stronger, use these ones instead */
+                       currpathkeys = pathkeys;
+                       /* FALLTHROUGH */
+
+                   case PATHKEYS_BETTER1:
+                       /* 'pathkeys' are less strict */
+                       /* FALLTHROUGH */
+
+                   case PATHKEYS_EQUAL:
+                       /* mark this aggregate as covered by 'currpathkeys' */
+                       aggindexes = bms_add_member(aggindexes, i);
+                       break;
+
+                   case PATHKEYS_DIFFERENT:
+                       break;
+               }
+           }
+       }
+
+       /* remove the aggregates that we've just processed */
+       unprocessed_aggs = bms_del_members(unprocessed_aggs, aggindexes);
+
+       /*
+        * If this pass included more aggregates than the previous best then
+        * use these ones as the best set.
+        */
+       if (bms_num_members(aggindexes) > bms_num_members(bestaggs))
+       {
+           bestaggs = aggindexes;
+           bestpathkeys = currpathkeys;
+       }
+   }
+
+   /*
+    * Now that we've found the best set of aggregates we can set the
+    * presorted flag to indicate to the executor that it needn't bother
+    * performing a sort for these Aggrefs.  We're able to do this now as
+    * there's no chance of a Hash Aggregate plan as create_grouping_paths
+    * will not mark the GROUP BY as GROUPING_CAN_USE_HASH due to the presence
+    * of ordered aggregates.
+    */
+   i = -1;
+   while ((i = bms_next_member(bestaggs, i)) >= 0)
+   {
+       AggInfo    *agginfo = list_nth_node(AggInfo, root->agginfos, i);
+
+       foreach(lc, agginfo->aggrefs)
+       {
+           Aggref     *aggref = lfirst_node(Aggref, lc);
+
+           aggref->aggpresorted = true;
+       }
+   }
+
+   /*
+    * bestpathkeys includes the GROUP BY pathkeys, so if we found any ordered
+    * aggregates, then return bestpathkeys, otherwise return the
+    * grouppathkeys.
+    */
+   if (bestpathkeys != NIL)
+       return bestpathkeys;
+
+   return grouppathkeys;
+}
+
 /*
  * Compute query_pathkeys and other pathkeys during plan generation
  */
@@ -3137,18 +3349,19 @@ standard_qp_callback(PlannerInfo *root, void *extra)
    List       *activeWindows = qp_extra->activeWindows;
 
    /*
-    * Calculate pathkeys that represent grouping/ordering requirements.  The
-    * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
-    * be, in which case we just leave their pathkeys empty.
+    * Calculate pathkeys that represent grouping/ordering and/or ordered
+    * aggregate requirements.
     */
-   if (qp_extra->groupClause &&
-       grouping_is_sortable(qp_extra->groupClause))
-       root->group_pathkeys =
-           make_pathkeys_for_sortclauses(root,
-                                         qp_extra->groupClause,
-                                         tlist);
+   if (qp_extra->groupClause != NIL || root->numOrderedAggs > 0)
+       root->group_pathkeys = make_pathkeys_for_groupagg(root,
+                                                         qp_extra->groupClause,
+                                                         tlist,
+                                                         &root->num_groupby_pathkeys);
    else
+   {
        root->group_pathkeys = NIL;
+       root->num_groupby_pathkeys = 0;
+   }
 
    /* We consider only the first (bottom) window in pathkeys logic */
    if (activeWindows != NIL)
@@ -6222,6 +6435,26 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
    if (can_sort)
    {
+       List       *group_pathkeys;
+       List       *orderAggPathkeys;
+       int         numAggPathkeys;
+
+       numAggPathkeys = list_length(root->group_pathkeys) -
+           root->num_groupby_pathkeys;
+
+       if (numAggPathkeys > 0)
+       {
+           group_pathkeys = list_copy_head(root->group_pathkeys,
+                                           root->num_groupby_pathkeys);
+           orderAggPathkeys = list_copy_tail(root->group_pathkeys,
+                                             root->num_groupby_pathkeys);
+       }
+       else
+       {
+           group_pathkeys = root->group_pathkeys;
+           orderAggPathkeys = NIL;
+       }
+
        /*
         * Use any available suitably-sorted path as input, and also consider
         * sorting the cheapest-total path.
@@ -6234,7 +6467,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 
            List       *pathkey_orderings = NIL;
 
-           List       *group_pathkeys = root->group_pathkeys;
            List       *group_clauses = parse->groupClause;
 
            /* generate alternative group orderings that might be useful */
@@ -6242,7 +6474,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                                                                path->rows,
                                                                path->pathkeys,
                                                                group_pathkeys,
-                                                               group_clauses);
+                                                               group_clauses,
+                                                               orderAggPathkeys);
 
            Assert(list_length(pathkey_orderings) > 0);
 
@@ -6414,7 +6647,8 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
                                                                    path->rows,
                                                                    path->pathkeys,
                                                                    group_pathkeys,
-                                                                   group_clauses);
+                                                                   group_clauses,
+                                                                   orderAggPathkeys);
 
                Assert(list_length(pathkey_orderings) > 0);
 
@@ -6717,6 +6951,26 @@ create_partial_grouping_paths(PlannerInfo *root,
 
    if (can_sort && cheapest_total_path != NULL)
    {
+       List       *group_pathkeys;
+       List       *orderAggPathkeys;
+       int         numAggPathkeys;
+
+       numAggPathkeys = list_length(root->group_pathkeys) -
+           root->num_groupby_pathkeys;
+
+       if (numAggPathkeys > 0)
+       {
+           group_pathkeys = list_copy_head(root->group_pathkeys,
+                                           root->num_groupby_pathkeys);
+           orderAggPathkeys = list_copy_tail(root->group_pathkeys,
+                                             root->num_groupby_pathkeys);
+       }
+       else
+       {
+           group_pathkeys = root->group_pathkeys;
+           orderAggPathkeys = NIL;
+       }
+
        /* This should have been checked previously */
        Assert(parse->hasAggs || parse->groupClause);
 
@@ -6729,10 +6983,7 @@ create_partial_grouping_paths(PlannerInfo *root,
            ListCell   *lc2;
            Path       *path = (Path *) lfirst(lc);
            Path       *path_save = path;
-
            List       *pathkey_orderings = NIL;
-
-           List       *group_pathkeys = root->group_pathkeys;
            List       *group_clauses = parse->groupClause;
 
            /* generate alternative group orderings that might be useful */
@@ -6740,7 +6991,8 @@ create_partial_grouping_paths(PlannerInfo *root,
                                                                path->rows,
                                                                path->pathkeys,
                                                                group_pathkeys,
-                                                               group_clauses);
+                                                               group_clauses,
+                                                               orderAggPathkeys);
 
            Assert(list_length(pathkey_orderings) > 0);
 
@@ -6856,16 +7108,33 @@ create_partial_grouping_paths(PlannerInfo *root,
 
    if (can_sort && cheapest_partial_path != NULL)
    {
+       List       *group_pathkeys;
+       List       *orderAggPathkeys;
+       int         numAggPathkeys;
+
+       numAggPathkeys = list_length(root->group_pathkeys) -
+           root->num_groupby_pathkeys;
+
+       if (numAggPathkeys > 0)
+       {
+           group_pathkeys = list_copy_head(root->group_pathkeys,
+                                           root->num_groupby_pathkeys);
+           orderAggPathkeys = list_copy_tail(root->group_pathkeys,
+                                             root->num_groupby_pathkeys);
+       }
+       else
+       {
+           group_pathkeys = root->group_pathkeys;
+           orderAggPathkeys = NIL;
+       }
+
        /* Similar to above logic, but for partial paths. */
        foreach(lc, input_rel->partial_pathlist)
        {
            ListCell   *lc2;
            Path       *path = (Path *) lfirst(lc);
            Path       *path_original = path;
-
            List       *pathkey_orderings = NIL;
-
-           List       *group_pathkeys = root->group_pathkeys;
            List       *group_clauses = parse->groupClause;
 
            /* generate alternative group orderings that might be useful */
@@ -6873,7 +7142,8 @@ create_partial_grouping_paths(PlannerInfo *root,
                                                                path->rows,
                                                                path->pathkeys,
                                                                group_pathkeys,
-                                                               group_clauses);
+                                                               group_clauses,
+                                                               orderAggPathkeys);
 
            Assert(list_length(pathkey_orderings) > 0);
 
index 5b12937eada4c9c3aa57a89a6012661015245ff1..da89b55402c5aaf8ce6edc13f38f5d2ce9c4d0df 100644 (file)
@@ -225,6 +225,7 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root)
    {
        AggInfo    *agginfo = list_nth_node(AggInfo, root->agginfos, aggno);
 
+       agginfo->aggrefs = lappend(agginfo->aggrefs, aggref);
        transno = agginfo->transno;
    }
    else
@@ -232,7 +233,7 @@ preprocess_aggref(Aggref *aggref, PlannerInfo *root)
        AggInfo    *agginfo = makeNode(AggInfo);
 
        agginfo->finalfn_oid = aggfinalfn;
-       agginfo->representative_aggref = aggref;
+       agginfo->aggrefs = list_make1(aggref);
        agginfo->shareable = shareable;
 
        aggno = list_length(root->agginfos);
@@ -386,7 +387,7 @@ find_compatible_agg(PlannerInfo *root, Aggref *newagg,
 
        aggno++;
 
-       existingRef = agginfo->representative_aggref;
+       existingRef = linitial_node(Aggref, agginfo->aggrefs);
 
        /* all of the following must be the same or it's no match */
        if (newagg->inputcollid != existingRef->inputcollid ||
@@ -648,7 +649,7 @@ get_agg_clause_costs(PlannerInfo *root, AggSplit aggsplit, AggClauseCosts *costs
    foreach(lc, root->agginfos)
    {
        AggInfo    *agginfo = lfirst_node(AggInfo, lc);
-       Aggref     *aggref = agginfo->representative_aggref;
+       Aggref     *aggref = linitial_node(Aggref, agginfo->aggrefs);
 
        /*
         * Add the appropriate component function execution costs to
index 059cb7097c7637e756d361816e7c1dd1f5e68ad3..fabb5f72076cba4adef566b95cebc01585456645 100644 (file)
@@ -3816,6 +3816,7 @@ transformJsonAggConstructor(ParseState *pstate, JsonAggConstructor *agg_ctor,
        aggref->aggstar = false;
        aggref->aggvariadic = false;
        aggref->aggkind = AGGKIND_NORMAL;
+       aggref->aggpresorted = false;
        /* agglevelsup will be set by transformAggregateCall */
        aggref->aggsplit = AGGSPLIT_SIMPLE; /* planner might change this */
        aggref->location = agg_ctor->location;
index f71a682cd657bf1039856eece4bb2b7b5730c0fa..827989f37995717cece646bcdcede9e60cd6cb10 100644 (file)
@@ -774,6 +774,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
        aggref->aggstar = agg_star;
        aggref->aggvariadic = func_variadic;
        aggref->aggkind = aggkind;
+       aggref->aggpresorted = false;
        /* agglevelsup will be set by transformAggregateCall */
        aggref->aggsplit = AGGSPLIT_SIMPLE; /* planner might change this */
        aggref->aggno = -1;     /* planner will set aggno and aggtransno */
index 9c254dafa783ea2b25b2cedfef9864b2c239d138..e51026cb55f865343705b00fd9dfa39951ac01b4 100644 (file)
@@ -57,6 +57,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 202207291
+#define CATALOG_VERSION_NO 202208021
 
 #endif
index 1e3f1bbee8614f56c22766fb0106005c59e96df0..0739b389f3c705660fd7e5b098228e716c166336 100644 (file)
@@ -258,6 +258,8 @@ typedef enum ExprEvalOp
    EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF,
    EEOP_AGG_PLAIN_TRANS_STRICT_BYREF,
    EEOP_AGG_PLAIN_TRANS_BYREF,
+   EEOP_AGG_PRESORTED_DISTINCT_SINGLE,
+   EEOP_AGG_PRESORTED_DISTINCT_MULTI,
    EEOP_AGG_ORDERED_TRANS_DATUM,
    EEOP_AGG_ORDERED_TRANS_TUPLE,
 
@@ -659,6 +661,17 @@ typedef struct ExprEvalStep
            int         jumpnull;
        }           agg_plain_pergroup_nullcheck;
 
+       /* for EEOP_AGG_PRESORTED_DISTINCT_{SINGLE,MULTI} */
+       struct
+       {
+           AggStatePerTrans pertrans;
+           ExprContext *aggcontext;
+           int         setno;
+           int         transno;
+           int         setoff;
+           int         jumpdistinct;
+       }           agg_presorted_distinctcheck;
+
        /* for EEOP_AGG_PLAIN_TRANS_[INIT_][STRICT_]{BYVAL,BYREF} */
        /* for EEOP_AGG_ORDERED_TRANS_{DATUM,TUPLE} */
        struct
@@ -868,6 +881,10 @@ extern void ExecAggInitGroup(AggState *aggstate, AggStatePerTrans pertrans, AggS
 extern Datum ExecAggTransReparent(AggState *aggstate, AggStatePerTrans pertrans,
                                  Datum newValue, bool newValueIsNull,
                                  Datum oldValue, bool oldValueIsNull);
+extern bool ExecEvalPreOrderedDistinctSingle(AggState *aggstate,
+                                            AggStatePerTrans pertrans);
+extern bool ExecEvalPreOrderedDistinctMulti(AggState *aggstate,
+                                           AggStatePerTrans pertrans);
 extern void ExecEvalAggOrderedTransDatum(ExprState *state, ExprEvalStep *op,
                                         ExprContext *econtext);
 extern void ExecEvalAggOrderedTransTuple(ExprState *state, ExprEvalStep *op,
index 4d1bd929990c9b3d70bac4a945e0097e82f8fe5a..1229c9d1ab93dd8c92fb35c76bf148804c685638 100644 (file)
@@ -48,6 +48,11 @@ typedef struct AggStatePerTransData
     */
    bool        aggshared;
 
+   /*
+    * True for ORDER BY and DISTINCT Aggrefs that are not aggpresorted.
+    */
+   bool        aggsortrequired;
+
    /*
     * Number of aggregated input columns.  This includes ORDER BY expressions
     * in both the plain-agg and ordered-set cases.  Ordered-set direct args
@@ -136,6 +141,9 @@ typedef struct AggStatePerTransData
    TupleTableSlot *sortslot;   /* current input tuple */
    TupleTableSlot *uniqslot;   /* used for multi-column DISTINCT */
    TupleDesc   sortdesc;       /* descriptor of input tuples */
+   Datum       lastdatum;      /* used for single-column DISTINCT */
+   bool        lastisnull;     /* used for single-column DISTINCT */
+   bool        haslast;        /* got a last value for DISTINCT check */
 
    /*
     * These values are working state that is initialized at the start of an
index e2081db4edbb1227f4ae236de6d2fc2d422779e8..3b065139e68d61952eadf936319ec6609c753949 100644 (file)
@@ -365,6 +365,14 @@ struct PlannerInfo
 
    /* groupClause pathkeys, if any */
    List       *group_pathkeys;
+
+   /*
+    * The number of elements in the group_pathkeys list which belong to the
+    * GROUP BY clause.  Additional ones belong to ORDER BY / DISTINCT
+    * aggregates.
+    */
+   int         num_groupby_pathkeys;
+
    /* pathkeys of bottom window, if any */
    List       *window_pathkeys;
    /* distinctClause pathkeys, if any */
@@ -3134,12 +3142,12 @@ typedef struct AggInfo
    NodeTag     type;
 
    /*
-    * Link to an Aggref expr this state value is for.
+    * List of Aggref exprs that this state value is for.
     *
-    * There can be multiple identical Aggref's sharing the same per-agg. This
-    * points to the first one of them.
+    * There will always be at least one, but there can be multiple identical
+    * Aggref's sharing the same per-agg.
     */
-   Aggref     *representative_aggref;
+   List       *aggrefs;
 
    /* Transition state number for this aggregate */
    int         transno;
index 1fc2fbffa33980a715c2456945e7fc043026735f..3aa96bb685566164fe97a15dd2295886d2d6f6d6 100644 (file)
@@ -357,6 +357,10 @@ typedef struct Param
  * replaced with a single argument representing the partial-aggregate
  * transition values.
  *
+ * aggpresorted is set by the query planner for ORDER BY and DISTINCT
+ * aggregates where the chosen plan provides presorted input for this
+ * aggregate during execution.
+ *
  * aggsplit indicates the expected partial-aggregation mode for the Aggref's
  * parent plan node.  It's always set to AGGSPLIT_SIMPLE in the parser, but
  * the planner might change it to something else.  We use this mainly as
@@ -422,6 +426,9 @@ typedef struct Aggref
    /* aggregate kind (see pg_aggregate.h) */
    char        aggkind;
 
+   /* aggregate input already sorted */
+   bool        aggpresorted pg_node_attr(equal_ignore);
+
    /* > 0 if agg belongs to outer query */
    Index       agglevelsup;
 
index 54ab709c67ce5c0605237a6ff8b3d58fadb6d343..d11cdac7f8d20c648f14b88b30f7100520ae8a73 100644 (file)
@@ -208,7 +208,8 @@ extern int  group_keys_reorder_by_pathkeys(List *pathkeys,
                                           List **group_clauses);
 extern List *get_useful_group_keys_orderings(PlannerInfo *root, double nrows,
                                             List *path_pathkeys,
-                                            List *group_pathkeys, List *group_clauses);
+                                            List *group_pathkeys, List *group_clauses,
+                                            List *aggregate_pathkeys);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
                                            Relids required_outer,
                                            CostSelector cost_criterion,
@@ -255,6 +256,7 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root,
                                       RelOptInfo *rel,
                                       List *pathkeys);
 extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
+extern List *append_pathkeys(List *target, List *source);
 extern PathKey *make_canonical_pathkey(PlannerInfo *root,
                                       EquivalenceClass *eclass, Oid opfamily,
                                       int strategy, bool nulls_first);
index 601047fa3ddb2ecd2222145b71f81b805622bac0..b2198724e3c95bb5ddec4c8d3a07a059adaef3a1 100644 (file)
@@ -1392,6 +1392,84 @@ ERROR:  column "t1.f1" must appear in the GROUP BY clause or be used in an aggre
 LINE 1: select t1.f1 from t1 left join t2 using (f1) group by f1;
                ^
 drop table t1, t2;
+--
+-- Test planner's selection of pathkeys for ORDER BY aggregates
+--
+-- Ensure we order by four.  This suits the most aggregate functions.
+explain (costs off)
+select sum(two order by two),max(four order by four), min(four order by four)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: four
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Ensure we order by two.  It's a tie between ordering by two and four but
+-- we tiebreak on the aggregate's position.
+explain (costs off)
+select
+  sum(two order by two), max(four order by four),
+  min(four order by four), max(two order by two)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: two
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Similar to above, but tiebreak on ordering by four
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: four
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Ensure this one orders by ten since there are 3 aggregates that require ten
+-- vs two that suit two and four.
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two),
+  sum(ten order by ten), min(ten order by ten), max(ten order by ten)
+from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: ten
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Try a case involving a GROUP BY clause where the GROUP BY column is also
+-- part of an aggregate's ORDER BY clause.  We want a sort order that works
+-- for the GROUP BY along with the first and the last aggregate.
+explain (costs off)
+select
+  sum(unique1 order by ten, two), sum(unique1 order by four),
+  sum(unique1 order by two, four)
+from tenk1
+group by ten;
+            QUERY PLAN            
+----------------------------------
+ GroupAggregate
+   Group Key: ten
+   ->  Sort
+         Sort Key: ten, two, four
+         ->  Seq Scan on tenk1
+(5 rows)
+
 --
 -- Test combinations of DISTINCT and/or ORDER BY
 --
@@ -2263,8 +2341,8 @@ NOTICE:  avg_transfn called with 3
 -- shouldn't share states due to the distinctness not matching.
 select my_avg(distinct one),my_sum(one) from (values(1),(3)) t(one);
 NOTICE:  avg_transfn called with 1
-NOTICE:  avg_transfn called with 3
 NOTICE:  avg_transfn called with 1
+NOTICE:  avg_transfn called with 3
 NOTICE:  avg_transfn called with 3
  my_avg | my_sum 
 --------+--------
index a08a3825ff61dcf43e04b26a6296fa5645410c8e..4b41ccf1aa81ada97e2a58c4c0998db39bac413b 100644 (file)
@@ -367,17 +367,17 @@ SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2;
          ->  GroupAggregate
                Group Key: pagg_tab.c
                ->  Sort
-                     Sort Key: pagg_tab.c
+                     Sort Key: pagg_tab.c, pagg_tab.a
                      ->  Seq Scan on pagg_tab_p1 pagg_tab
          ->  GroupAggregate
                Group Key: pagg_tab_1.c
                ->  Sort
-                     Sort Key: pagg_tab_1.c
+                     Sort Key: pagg_tab_1.c, pagg_tab_1.a
                      ->  Seq Scan on pagg_tab_p2 pagg_tab_1
          ->  GroupAggregate
                Group Key: pagg_tab_2.c
                ->  Sort
-                     Sort Key: pagg_tab_2.c
+                     Sort Key: pagg_tab_2.c, pagg_tab_2.a
                      ->  Seq Scan on pagg_tab_p3 pagg_tab_2
 (18 rows)
 
@@ -948,34 +948,36 @@ SET max_parallel_workers_per_gather TO 2;
 -- is not partial agg safe.
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
-                                      QUERY PLAN                                      
---------------------------------------------------------------------------------------
- Sort
-   Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-   ->  Append
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml.a
-               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml.a
-                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_2.a
-               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_2.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-         ->  GroupAggregate
-               Group Key: pagg_tab_ml_5.a
-               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-               ->  Sort
-                     Sort Key: pagg_tab_ml_5.a
-                     ->  Append
-                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-(25 rows)
+                                         QUERY PLAN                                         
+--------------------------------------------------------------------------------------------
+ Gather Merge
+   Workers Planned: 2
+   ->  Sort
+         Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
+         ->  Parallel Append
+               ->  GroupAggregate
+                     Group Key: pagg_tab_ml.a
+                     Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+                     ->  Sort
+                           Sort Key: pagg_tab_ml.a, pagg_tab_ml.c
+                           ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+               ->  GroupAggregate
+                     Group Key: pagg_tab_ml_5.a
+                     Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+                     ->  Sort
+                           Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c
+                           ->  Append
+                                 ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                                 ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+               ->  GroupAggregate
+                     Group Key: pagg_tab_ml_2.a
+                     Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+                     ->  Sort
+                           Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c
+                           ->  Append
+                                 ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                                 ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+(27 rows)
 
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
  a  | sum  |  array_agg  | count 
@@ -994,32 +996,34 @@ SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HA
 -- Without ORDER BY clause, to test Gather at top-most path
 EXPLAIN (COSTS OFF)
 SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3;
-                             QUERY PLAN                              
----------------------------------------------------------------------
- Append
-   ->  GroupAggregate
-         Group Key: pagg_tab_ml.a
-         Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
-         ->  Sort
-               Sort Key: pagg_tab_ml.a
-               ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
-   ->  GroupAggregate
-         Group Key: pagg_tab_ml_2.a
-         Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
-         ->  Sort
-               Sort Key: pagg_tab_ml_2.a
-               ->  Append
-                     ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
-                     ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
-   ->  GroupAggregate
-         Group Key: pagg_tab_ml_5.a
-         Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
-         ->  Sort
-               Sort Key: pagg_tab_ml_5.a
-               ->  Append
-                     ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
-                     ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
-(23 rows)
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Gather
+   Workers Planned: 2
+   ->  Parallel Append
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml.a
+               Filter: (avg(pagg_tab_ml.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml.a, pagg_tab_ml.c
+                     ->  Seq Scan on pagg_tab_ml_p1 pagg_tab_ml
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_5.a
+               Filter: (avg(pagg_tab_ml_5.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_5.a, pagg_tab_ml_5.c
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p3_s1 pagg_tab_ml_5
+                           ->  Seq Scan on pagg_tab_ml_p3_s2 pagg_tab_ml_6
+         ->  GroupAggregate
+               Group Key: pagg_tab_ml_2.a
+               Filter: (avg(pagg_tab_ml_2.b) < '3'::numeric)
+               ->  Sort
+                     Sort Key: pagg_tab_ml_2.a, pagg_tab_ml_2.c
+                     ->  Append
+                           ->  Seq Scan on pagg_tab_ml_p2_s1 pagg_tab_ml_2
+                           ->  Seq Scan on pagg_tab_ml_p2_s2 pagg_tab_ml_3
+(25 rows)
 
 -- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY
 -- for level 1 only. For subpartitions, GROUP BY clause does not match with
index bdd0969a509a781a1c26fb83db6040e41de26740..748dfdb04d430e368efdb0ba2c6521bbbe615023 100644 (file)
@@ -866,14 +866,14 @@ FROM
    (VALUES (NULL), (3), (1), (NULL), (NULL), (5), (2), (4), (NULL)) foo(bar);
   json_arrayagg  |  json_arrayagg  |  json_arrayagg  |  json_arrayagg  |              json_arrayagg              |              json_arrayagg              |  json_arrayagg  |                                                      json_arrayagg                                                       | json_arrayagg |            json_arrayagg             
 -----------------+-----------------+-----------------+-----------------+-----------------------------------------+-----------------------------------------+-----------------+--------------------------------------------------------------------------------------------------------------------------+---------------+--------------------------------------
- [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [3, 1, 5, 2, 4] | [null, 3, 1, null, null, 5, 2, 4, null] | [null, 3, 1, null, null, 5, 2, 4, null] | [{"bar":null}, +| [{"bar": null}, {"bar": 3}, {"bar": 1}, {"bar": null}, {"bar": null}, {"bar": 5}, {"bar": 2}, {"bar": 4}, {"bar": null}] | [{"bar":3},  +| [{"bar": 3}, {"bar": 4}, {"bar": 5}]
-                 |                 |                 |                 |                                         |                                         |  {"bar":3},    +|                                                                                                                          |  {"bar":4},  +| 
-                 |                 |                 |                 |                                         |                                         |  {"bar":1},    +|                                                                                                                          |  {"bar":5}]   | 
+ [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5] | [1, 2, 3, 4, 5, null, null, null, null] | [1, 2, 3, 4, 5, null, null, null, null] | [{"bar":1},    +| [{"bar": 1}, {"bar": 2}, {"bar": 3}, {"bar": 4}, {"bar": 5}, {"bar": null}, {"bar": null}, {"bar": null}, {"bar": null}] | [{"bar":3},  +| [{"bar": 3}, {"bar": 4}, {"bar": 5}]
+                 |                 |                 |                 |                                         |                                         |  {"bar":2},    +|                                                                                                                          |  {"bar":4},  +| 
+                 |                 |                 |                 |                                         |                                         |  {"bar":3},    +|                                                                                                                          |  {"bar":5}]   | 
+                 |                 |                 |                 |                                         |                                         |  {"bar":4},    +|                                                                                                                          |               | 
+                 |                 |                 |                 |                                         |                                         |  {"bar":5},    +|                                                                                                                          |               | 
+                 |                 |                 |                 |                                         |                                         |  {"bar":null}, +|                                                                                                                          |               | 
                  |                 |                 |                 |                                         |                                         |  {"bar":null}, +|                                                                                                                          |               | 
                  |                 |                 |                 |                                         |                                         |  {"bar":null}, +|                                                                                                                          |               | 
-                 |                 |                 |                 |                                         |                                         |  {"bar":5},    +|                                                                                                                          |               | 
-                 |                 |                 |                 |                                         |                                         |  {"bar":2},    +|                                                                                                                          |               | 
-                 |                 |                 |                 |                                         |                                         |  {"bar":4},    +|                                                                                                                          |               | 
                  |                 |                 |                 |                                         |                                         |  {"bar":null}]  |                                                                                                                          |               | 
 (1 row)
 
index 418f296a3f9e40d056cbd98ee487e84b9694352a..a2efa179fc19107e029d7ce74c9c7f2ece668100 100644 (file)
@@ -622,15 +622,18 @@ EXPLAIN (COSTS OFF) :qry;
          ->  GroupAggregate
                Group Key: a.col12
                Filter: (count(*) > 1)
-               ->  Merge Join
-                     Merge Cond: (a.col12 = b.col12)
-                     ->  Sort
-                           Sort Key: a.col12 DESC
-                           ->  Seq Scan on test_mark_restore a
-                     ->  Sort
-                           Sort Key: b.col12 DESC
-                           ->  Seq Scan on test_mark_restore b
-(14 rows)
+               ->  Incremental Sort
+                     Sort Key: a.col12 DESC, a.col1
+                     Presorted Key: a.col12
+                     ->  Merge Join
+                           Merge Cond: (a.col12 = b.col12)
+                           ->  Sort
+                                 Sort Key: a.col12 DESC
+                                 ->  Seq Scan on test_mark_restore a
+                           ->  Sort
+                                 Sort Key: b.col12 DESC
+                                 ->  Seq Scan on test_mark_restore b
+(17 rows)
 
 :qry;
  col12 | count | count | count | count | count 
@@ -658,15 +661,18 @@ EXPLAIN (COSTS OFF) :qry;
          ->  GroupAggregate
                Group Key: a.col12
                Filter: (count(*) > 1)
-               ->  Merge Join
-                     Merge Cond: (a.col12 = b.col12)
-                     ->  Sort
-                           Sort Key: a.col12 DESC
-                           ->  Seq Scan on test_mark_restore a
-                     ->  Sort
-                           Sort Key: b.col12 DESC
-                           ->  Seq Scan on test_mark_restore b
-(14 rows)
+               ->  Incremental Sort
+                     Sort Key: a.col12 DESC, a.col1
+                     Presorted Key: a.col12
+                     ->  Merge Join
+                           Merge Cond: (a.col12 = b.col12)
+                           ->  Sort
+                                 Sort Key: a.col12 DESC
+                                 ->  Seq Scan on test_mark_restore a
+                           ->  Sort
+                                 Sort Key: b.col12 DESC
+                                 ->  Seq Scan on test_mark_restore b
+(17 rows)
 
 :qry;
  col12 | count | count | count | count | count 
index c6e0d7ba2b71f6ea0c7145b1063919b1804d8eff..4540a06f454e2ba3a96559da5ebc6db238357d45 100644 (file)
@@ -503,6 +503,49 @@ select t1.f1 from t1 left join t2 using (f1) group by f1;
 
 drop table t1, t2;
 
+--
+-- Test planner's selection of pathkeys for ORDER BY aggregates
+--
+
+-- Ensure we order by four.  This suits the most aggregate functions.
+explain (costs off)
+select sum(two order by two),max(four order by four), min(four order by four)
+from tenk1;
+
+-- Ensure we order by two.  It's a tie between ordering by two and four but
+-- we tiebreak on the aggregate's position.
+explain (costs off)
+select
+  sum(two order by two), max(four order by four),
+  min(four order by four), max(two order by two)
+from tenk1;
+
+-- Similar to above, but tiebreak on ordering by four
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two)
+from tenk1;
+
+-- Ensure this one orders by ten since there are 3 aggregates that require ten
+-- vs two that suit two and four.
+explain (costs off)
+select
+  max(four order by four), sum(two order by two),
+  min(four order by four), max(two order by two),
+  sum(ten order by ten), min(ten order by ten), max(ten order by ten)
+from tenk1;
+
+-- Try a case involving a GROUP BY clause where the GROUP BY column is also
+-- part of an aggregate's ORDER BY clause.  We want a sort order that works
+-- for the GROUP BY along with the first and the last aggregate.
+explain (costs off)
+select
+  sum(unique1 order by ten, two), sum(unique1 order by four),
+  sum(unique1 order by two, four)
+from tenk1
+group by ten;
+
 --
 -- Test combinations of DISTINCT and/or ORDER BY
 --