aggregate(DISTINCT ...) works, per SQL spec.

author Tom Lane <tgl@sss.pgh.pa.us>

Mon, 13 Dec 1999 01:27:21 +0000 (01:27 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Mon, 13 Dec 1999 01:27:21 +0000 (01:27 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 13 Dec 1999 01:27:21 +0000 (01:27 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 13 Dec 1999 01:27:21 +0000 (01:27 +0000)
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c

index 0956af455f1347061cbf3faee2e5c92fe82bb501..0a95c92347fb858ffd470245ef1203807364f51c 100644 (file)
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -3,15 +3,35 @@
   * nodeAgg.c
   *   Routines to handle aggregate nodes.
   *
- * Copyright (c) 1994, Regents of the University of California
+ *   ExecAgg evaluates each aggregate in the following steps: (initcond1,
+ *   initcond2 are the initial values and sfunc1, sfunc2, and finalfunc are
+ *   the transition functions.)
+ *
+ *      value1 = initcond1
+ *      value2 = initcond2
+ *      foreach input_value do
+ *         value1 = sfunc1(value1, input_value)
+ *         value2 = sfunc2(value2)
+ *      value1 = finalfunc(value1, value2)
+ *
+ *   If initcond1 is NULL then the first non-NULL input_value is
+ *   assigned directly to value1.  sfunc1 isn't applied until value1
+ *   is non-NULL.
+ *
+ *   sfunc1 is never applied when the current tuple's input_value is NULL.
+ *   sfunc2 is applied for each tuple if the aggref is marked 'usenulls',
+ *   otherwise it is only applied when input_value is not NULL.
+ *   (usenulls was formerly used for COUNT(*), but is no longer needed for
+ *   that purpose; as of 10/1999 the support for usenulls is dead code.
+ *   I have not removed it because it seems like a potentially useful
+ *   feature for user-defined aggregates.  We'd just need to add a
+ *   flag column to pg_aggregate and a parameter to CREATE AGGREGATE...)
   *
   *
- * NOTE
- *   The implementation of Agg node has been reworked to handle legal
- *   SQL aggregates. (Do not expect POSTQUEL semantics.)    -- ay 2/95
+ * Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.59 1999/10/30 02:35:14 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.60 1999/12/13 01:26:52 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -20,11 +40,15 @@
  
  #include "access/heapam.h"
  #include "catalog/pg_aggregate.h"
+#include "catalog/pg_operator.h"
  #include "executor/executor.h"
  #include "executor/nodeAgg.h"
  #include "optimizer/clauses.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_oper.h"
  #include "parser/parse_type.h"
  #include "utils/syscache.h"
+#include "utils/tuplesort.h"
  
  /*
   * AggStatePerAggData - per-aggregate working state for the Agg scan
@@ -36,6 +60,9 @@ typedef struct AggStatePerAggData
      * thereafter:
      */
  
+   /* Link to Aggref node this working state is for */
+   Aggref     *aggref;
+
     /* Oids of transfer functions */
     Oid         xfn1_oid;
     Oid         xfn2_oid;
@@ -47,6 +74,18 @@ typedef struct AggStatePerAggData
     FmgrInfo    xfn1;
     FmgrInfo    xfn2;
     FmgrInfo    finalfn;
+   /*
+    * Type of input data and Oid of sort operator to use for it;
+    * only set/used when aggregate has DISTINCT flag.  (These are not
+    * used directly by nodeAgg, but must be passed to the Tuplesort object.)
+    */
+   Oid         inputType;
+   Oid         sortOperator;
+   /*
+    * fmgr lookup data for input type's equality operator --- only set/used
+    * when aggregate has DISTINCT flag.
+    */
+   FmgrInfo    equalfn;
     /*
      * initial values from pg_aggregate entry
      */
@@ -55,19 +94,29 @@ typedef struct AggStatePerAggData
     bool        initValue1IsNull,
                 initValue2IsNull;
     /*
-    * We need the len and byval info for the agg's transition status types
-    * in order to know how to copy/delete values.
+    * We need the len and byval info for the agg's input and transition
+    * data types in order to know how to copy/delete values.
      */
-   int         transtype1Len,
+   int         inputtypeLen,
+               transtype1Len,
                 transtype2Len;
-   bool        transtype1ByVal,
+   bool        inputtypeByVal,
+               transtype1ByVal,
                 transtype2ByVal;
  
     /*
      * These values are working state that is initialized at the start
-    * of an input tuple group and updated for each input tuple:
+    * of an input tuple group and updated for each input tuple.
+    *
+    * For a simple (non DISTINCT) aggregate, we just feed the input values
+    * straight to the transition functions.  If it's DISTINCT, we pass the
+    * input values into a Tuplesort object; then at completion of the input
+    * tuple group, we scan the sorted values, eliminate duplicates, and run
+    * the transition functions on the rest.
      */
  
+   Tuplesortstate *sortstate;  /* sort object, if a DISTINCT agg */
+
     Datum       value1,         /* current transfer values 1 and 2 */
                 value2;
     bool        value1IsNull,
@@ -82,28 +131,248 @@ typedef struct AggStatePerAggData
  } AggStatePerAggData;
  
  
+static void initialize_aggregate (AggStatePerAgg peraggstate);
+static void advance_transition_functions (AggStatePerAgg peraggstate,
+                                         Datum newVal, bool isNull);
+static void finalize_aggregate (AggStatePerAgg peraggstate,
+                               Datum *resultVal, bool *resultIsNull);
+static Datum copyDatum(Datum val, int typLen, bool typByVal);
+
+
  /*
- * Helper routine to make a copy of a Datum.
- *
- * NB: input had better not be a NULL; might cause null-pointer dereference.
+ * Initialize one aggregate for a new set of input values.
   */
-static Datum
-copyDatum(Datum val, int typLen, bool typByVal)
+static void
+initialize_aggregate (AggStatePerAgg peraggstate)
  {
-   if (typByVal)
-       return val;
+   Aggref         *aggref = peraggstate->aggref;
+
+   /*
+    * Start a fresh sort operation for each DISTINCT aggregate.
+    */
+   if (aggref->aggdistinct)
+   {
+       /* In case of rescan, maybe there could be an uncompleted
+        * sort operation?  Clean it up if so.
+        */
+       if (peraggstate->sortstate)
+           tuplesort_end(peraggstate->sortstate);
+
+       peraggstate->sortstate =
+           tuplesort_begin_datum(peraggstate->inputType,
+                                 peraggstate->sortOperator,
+                                 false);
+   }
+
+   /*
+    * (Re)set value1 and value2 to their initial values.
+    */
+   if (OidIsValid(peraggstate->xfn1_oid) &&
+       ! peraggstate->initValue1IsNull)
+       peraggstate->value1 = copyDatum(peraggstate->initValue1, 
+                                       peraggstate->transtype1Len,
+                                       peraggstate->transtype1ByVal);
+   else
+       peraggstate->value1 = (Datum) NULL;
+   peraggstate->value1IsNull = peraggstate->initValue1IsNull;
+
+   if (OidIsValid(peraggstate->xfn2_oid) &&
+       ! peraggstate->initValue2IsNull)
+       peraggstate->value2 = copyDatum(peraggstate->initValue2, 
+                                       peraggstate->transtype2Len,
+                                       peraggstate->transtype2ByVal);
     else
+       peraggstate->value2 = (Datum) NULL;
+   peraggstate->value2IsNull = peraggstate->initValue2IsNull;
+
+   /* ------------------------------------------
+    * If the initial value for the first transition function
+    * doesn't exist in the pg_aggregate table then we will let
+    * the first value returned from the outer procNode become
+    * the initial value. (This is useful for aggregates like
+    * max{} and min{}.)  The noInitValue flag signals that we
+    * still need to do this.
+    * ------------------------------------------
+    */
+   peraggstate->noInitValue = peraggstate->initValue1IsNull;
+}
+
+/*
+ * Given a new input value, advance the transition functions of an aggregate.
+ *
+ * Note: if the agg does not have usenulls set, null inputs will be filtered
+ * out before reaching here.
+ */
+static void
+advance_transition_functions (AggStatePerAgg peraggstate,
+                             Datum newVal, bool isNull)
+{
+   Datum       args[2];
+
+   if (OidIsValid(peraggstate->xfn1_oid) && !isNull)
     {
-       char   *newVal;
+       if (peraggstate->noInitValue)
+       {
+           /*
+            * value1 has not been initialized. This is the first non-NULL
+            * input value. We use it as the initial value for value1.
+            *
+            * XXX We assume, without having checked, that the agg's input
+            * type is binary-compatible with its transtype1!
+            *
+            * We have to copy the datum since the tuple from which it came
+            * will be freed on the next iteration of the scan.
+            */
+           peraggstate->value1 = copyDatum(newVal,
+                                           peraggstate->transtype1Len,
+                                           peraggstate->transtype1ByVal);
+           peraggstate->value1IsNull = false;
+           peraggstate->noInitValue = false;
+       }
+       else
+       {
+           /* apply transition function 1 */
+           args[0] = peraggstate->value1;
+           args[1] = newVal;
+           newVal = (Datum) fmgr_c(&peraggstate->xfn1,
+                                   (FmgrValues *) args,
+                                   &isNull);
+           if (! peraggstate->transtype1ByVal)
+               pfree(peraggstate->value1);
+           peraggstate->value1 = newVal;
+       }
+   }
  
-       if (typLen == -1)       /* variable length type? */
-           typLen = VARSIZE((struct varlena *) DatumGetPointer(val));
-       newVal = (char *) palloc(typLen);
-       memcpy(newVal, DatumGetPointer(val), typLen);
-       return PointerGetDatum(newVal);
+   if (OidIsValid(peraggstate->xfn2_oid))
+   {
+       /* apply transition function 2 */
+       args[0] = peraggstate->value2;
+       isNull = false;         /* value2 cannot be null, currently */
+       newVal = (Datum) fmgr_c(&peraggstate->xfn2,
+                               (FmgrValues *) args,
+                               &isNull);
+       if (! peraggstate->transtype2ByVal)
+           pfree(peraggstate->value2);
+       peraggstate->value2 = newVal;
     }
  }
  
+/*
+ * Compute the final value of one aggregate.
+ */
+static void
+finalize_aggregate (AggStatePerAgg peraggstate,
+                   Datum *resultVal, bool *resultIsNull)
+{
+   Aggref     *aggref = peraggstate->aggref;
+   char       *args[2];
+
+   /*
+    * If it's a DISTINCT aggregate, all we've done so far is to stuff the
+    * input values into the sort object.  Complete the sort, then run
+    * the transition functions on the non-duplicate values.  Note that
+    * DISTINCT always suppresses nulls, per SQL spec, regardless of usenulls.
+    */
+   if (aggref->aggdistinct)
+   {
+       Datum       oldVal = (Datum) 0;
+       bool        haveOldVal = false;
+       Datum       newVal;
+       bool        isNull;
+
+       tuplesort_performsort(peraggstate->sortstate);
+       while (tuplesort_getdatum(peraggstate->sortstate, true,
+                                 &newVal, &isNull))
+       {
+           if (isNull)
+               continue;
+           if (haveOldVal)
+           {
+               Datum   equal;
+
+               equal = (Datum) (*fmgr_faddr(&peraggstate->equalfn)) (oldVal,
+                                                                     newVal);
+               if (DatumGetInt32(equal) != 0)
+               {
+                   if (! peraggstate->inputtypeByVal)
+                       pfree(DatumGetPointer(newVal));
+                   continue;
+               }
+           }
+           advance_transition_functions(peraggstate, newVal, false);
+           if (haveOldVal && ! peraggstate->inputtypeByVal)
+               pfree(DatumGetPointer(oldVal));
+           oldVal = newVal;
+           haveOldVal = true;
+       }
+       if (haveOldVal && ! peraggstate->inputtypeByVal)
+           pfree(DatumGetPointer(oldVal));
+       tuplesort_end(peraggstate->sortstate);
+       peraggstate->sortstate = NULL;
+   }
+
+   /*
+    * Now apply the agg's finalfn, or substitute the appropriate transition
+    * value if there is no finalfn.
+    *
+    * XXX For now, only apply finalfn if we got at least one
+    * non-null input value.  This prevents zero divide in AVG().
+    * If we had cleaner handling of null inputs/results in functions,
+    * we could probably take out this hack and define the result
+    * for no inputs as whatever finalfn returns for null input.
+    */
+   if (OidIsValid(peraggstate->finalfn_oid) &&
+       ! peraggstate->noInitValue)
+   {
+       if (peraggstate->finalfn.fn_nargs > 1)
+       {
+           args[0] = (char *) peraggstate->value1;
+           args[1] = (char *) peraggstate->value2;
+       }
+       else if (OidIsValid(peraggstate->xfn1_oid))
+           args[0] = (char *) peraggstate->value1;
+       else if (OidIsValid(peraggstate->xfn2_oid))
+           args[0] = (char *) peraggstate->value2;
+       else
+           elog(ERROR, "ExecAgg: no valid transition functions??");
+       *resultIsNull = false;
+       *resultVal = (Datum) fmgr_c(&peraggstate->finalfn,
+                                   (FmgrValues *) args,
+                                   resultIsNull);
+   }
+   else if (OidIsValid(peraggstate->xfn1_oid))
+   {
+       /* Return value1 */
+       *resultVal = peraggstate->value1;
+       *resultIsNull = peraggstate->value1IsNull;
+       /* prevent pfree below */
+       peraggstate->value1IsNull = true;
+   }
+   else if (OidIsValid(peraggstate->xfn2_oid))
+   {
+       /* Return value2 */
+       *resultVal = peraggstate->value2;
+       *resultIsNull = peraggstate->value2IsNull;
+       /* prevent pfree below */
+       peraggstate->value2IsNull = true;
+   }
+   else
+       elog(ERROR, "ExecAgg: no valid transition functions??");
+
+   /*
+    * Release any per-group working storage, unless we're passing
+    * it back as the result of the aggregate.
+    */
+   if (OidIsValid(peraggstate->xfn1_oid) &&
+       ! peraggstate->value1IsNull &&
+       ! peraggstate->transtype1ByVal)
+       pfree(peraggstate->value1);
+   
+   if (OidIsValid(peraggstate->xfn2_oid) &&
+       ! peraggstate->value2IsNull &&
+       ! peraggstate->transtype2ByVal)
+       pfree(peraggstate->value2);
+}
  
  /* ---------------------------------------
   *
@@ -118,30 +387,6 @@ copyDatum(Datum val, int typLen, bool typByVal)
   *   the expression context to be used when ExecProject evaluates the
   *   result tuple.
   *
- *   ExecAgg evaluates each aggregate in the following steps: (initcond1,
- *   initcond2 are the initial values and sfunc1, sfunc2, and finalfunc are
- *   the transition functions.)
- *
- *      value1 = initcond1
- *      value2 = initcond2
- *      foreach tuple do
- *         value1 = sfunc1(value1, aggregated_value)
- *         value2 = sfunc2(value2)
- *      value1 = finalfunc(value1, value2)
- *
- *   If initcond1 is NULL then the first non-NULL aggregated_value is
- *   assigned directly to value1.  sfunc1 isn't applied until value1
- *   is non-NULL.
- *
- *   sfunc1 is never applied when the current tuple's aggregated_value
- *   is NULL.  sfunc2 is applied for each tuple if the aggref is marked
- *   'usenulls', otherwise it is only applied when aggregated_value is
- *   not NULL.  (usenulls was formerly used for COUNT(*), but is no longer
- *   needed for that purpose; as of 10/1999 the support for usenulls is
- *   dead code.  I have not removed it because it seems like a potentially
- *   useful feature for user-defined aggregates.  We'd just need to add a
- *   flag column to pg_aggregate and a parameter to CREATE AGGREGATE...)
- *
   *   If the outer subplan is a Group node, ExecAgg returns as many tuples
   *   as there are groups.
   *
@@ -161,7 +406,6 @@ ExecAgg(Agg *node)
     TupleTableSlot *resultSlot;
     HeapTuple   inputTuple;
     int         aggno;
-   List       *alist;
     bool        isDone;
     bool        isNull;
  
@@ -190,42 +434,11 @@ ExecAgg(Agg *node)
         /*
          * Initialize working state for a new input tuple group
          */
-       aggno = -1;
-       foreach(alist, aggstate->aggs)
+       for (aggno = 0; aggno < aggstate->numaggs; aggno++)
         {
-           AggStatePerAgg  peraggstate = &peragg[++aggno];
+           AggStatePerAgg  peraggstate = &peragg[aggno];
  
-           /*
-            * (Re)set value1 and value2 to their initial values.
-            */
-           if (OidIsValid(peraggstate->xfn1_oid) &&
-               ! peraggstate->initValue1IsNull)
-               peraggstate->value1 = copyDatum(peraggstate->initValue1, 
-                                               peraggstate->transtype1Len,
-                                               peraggstate->transtype1ByVal);
-           else
-               peraggstate->value1 = (Datum) NULL;
-           peraggstate->value1IsNull = peraggstate->initValue1IsNull;
-
-           if (OidIsValid(peraggstate->xfn2_oid) &&
-               ! peraggstate->initValue2IsNull)
-               peraggstate->value2 = copyDatum(peraggstate->initValue2, 
-                                               peraggstate->transtype2Len,
-                                               peraggstate->transtype2ByVal);
-           else
-               peraggstate->value2 = (Datum) NULL;
-           peraggstate->value2IsNull = peraggstate->initValue2IsNull;
-
-           /* ------------------------------------------
-            * If the initial value for the first transition function
-            * doesn't exist in the pg_aggregate table then we will let
-            * the first value returned from the outer procNode become
-            * the initial value. (This is useful for aggregates like
-            * max{} and min{}.)  The noInitValue flag signals that we
-            * still need to do this.
-            * ------------------------------------------
-            */
-           peraggstate->noInitValue = peraggstate->initValue1IsNull;
+           initialize_aggregate(peraggstate);
         }
  
         inputTuple = NULL;      /* no saved input tuple yet */
@@ -243,13 +456,11 @@ ExecAgg(Agg *node)
                 break;
             econtext->ecxt_scantuple = outerslot;
  
-           aggno = -1;
-           foreach(alist, aggstate->aggs)
+           for (aggno = 0; aggno < aggstate->numaggs; aggno++)
             {
-               Aggref         *aggref = (Aggref *) lfirst(alist);
-               AggStatePerAgg  peraggstate = &peragg[++aggno];
+               AggStatePerAgg  peraggstate = &peragg[aggno];
+               Aggref         *aggref = peraggstate->aggref;
                 Datum           newVal;
-               Datum           args[2];
  
                 newVal = ExecEvalExpr(aggref->target, econtext,
                                       &isNull, &isDone);
@@ -257,53 +468,12 @@ ExecAgg(Agg *node)
                 if (isNull && !aggref->usenulls)
                     continue;   /* ignore this tuple for this agg */
  
-               if (OidIsValid(peraggstate->xfn1_oid) && !isNull)
-               {
-                   if (peraggstate->noInitValue)
-                   {
-                       /*
-                        * value1 has not been initialized. This is the
-                        * first non-NULL input value. We use it as the
-                        * initial value for value1.  XXX We assume,
-                        * without having checked, that the agg's input type
-                        * is binary-compatible with its transtype1!
-                        *
-                        * We have to copy the datum since the tuple from
-                        * which it came will be freed on the next iteration
-                        * of the scan.  
-                        */
-                       peraggstate->value1 = copyDatum(newVal,
-                                               peraggstate->transtype1Len,
-                                               peraggstate->transtype1ByVal);
-                       peraggstate->value1IsNull = false;
-                       peraggstate->noInitValue = false;
-                   }
-                   else
-                   {
-                       /* apply transition function 1 */
-                       args[0] = peraggstate->value1;
-                       args[1] = newVal;
-                       newVal = (Datum) fmgr_c(&peraggstate->xfn1,
-                                               (FmgrValues *) args,
-                                               &isNull);
-                       if (! peraggstate->transtype1ByVal)
-                           pfree(peraggstate->value1);
-                       peraggstate->value1 = newVal;
-                   }
-               }
-
-               if (OidIsValid(peraggstate->xfn2_oid))
-               {
-                   /* apply transition function 2 */
-                   args[0] = peraggstate->value2;
-                   isNull = false; /* value2 cannot be null, currently */
-                   newVal = (Datum) fmgr_c(&peraggstate->xfn2,
-                                           (FmgrValues *) args,
-                                           &isNull);
-                   if (! peraggstate->transtype2ByVal)
-                       pfree(peraggstate->value2);
-                   peraggstate->value2 = newVal;
-               }
+               if (aggref->aggdistinct)
+                   tuplesort_putdatum(peraggstate->sortstate,
+                                      newVal, isNull);
+               else
+                   advance_transition_functions(peraggstate,
+                                                newVal, isNull);
             }
  
             /*
@@ -320,70 +490,12 @@ ExecAgg(Agg *node)
          * Done scanning input tuple group.
          * Finalize each aggregate calculation.
          */
-       aggno = -1;
-       foreach(alist, aggstate->aggs)
+       for (aggno = 0; aggno < aggstate->numaggs; aggno++)
         {
-           AggStatePerAgg  peraggstate = &peragg[++aggno];
-           char           *args[2];
-
-           /*
-            * XXX For now, only apply finalfn if we got at least one
-            * non-null input value.  This prevents zero divide in AVG().
-            * If we had cleaner handling of null inputs/results in functions,
-            * we could probably take out this hack and define the result
-            * for no inputs as whatever finalfn returns for null input.
-            */
-           if (OidIsValid(peraggstate->finalfn_oid) &&
-               ! peraggstate->noInitValue)
-           {
-               if (peraggstate->finalfn.fn_nargs > 1)
-               {
-                   args[0] = (char *) peraggstate->value1;
-                   args[1] = (char *) peraggstate->value2;
-               }
-               else if (OidIsValid(peraggstate->xfn1_oid))
-                   args[0] = (char *) peraggstate->value1;
-               else if (OidIsValid(peraggstate->xfn2_oid))
-                   args[0] = (char *) peraggstate->value2;
-               else
-                   elog(ERROR, "ExecAgg: no valid transition functions??");
-               aggnulls[aggno] = false;
-               aggvalues[aggno] = (Datum) fmgr_c(&peraggstate->finalfn,
-                                                 (FmgrValues *) args,
-                                                 &(aggnulls[aggno]));
-           }
-           else if (OidIsValid(peraggstate->xfn1_oid))
-           {
-               /* Return value1 */
-               aggvalues[aggno] = peraggstate->value1;
-               aggnulls[aggno] = peraggstate->value1IsNull;
-               /* prevent pfree below */
-               peraggstate->value1IsNull = true;
-           }
-           else if (OidIsValid(peraggstate->xfn2_oid))
-           {
-               /* Return value2 */
-               aggvalues[aggno] = peraggstate->value2;
-               aggnulls[aggno] = peraggstate->value2IsNull;
-               /* prevent pfree below */
-               peraggstate->value2IsNull = true;
-           }
-           else
-               elog(ERROR, "ExecAgg: no valid transition functions??");
-
-           /*
-            * Release any per-group working storage, unless we're passing
-            * it back as the result of the aggregate.
-            */
-           if (OidIsValid(peraggstate->xfn1_oid) &&
-               ! peraggstate->value1IsNull &&
-               ! peraggstate->transtype1ByVal)
-               pfree(peraggstate->value1);
+           AggStatePerAgg  peraggstate = &peragg[aggno];
  
-           if (OidIsValid(peraggstate->xfn2_oid) &&
-               ! peraggstate->value2IsNull &&
-               ! peraggstate->transtype2ByVal)
-               pfree(peraggstate->value2);
+           finalize_aggregate(peraggstate,
+                              & aggvalues[aggno], & aggnulls[aggno]);
         }
  
         /*
@@ -458,14 +570,14 @@ ExecAgg(Agg *node)
  
         /*
          * Form a projection tuple using the aggregate results and the
-        * representative input tuple.  Store it in the result tuple slot,
-        * and return it if it meets my qual condition.
+        * representative input tuple.  Store it in the result tuple slot.
          */
         resultSlot = ExecProject(projInfo, &isDone);
  
         /*
          * If the completed tuple does not match the qualifications,
          * it is ignored and we loop back to try to process another group.
+        * Otherwise, return the tuple.
          */
     }
     while (! ExecQual(node->plan.qual, econtext));
@@ -505,6 +617,11 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
  
     /*
      * find aggregates in targetlist and quals
+    *
+    * Note: pull_agg_clause also checks that no aggs contain other agg
+    * calls in their arguments.  This would make no sense under SQL semantics
+    * anyway (and it's forbidden by the spec).  Because that is true, we
+    * don't need to worry about evaluating the aggs in any particular order.
      */
     aggstate->aggs = nconc(pull_agg_clause((Node *) node->plan.targetlist),
                            pull_agg_clause((Node *) node->plan.qual));
@@ -588,6 +705,9 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
         /* Mark Aggref node with its associated index in the result array */
         aggref->aggno = aggno;
  
+       /* Fill in the peraggstate data */
+       peraggstate->aggref = aggref;
+
         aggTuple = SearchSysCacheTuple(AGGNAME,
                                        PointerGetDatum(aggname),
                                        ObjectIdGetDatum(aggref->basetype),
@@ -644,6 +764,29 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
         {
             fmgr_info(finalfn_oid, &peraggstate->finalfn);
         }
+
+       if (aggref->aggdistinct)
+       {
+           Oid         inputType = exprType(aggref->target);
+           Operator    eq_operator;
+           Form_pg_operator pgopform;
+
+           peraggstate->inputType = inputType;
+           typeInfo = typeidType(inputType);
+           peraggstate->inputtypeLen = typeLen(typeInfo);
+           peraggstate->inputtypeByVal = typeByVal(typeInfo);
+
+           eq_operator = oper("=", inputType, inputType, true);
+           if (!HeapTupleIsValid(eq_operator))
+           {
+               elog(ERROR, "Unable to identify an equality operator for type '%s'",
+                    typeidTypeName(inputType));
+           }
+           pgopform = (Form_pg_operator) GETSTRUCT(eq_operator);
+           fmgr_info(pgopform->oprcode, &(peraggstate->equalfn));
+           peraggstate->sortOperator = any_ordering_op(inputType);
+           peraggstate->sortstate = NULL;
+       }
     }
  
     return TRUE;
@@ -690,3 +833,26 @@ ExecReScanAgg(Agg *node, ExprContext *exprCtxt, Plan *parent)
         ExecReScan(((Plan *) node)->lefttree, exprCtxt, (Plan *) node);
  
  }
+
+
+/*
+ * Helper routine to make a copy of a Datum.
+ *
+ * NB: input had better not be a NULL; might cause null-pointer dereference.
+ */
+static Datum
+copyDatum(Datum val, int typLen, bool typByVal)
+{
+   if (typByVal)
+       return val;
+   else
+   {
+       char   *newVal;
+
+       if (typLen == -1)       /* variable length type? */
+           typLen = VARSIZE((struct varlena *) DatumGetPointer(val));
+       newVal = (char *) palloc(typLen);
+       memcpy(newVal, DatumGetPointer(val), typLen);
+       return PointerGetDatum(newVal);
+   }
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index 1b2726f822647b655497c40c1ac7018b47e48738..884926b9b628f28d2d7372f4bdc4ae9223ef70e1 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.97 1999/11/23 20:06:52 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.98 1999/12/13 01:26:53 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -832,6 +832,8 @@ _copyAggref(Aggref *from)
     newnode->aggtype = from->aggtype;
     Node_Copy(from, newnode, target);
     newnode->usenulls = from->usenulls;
+   newnode->aggstar = from->aggstar;
+   newnode->aggdistinct = from->aggdistinct;
     newnode->aggno = from->aggno; /* probably not needed */
  
     return newnode;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index b35b271275404fab7d284eb823593009f7280211..f70fe508ae58dbb110e633b8b513d1179a795841 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.52 1999/11/23 20:06:52 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.53 1999/12/13 01:26:53 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -219,6 +219,10 @@ _equalAggref(Aggref *a, Aggref *b)
         return false;
     if (a->usenulls != b->usenulls)
         return false;
+   if (a->aggstar != b->aggstar)
+       return false;
+   if (a->aggdistinct != b->aggdistinct)
+       return false;
     /* ignore aggno, which is only a private field for the executor */
     return true;
  }
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 78bda61b30fc72e4e2e8f674ab5860e8d274dc68..7907f1b62ef8de354cb08656298de17f9990d68c 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -5,7 +5,7 @@
   *
   * Copyright (c) 1994, Regents of the University of California
   *
- * $Id: outfuncs.c,v 1.99 1999/12/10 07:37:31 tgl Exp $
+ * $Id: outfuncs.c,v 1.100 1999/12/13 01:26:53 tgl Exp $
   *
   * NOTES
   *   Every (plan) node in POSTGRES has an associated "out" routine which
@@ -680,14 +680,17 @@ static void
  _outAggref(StringInfo str, Aggref *node)
  {
     appendStringInfo(str,
-                " AGGREG :aggname %s :basetype %u :aggtype %u :target ",
+                    " AGGREG :aggname %s :basetype %u :aggtype %u :target ",
                      stringStringInfo(node->aggname),
                      node->basetype,
                      node->aggtype);
     _outNode(str, node->target);
  
-   appendStringInfo(str, " :usenulls %s ",
-                    node->usenulls ? "true" : "false");
+   appendStringInfo(str, " :usenulls %s :aggstar %s :aggdistinct %s ",
+                    node->usenulls ? "true" : "false",
+                    node->aggstar ? "true" : "false",
+                    node->aggdistinct ? "true" : "false");
+   /* aggno is not dumped */
  }
  
  /*
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index 99be5199fa9b9fc386912cebda8d7a6eed1fb39a..83683ff3b1029d80afdf5d411669d88c9d859bb0 100644 (file)
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.75 1999/11/23 20:06:53 momjian Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.76 1999/12/13 01:26:54 tgl Exp $
   *
   * NOTES
   *   Most of the read functions for plan nodes are tested. (In fact, they
@@ -1190,6 +1190,14 @@ _readAggref()
     token = lsptok(NULL, &length);      /* get usenulls */
     local_node->usenulls = (token[0] == 't') ? true : false;
  
+   token = lsptok(NULL, &length);      /* eat :aggstar */
+   token = lsptok(NULL, &length);      /* get aggstar */
+   local_node->aggstar = (token[0] == 't') ? true : false;
+
+   token = lsptok(NULL, &length);      /* eat :aggdistinct */
+   token = lsptok(NULL, &length);      /* get aggdistinct */
+   local_node->aggdistinct = (token[0] == 't') ? true : false;
+
     return local_node;
  }
  
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c

index 63b3ff87d9e298ffde33d9821eea02a33dbcd033..63eebae06033e53281d3fc23a846b6e27dbc0182 100644 (file)
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.56 1999/12/09 05:58:53 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.57 1999/12/13 01:26:55 tgl Exp $
   *
   * HISTORY
   *   AUTHOR            DATE            MAJOR EVENT
@@ -45,6 +45,7 @@ typedef struct {
     List       *targetList;
  } check_subplans_for_ungrouped_vars_context;
  
+static bool contain_agg_clause_walker(Node *node, void *context);
  static bool pull_agg_clause_walker(Node *node, List **listptr);
  static bool check_subplans_for_ungrouped_vars_walker(Node *node,
                     check_subplans_for_ungrouped_vars_context *context);
@@ -393,12 +394,36 @@ pull_constant_clauses(List *quals, List **constantQual)
     return restqual;
  }
  
+/*
+ * contain_agg_clause
+ *   Recursively search for Aggref nodes within a clause.
+ *
+ *   Returns true if any aggregate found.
+ */
+bool
+contain_agg_clause(Node *clause)
+{
+   return contain_agg_clause_walker(clause, NULL);
+}
+
+static bool
+contain_agg_clause_walker(Node *node, void *context)
+{
+   if (node == NULL)
+       return false;
+   if (IsA(node, Aggref))
+       return true;            /* abort the tree traversal and return true */
+   return expression_tree_walker(node, contain_agg_clause_walker, context);
+}
+
  /*
   * pull_agg_clause
   *   Recursively pulls all Aggref nodes from an expression tree.
   *
   *   Returns list of Aggref nodes found.  Note the nodes themselves are not
   *   copied, only referenced.
+ *
+ *   Note: this also checks for nested aggregates, which are an error.
   */
  List *
  pull_agg_clause(Node *clause)
@@ -417,9 +442,16 @@ pull_agg_clause_walker(Node *node, List **listptr)
     if (IsA(node, Aggref))
     {
         *listptr = lappend(*listptr, node);
-       /* continue, to iterate over agg's arg as well (do nested aggregates
-        * actually work?)
+       /*
+        * Complain if the aggregate's argument contains any aggregates;
+        * nested agg functions are semantically nonsensical.
+        */
+       if (contain_agg_clause(((Aggref *) node)->target))
+           elog(ERROR, "Aggregate function calls may not be nested");
+       /*
+        * Having checked that, we need not recurse into the argument.
          */
+       return false;
     }
     return expression_tree_walker(node, pull_agg_clause_walker,
                                   (void *) listptr);
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c

index 68280f7f4a0a9ea8d75ce25951921d4fdb0428c6..21f8efe7f67a4cfc4edc716a1a94c9de6d4356ad 100644 (file)
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/parser/parse_agg.c,v 1.31 1999/12/10 07:37:35 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/parser/parse_agg.c,v 1.32 1999/12/13 01:26:58 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -28,38 +28,11 @@ typedef struct {
     List       *groupClauses;
  } check_ungrouped_columns_context;
  
-static bool contain_agg_clause(Node *clause);
-static bool contain_agg_clause_walker(Node *node, void *context);
  static void check_ungrouped_columns(Node *node, ParseState *pstate,
                                     List *groupClauses);
  static bool check_ungrouped_columns_walker(Node *node,
                                            check_ungrouped_columns_context *context);
  
-/*
- * contain_agg_clause
- *   Recursively find aggref nodes within a clause.
- *
- *   Returns true if any aggregate found.
- *
- * NOTE: we assume that the given clause has been transformed suitably for
- * parser output.  This means we can use the planner's expression_tree_walker.
- */
-static bool
-contain_agg_clause(Node *clause)
-{
-   return contain_agg_clause_walker(clause, NULL);
-}
-
-static bool
-contain_agg_clause_walker(Node *node, void *context)
-{
-   if (node == NULL)
-       return false;
-   if (IsA(node, Aggref))
-       return true;            /* abort the tree traversal and return true */
-   return expression_tree_walker(node, contain_agg_clause_walker, context);
-}
-
  /*
   * check_ungrouped_columns -
   *   Scan the given expression tree for ungrouped variables (variables
@@ -232,7 +205,8 @@ ParseAgg(ParseState *pstate, char *aggname, Oid basetype,
      * Since "1" never evaluates as null, we currently have no need of
      * the "usenulls" flag, but it should be kept around; in fact, we should
      * extend the pg_aggregate table to let usenulls be specified as an
-    * attribute of user-defined aggregates.
+    * attribute of user-defined aggregates.  In the meantime, usenulls
+    * is just always set to "false".
      */
  
     aggform = (Form_pg_aggregate) GETSTRUCT(theAggTuple);
@@ -264,14 +238,8 @@ ParseAgg(ParseState *pstate, char *aggname, Oid basetype,
     aggref->aggtype = fintype;
     aggref->target = lfirst(args);
     aggref->usenulls = usenulls;
-
-   /*
-    * We should store agg_star and agg_distinct into the Aggref node,
-    * and let downstream processing deal with them.  Currently, agg_star
-    * is ignored and agg_distinct is not implemented...
-    */
-   if (agg_distinct)
-       elog(ERROR, "aggregate(DISTINCT ...) is not implemented yet");
+   aggref->aggstar = agg_star;
+   aggref->aggdistinct = agg_distinct;
  
     pstate->p_hasAggs = true;
  
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c

index b62559ccdde64ae49d4b09c553e4ac5c54953b76..47fd957c9948cbb2e19c718d57a32859607d0a28 100644 (file)
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -3,7 +3,7 @@
   *           out of it's tuple
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.34 1999/12/06 02:37:17 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.35 1999/12/13 01:27:01 tgl Exp $
   *
   *   This software is copyrighted by Jan Wieck - Hamburg.
   *
@@ -1352,9 +1352,13 @@ get_rule_expr(Node *node, deparse_context *context)
             {
                 Aggref     *aggref = (Aggref *) node;
  
-               appendStringInfo(buf, "%s(",
-                                quote_identifier(aggref->aggname));
-               get_rule_expr(aggref->target, context);
+               appendStringInfo(buf, "%s(%s",
+                                quote_identifier(aggref->aggname),
+                                aggref->aggdistinct ? "DISTINCT " : "");
+               if (aggref->aggstar)
+                   appendStringInfo(buf, "*");
+               else
+                   get_rule_expr(aggref->target, context);
                 appendStringInfo(buf, ")");
             }
             break;
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c

index 5297fde36dcff0b5da385d9fe4a974cebe1535a6..6e9a23f1cd12cb4b50ed48f8baaf07c0a70402aa 100644 (file)
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -3,8 +3,8 @@
   * tuplesort.c
   *   Generalized tuple sorting routines.
   *
- * This module handles sorting of either heap tuples or index tuples
- * (and could fairly easily support other kinds of sortable objects,
+ * This module handles sorting of heap tuples, index tuples, or single
+ * Datums (and could easily support other kinds of sortable objects,
   * if necessary).  It works efficiently for both small and large amounts
   * of data.  Small amounts are sorted in-memory using qsort().  Large
   * amounts are sorted using temporary files and a standard external sort
@@ -77,7 +77,7 @@
   * Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *   $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.2 1999/10/30 17:27:15 tgl Exp $
+ *   $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.3 1999/12/13 01:27:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -87,7 +87,9 @@
  #include "access/heapam.h"
  #include "access/nbtree.h"
  #include "miscadmin.h"
+#include "parser/parse_type.h"
  #include "utils/logtape.h"
+#include "utils/lsyscache.h"
  #include "utils/tuplesort.h"
  
  /*
@@ -251,6 +253,17 @@ struct Tuplesortstate
      */
     Relation    indexRel;
     bool        enforceUnique;  /* complain if we find duplicate tuples */
+
+   /*
+    * These variables are specific to the Datum case; they are set
+    * by tuplesort_begin_datum and used only by the DatumTuple routines.
+    */
+   Oid         datumType;
+   Oid         sortOperator;
+   FmgrInfo    sortOpFn;       /* cached lookup data for sortOperator */
+   /* we need typelen and byval in order to know how to copy the Datums. */
+   int         datumTypeLen;
+   bool        datumTypeByVal;
  };
  
  #define COMPARETUP(state,a,b)  ((*(state)->comparetup) (state, a, b))
@@ -321,7 +334,22 @@ struct Tuplesortstate
   *--------------------
   */
  
+/*
+ * For sorting single Datums, we build "pseudo tuples" that just carry
+ * the datum's value and null flag.  For pass-by-reference data types,
+ * the actual data value appears after the DatumTuple struct (MAXALIGNed,
+ * of course), and the val field in that struct is just a pointer to it.
+ */
+
+typedef struct
+{
+   Datum       val;
+   bool        isNull;
+} DatumTuple;
+
+
  static Tuplesortstate *tuplesort_begin_common(bool randomAccess);
+static void puttuple_common(Tuplesortstate *state, void *tuple);
  static void inittapes(Tuplesortstate *state);
  static void selectnewtape(Tuplesortstate *state);
  static void mergeruns(Tuplesortstate *state);
@@ -349,6 +377,13 @@ static void writetup_index(Tuplesortstate *state, int tapenum, void *tup);
  static void *readtup_index(Tuplesortstate *state, int tapenum,
                            unsigned int len);
  static unsigned int tuplesize_index(Tuplesortstate *state, void *tup);
+static int comparetup_datum(Tuplesortstate *state,
+                           const void *a, const void *b);
+static void *copytup_datum(Tuplesortstate *state, void *tup);
+static void writetup_datum(Tuplesortstate *state, int tapenum, void *tup);
+static void *readtup_datum(Tuplesortstate *state, int tapenum,
+                          unsigned int len);
+static unsigned int tuplesize_datum(Tuplesortstate *state, void *tup);
  
  /*
   * Since qsort(3) will not pass any context info to qsort_comparetup(),
@@ -369,6 +404,7 @@ static Tuplesortstate *qsort_tuplesortstate;
   * have been supplied.  After performsort, retrieve the tuples in sorted
   * order by calling tuplesort_gettuple until it returns NULL.  (If random
   * access was requested, rescan, markpos, and restorepos can also be called.)
+ * For Datum sorts, putdatum/getdatum are used instead of puttuple/gettuple.
   * Call tuplesort_end to terminate the operation and release memory/disk space.
   */
  
@@ -444,6 +480,32 @@ tuplesort_begin_index(Relation indexRel,
     return state;
  }
  
+Tuplesortstate *
+tuplesort_begin_datum(Oid datumType,
+                     Oid sortOperator,
+                     bool randomAccess)
+{
+   Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+   Type            typeInfo;
+
+   state->comparetup = comparetup_datum;
+   state->copytup = copytup_datum;
+   state->writetup = writetup_datum;
+   state->readtup = readtup_datum;
+   state->tuplesize = tuplesize_datum;
+
+   state->datumType = datumType;
+   state->sortOperator = sortOperator;
+   /* lookup the function that implements the sort operator */
+   fmgr_info(get_opcode(sortOperator), &state->sortOpFn);
+   /* lookup necessary attributes of the datum type */
+   typeInfo = typeidType(datumType);
+   state->datumTypeLen = typeLen(typeInfo);
+   state->datumTypeByVal = typeByVal(typeInfo);
+
+   return state;
+}
+
  /*
   * tuplesort_end
   *
@@ -476,9 +538,60 @@ tuplesort_puttuple(Tuplesortstate *state, void *tuple)
  {
     /*
      * Copy the given tuple into memory we control, and decrease availMem.
+    * Then call the code shared with the Datum case.
      */
     tuple = COPYTUP(state, tuple);
  
+   puttuple_common(state, tuple);
+}
+
+/*
+ * Accept one Datum while collecting input data for sort.
+ *
+ * If the Datum is pass-by-ref type, the value will be copied.
+ */
+void
+tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
+{
+   DatumTuple     *tuple;
+
+   /*
+    * Build pseudo-tuple carrying the datum, and decrease availMem.
+    */
+   if (isNull || state->datumTypeByVal)
+   {
+       USEMEM(state, sizeof(DatumTuple));
+       tuple = (DatumTuple *) palloc(sizeof(DatumTuple));
+       tuple->val = val;
+       tuple->isNull = isNull;
+   }
+   else
+   {
+       int     datalen = state->datumTypeLen;
+       int     tuplelen;
+       char   *newVal;
+
+       if (datalen == -1)      /* variable length type? */
+           datalen = VARSIZE((struct varlena *) DatumGetPointer(val));
+       tuplelen = datalen + MAXALIGN(sizeof(DatumTuple));
+       USEMEM(state, tuplelen);
+       newVal = (char *) palloc(tuplelen);
+       tuple = (DatumTuple *) newVal;
+       newVal += MAXALIGN(sizeof(DatumTuple));
+       memcpy(newVal, DatumGetPointer(val), datalen);
+       tuple->val = PointerGetDatum(newVal);
+       tuple->isNull = false;
+   }
+
+   puttuple_common(state, (void *) tuple);
+}
+
+/*
+ * Shared code for tuple and datum cases.
+ */
+static void
+puttuple_common(Tuplesortstate *state, void *tuple)
+{
     switch (state->status)
     {
         case TSS_INITIAL:
@@ -753,6 +866,50 @@ tuplesort_gettuple(Tuplesortstate *state, bool forward,
     }
  }
  
+/*
+ * Fetch the next Datum in either forward or back direction.
+ * Returns FALSE if no more datums.
+ *
+ * If the Datum is pass-by-ref type, the returned value is freshly palloc'd
+ * and is now owned by the caller.
+ */
+bool
+tuplesort_getdatum(Tuplesortstate *state, bool forward,
+                  Datum *val, bool *isNull)
+{
+   DatumTuple     *tuple;
+   bool            should_free;
+
+   tuple = (DatumTuple *) tuplesort_gettuple(state, forward, &should_free);
+
+   if (tuple == NULL)
+       return false;
+
+   if (tuple->isNull || state->datumTypeByVal)
+   {
+       *val = tuple->val;
+       *isNull = tuple->isNull;
+   }
+   else
+   {
+       int     datalen = state->datumTypeLen;
+       char   *newVal;
+
+       if (datalen == -1)      /* variable length type? */
+           datalen = VARSIZE((struct varlena *) DatumGetPointer(tuple->val));
+       newVal = (char *) palloc(datalen);
+       memcpy(newVal, DatumGetPointer(tuple->val), datalen);
+       *val = PointerGetDatum(newVal);
+       *isNull = false;
+   }
+
+   if (should_free)
+       pfree(tuple);
+
+   return true;
+}
+
+
  /*
   * inittapes - initialize for tape sorting.
   *
@@ -1695,3 +1852,103 @@ tuplesize_index(Tuplesortstate *state, void *tup)
  
     return tuplen;
  }
+
+
+/*
+ * Routines specialized for DatumTuple case
+ */
+
+static int
+comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
+{
+   DatumTuple *ltup = (DatumTuple *) a;
+   DatumTuple *rtup = (DatumTuple *) b;
+
+   if (ltup->isNull)
+   {
+       if (!rtup->isNull)
+           return 1;           /* NULL sorts after non-NULL */
+       return 0;
+   }
+   else if (rtup->isNull)
+       return -1;
+   else
+   {
+       int     result;
+
+       if (!(result = - (int) (*fmgr_faddr(&state->sortOpFn)) (ltup->val,
+                                                               rtup->val)))
+           result = (int) (*fmgr_faddr(&state->sortOpFn)) (rtup->val,
+                                                           ltup->val);
+       return result;
+   }
+}
+
+static void *
+copytup_datum(Tuplesortstate *state, void *tup)
+{
+   /* Not currently needed */
+   elog(ERROR, "copytup_datum() should not be called");
+   return NULL;
+}
+
+static void
+writetup_datum(Tuplesortstate *state, int tapenum, void *tup)
+{
+   DatumTuple     *tuple = (DatumTuple *) tup;
+   unsigned int    tuplen = tuplesize_datum(state, tup);
+   unsigned int    writtenlen = tuplen + sizeof(unsigned int);
+
+   LogicalTapeWrite(state->tapeset, tapenum,
+                    (void*) &writtenlen, sizeof(writtenlen));
+   LogicalTapeWrite(state->tapeset, tapenum,
+                    (void*) tuple, tuplen);
+   if (state->randomAccess)    /* need trailing length word? */
+       LogicalTapeWrite(state->tapeset, tapenum,
+                        (void*) &writtenlen, sizeof(writtenlen));
+
+   FREEMEM(state, tuplen);
+   pfree(tuple);
+}
+
+static void *
+readtup_datum(Tuplesortstate *state, int tapenum, unsigned int len)
+{
+   unsigned int    tuplen = len - sizeof(unsigned int);
+   DatumTuple     *tuple = (DatumTuple *) palloc(tuplen);
+
+   USEMEM(state, tuplen);
+   if (LogicalTapeRead(state->tapeset, tapenum, (void *) tuple,
+                       tuplen) != tuplen)
+       elog(ERROR, "tuplesort: unexpected end of data");
+   if (state->randomAccess)    /* need trailing length word? */
+       if (LogicalTapeRead(state->tapeset, tapenum, (void *) &tuplen,
+                           sizeof(tuplen)) != sizeof(tuplen))
+           elog(ERROR, "tuplesort: unexpected end of data");
+
+   if (!tuple->isNull && !state->datumTypeByVal)
+       tuple->val = PointerGetDatum(((char *) tuple) +
+                                    MAXALIGN(sizeof(DatumTuple)));
+   return (void *) tuple;
+}
+
+static unsigned int
+tuplesize_datum(Tuplesortstate *state, void *tup)
+{
+   DatumTuple     *tuple = (DatumTuple *) tup;
+
+   if (tuple->isNull || state->datumTypeByVal)
+   {
+       return (unsigned int) sizeof(DatumTuple);
+   }
+   else
+   {
+       int     datalen = state->datumTypeLen;
+       int     tuplelen;
+
+       if (datalen == -1)      /* variable length type? */
+           datalen = VARSIZE((struct varlena *) DatumGetPointer(tuple->val));
+       tuplelen = datalen + MAXALIGN(sizeof(DatumTuple));
+       return (unsigned int) tuplelen;
+   }
+}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 728c62b1200e11b7477e5e5ecf7a7461d599cab0..62244f88a475084af47e64fe30e72a0fa74b84b3 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -36,7 +36,7 @@
   *
   * Copyright (c) 1994, Regents of the University of California
   *
- * $Id: catversion.h,v 1.4 1999/11/24 16:52:48 momjian Exp $
+ * $Id: catversion.h,v 1.5 1999/12/13 01:27:07 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -51,6 +51,6 @@
   * catalog changes on the same day...)
   */
  
-#define CATALOG_VERSION_NO 199911241
+#define CATALOG_VERSION_NO 199912121
  
  #endif
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h

index 2d585bdcc5aa09d75c2585750405dc67c6f6ec4c..d3fb8f732a508b7852227b69cc54a5baa634473d 100644 (file)
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -6,7 +6,7 @@
   *
   * Copyright (c) 1994, Regents of the University of California
   *
- * $Id: primnodes.h,v 1.37 1999/11/15 02:00:15 tgl Exp $
+ * $Id: primnodes.h,v 1.38 1999/12/13 01:27:10 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -297,10 +297,12 @@ typedef struct Iter
  /* ----------------
   * Aggref
   *     aggname         - name of the aggregate
- *     basetype        - base type Oid of the aggregate
+ *     basetype        - base type Oid of the aggregate (ie, input type)
   *     aggtype         - type Oid of final result of the aggregate
   *     target          - attribute or expression we are aggregating on
   *     usenulls        - TRUE to accept null values as inputs
+ *     aggstar         - TRUE if argument was really '*'
+ *     aggdistinct     - TRUE if arguments were labeled DISTINCT
   *     aggno           - workspace for nodeAgg.c executor
   * ----------------
   */
@@ -312,6 +314,8 @@ typedef struct Aggref
     Oid         aggtype;
     Node       *target;
     bool        usenulls;
+   bool        aggstar;
+   bool        aggdistinct;
     int         aggno;
  } Aggref;
  
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h

index 829bf434e787fcbb1f996d89f7750ba2ecd6e3b1..4cd2e486aa4e2cd0cd585ff4fb4cbaf6ec3d35ef 100644 (file)
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -6,7 +6,7 @@
   *
   * Copyright (c) 1994, Regents of the University of California
   *
- * $Id: clauses.h,v 1.31 1999/12/09 05:58:55 tgl Exp $
+ * $Id: clauses.h,v 1.32 1999/12/13 01:27:13 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -38,6 +38,7 @@ extern Expr *make_ands_explicit(List *andclauses);
  extern List *make_ands_implicit(Expr *clause);
  
  extern List *pull_constant_clauses(List *quals, List **constantQual);
+extern bool contain_agg_clause(Node *clause);
  extern List *pull_agg_clause(Node *clause);
  extern void check_subplans_for_ungrouped_vars(Node *clause,
                                               Query *query,
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h

index 7c5a32098972bb484fb12a2f5a00bce2e8bedc40..4f775f74a55b280dea058e3daa796f877255b7a2 100644 (file)
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -3,8 +3,8 @@
   * tuplesort.h
   *   Generalized tuple sorting routines.
   *
- * This module handles sorting of either heap tuples or index tuples
- * (and could fairly easily support other kinds of sortable objects,
+ * This module handles sorting of heap tuples, index tuples, or single
+ * Datums (and could easily support other kinds of sortable objects,
   * if necessary).  It works efficiently for both small and large amounts
   * of data.  Small amounts are sorted in-memory using qsort().  Large
   * amounts are sorted using temporary files and a standard external sort
@@ -12,7 +12,7 @@
   *
   * Copyright (c) 1994, Regents of the University of California
   *
- * $Id: tuplesort.h,v 1.1 1999/10/17 22:15:09 tgl Exp $
+ * $Id: tuplesort.h,v 1.2 1999/12/13 01:27:17 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -34,6 +34,7 @@ typedef struct Tuplesortstate Tuplesortstate;
   * code: one for sorting HeapTuples and one for sorting IndexTuples.
   * They differ primarily in the way that the sort key information is
   * supplied.
+ * Yet a third slightly different interface supports sorting bare Datums.
   */
  
  extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
@@ -42,9 +43,15 @@ extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
  extern Tuplesortstate *tuplesort_begin_index(Relation indexRel,
                                              bool enforceUnique,
                                              bool randomAccess);
+extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
+                                            Oid sortOperator,
+                                            bool randomAccess);
  
  extern void tuplesort_puttuple(Tuplesortstate *state, void *tuple);
  
+extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
+                              bool isNull);
+
  extern void tuplesort_performsort(Tuplesortstate *state);
  
  extern void *tuplesort_gettuple(Tuplesortstate *state, bool forward,
@@ -54,11 +61,15 @@ extern void *tuplesort_gettuple(Tuplesortstate *state, bool forward,
  #define tuplesort_getindextuple(state, forward, should_free) \
     ((IndexTuple) tuplesort_gettuple(state, forward, should_free))
  
+extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
+                              Datum *val, bool *isNull);
+
  extern void tuplesort_end(Tuplesortstate *state);
  
  /*
   * These routines may only be called if randomAccess was specified 'true'.
- * Backwards scan in gettuple is likewise only allowed if randomAccess.
+ * Likewise, backwards scan in gettuple/getdatum is only allowed if
+ * randomAccess was specified.
   */
  
  extern void tuplesort_rescan(Tuplesortstate *state);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out

index 84958f66937431784793f9b0bebcf1d188ebb9e5..5dac6162b597afae848baf27211062cff35afba0 100644 (file)
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -76,6 +76,42 @@ cnt_1000
      1000
  (1 row)
  
+QUERY: SELECT count(DISTINCT four) AS cnt_4 FROM onek;
+cnt_4
+-----
+    4
+(1 row)
+
+QUERY: select ten, count(*), sum(four) from onek group by ten;
+ten|count|sum
+---+-----+---
+  0|  100|100
+  1|  100|200
+  2|  100|100
+  3|  100|200
+  4|  100|100
+  5|  100|200
+  6|  100|100
+  7|  100|200
+  8|  100|100
+  9|  100|200
+(10 rows)
+
+QUERY: select ten, count(four), sum(DISTINCT four) from onek group by ten;
+ten|count|sum
+---+-----+---
+  0|  100|  2
+  1|  100|  4
+  2|  100|  2
+  3|  100|  4
+  4|  100|  2
+  5|  100|  4
+  6|  100|  2
+  7|  100|  4
+  8|  100|  2
+  9|  100|  4
+(10 rows)
+
  QUERY: SELECT newavg(four) AS avg_1 FROM onek;
  avg_1
  -----
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out

index 474ed0bb7f65893433b73bf4ee84c7d24fd92618..5938458a88e762a7acff9f858e5922f5bfc6e72d 100644 (file)
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1075,9 +1075,9 @@ pg_user           |SELECT pg_shadow.usename, pg_shadow.usesysid, pg_shadow.usecr
  pg_views          |SELECT c.relname AS viewname, pg_get_userbyid(c.relowner) AS viewowner, pg_get_viewdef(c.relname) AS definition FROM pg_class c WHERE (c.relhasrules AND (EXISTS (SELECT r.rulename FROM pg_rewrite r WHERE ((r.ev_class = c.oid) AND (r.ev_type = '1'::"char")))));                                                                                                                               
  rtest_v1          |SELECT rtest_t1.a, rtest_t1.b FROM rtest_t1;                                                                                                                                                                                                                                                                                                                                                       
  rtest_vcomp       |SELECT x.part, (x.size * y.factor) AS size_in_cm FROM rtest_comp x, rtest_unitfact y WHERE (x.unit = y.unit);                                                                                                                                                                                                                                                                                      
-rtest_vview1      |SELECT x.a, x.b FROM rtest_view1 x WHERE (0 < (SELECT count(1) AS count FROM rtest_view2 y WHERE (y.a = x.a)));                                                                                                                                                                                                                                                                                    
+rtest_vview1      |SELECT x.a, x.b FROM rtest_view1 x WHERE (0 < (SELECT count(*) AS count FROM rtest_view2 y WHERE (y.a = x.a)));                                                                                                                                                                                                                                                                                    
  rtest_vview2      |SELECT rtest_view1.a, rtest_view1.b FROM rtest_view1 WHERE rtest_view1.v;                                                                                                                                                                                                                                                                                                                          
-rtest_vview3      |SELECT x.a, x.b FROM rtest_vview2 x WHERE (0 < (SELECT count(1) AS count FROM rtest_view2 y WHERE (y.a = x.a)));                                                                                                                                                                                                                                                                                   
+rtest_vview3      |SELECT x.a, x.b FROM rtest_vview2 x WHERE (0 < (SELECT count(*) AS count FROM rtest_view2 y WHERE (y.a = x.a)));                                                                                                                                                                                                                                                                                   
  rtest_vview4      |SELECT x.a, x.b, count(y.a) AS refcount FROM rtest_view1 x, rtest_view2 y WHERE (x.a = y.a) GROUP BY x.a, x.b;                                                                                                                                                                                                                                                                                     
  rtest_vview5      |SELECT rtest_view1.a, rtest_view1.b, rtest_viewfunc1(rtest_view1.a) AS refcount FROM rtest_view1;                                                                                                                                                                                                                                                                                                  
  shoe              |SELECT sh.shoename, sh.sh_avail, sh.slcolor, sh.slminlen, (sh.slminlen * un.un_fact) AS slminlen_cm, sh.slmaxlen, (sh.slmaxlen * un.un_fact) AS slmaxlen_cm, sh.slunit FROM shoe_data sh, unit un WHERE (sh.slunit = un.un_name);                                                                                                                                                                  
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql

index 1fc0996807276fdb0709c2da3d94aa9b50d10d87..03ea7de2bbcbdc60d8aaeadb696101b51d5d1542 100644 (file)
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -30,6 +30,12 @@ SELECT max(student.gpa) AS max_3_7 FROM student;
  
  SELECT count(four) AS cnt_1000 FROM onek;
  
+SELECT count(DISTINCT four) AS cnt_4 FROM onek;
+
+select ten, count(*), sum(four) from onek group by ten;
+
+select ten, count(four), sum(DISTINCT four) from onek group by ten;
+
  
  SELECT newavg(four) AS avg_1 FROM onek;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 13 Dec 1999 01:27:21 +0000 (01:27 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 13 Dec 1999 01:27:21 +0000 (01:27 +0000)
src/backend/executor/nodeAgg.c		patch \| blob \| blame \| history
src/backend/nodes/copyfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/outfuncs.c		patch \| blob \| blame \| history
src/backend/nodes/readfuncs.c		patch \| blob \| blame \| history
src/backend/optimizer/util/clauses.c		patch \| blob \| blame \| history
src/backend/parser/parse_agg.c		patch \| blob \| blame \| history
src/backend/utils/adt/ruleutils.c		patch \| blob \| blame \| history
src/backend/utils/sort/tuplesort.c		patch \| blob \| blame \| history
src/include/catalog/catversion.h		patch \| blob \| blame \| history
src/include/nodes/primnodes.h		patch \| blob \| blame \| history
src/include/optimizer/clauses.h		patch \| blob \| blame \| history
src/include/utils/tuplesort.h		patch \| blob \| blame \| history
src/test/regress/expected/aggregates.out		patch \| blob \| blame \| history
src/test/regress/expected/rules.out		patch \| blob \| blame \| history
src/test/regress/sql/aggregates.sql		patch \| blob \| blame \| history