Update/improve documentation about creating aggregate functions.

author Tom Lane <tgl@sss.pgh.pa.us>

Sun, 26 Mar 2000 19:45:21 +0000 (19:45 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sun, 26 Mar 2000 19:45:21 +0000 (19:45 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 26 Mar 2000 19:45:21 +0000 (19:45 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 26 Mar 2000 19:45:21 +0000 (19:45 +0000)
diff --git a/doc/src/sgml/ref/create_aggregate.sgml b/doc/src/sgml/ref/create_aggregate.sgml

index 936ee2b43bce4e4c767f618057ef03c837d48bcf..b279360a5c32873fcf44a78f5d613a00788ee9a2 100644 (file)
--- a/doc/src/sgml/ref/create_aggregate.sgml
+++ b/doc/src/sgml/ref/create_aggregate.sgml
@@ -1,5 +1,5 @@
  <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_aggregate.sgml,v 1.7 1999/07/22 15:09:07 thomas Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_aggregate.sgml,v 1.8 2000/03/26 19:45:21 tgl Exp $
  Postgres documentation
  -->
  
@@ -24,9 +24,9 @@ Postgres documentation
     <date>1999-07-20</date>
    </refsynopsisdivinfo>
    <synopsis>
-CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASETYPE = <replaceable class="PARAMETER">data_type</replaceable>
-    [ , SFUNC1 = <replaceable class="PARAMETER">sfunc1</replaceable>, STYPE1 = <replaceable class="PARAMETER">sfunc1_return_type</replaceable> ]
-    [ , SFUNC2 = <replaceable class="PARAMETER">sfunc2</replaceable>, STYPE2 = <replaceable class="PARAMETER">sfunc2_return_type</replaceable> ]
+CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASETYPE = <replaceable class="PARAMETER">input_data_type</replaceable>
+    [ , SFUNC1 = <replaceable class="PARAMETER">sfunc1</replaceable>, STYPE1 = <replaceable class="PARAMETER">state1_type</replaceable> ]
+    [ , SFUNC2 = <replaceable class="PARAMETER">sfunc2</replaceable>, STYPE2 = <replaceable class="PARAMETER">state2_type</replaceable> ]
      [ , FINALFUNC = <replaceable class="PARAMETER">ffunc</replaceable> ]
      [ , INITCOND1 = <replaceable class="PARAMETER">initial_condition1</replaceable> ]
      [ , INITCOND2 = <replaceable class="PARAMETER">initial_condition2</replaceable> ] )
@@ -51,10 +51,10 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASE
       </varlistentry>
  
       <varlistentry>
-      <term><replaceable class="PARAMETER">data_type</replaceable></term>
+      <term><replaceable class="PARAMETER">input_data_type</replaceable></term>
        <listitem>
         <para>
-   The fundamental data type on which this aggregate function operates.
+   The input data type on which this aggregate function operates.
         </para>
        </listitem>
       </varlistentry>
@@ -63,21 +63,25 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASE
        <term><replaceable class="PARAMETER">sfunc1</replaceable></term>
        <listitem>
         <para>
-   The state transition function
-   to be called for every non-NULL field from the source column.
-   It takes a variable of
-   type <replaceable class="PARAMETER">sfunc1_return_type</replaceable> as
-   the first argument and that field as the
-   second argument.
+   A state transition function
+   to be called for every non-NULL input data value.
+   This must be a function of two arguments, the first being of
+   type <replaceable class="PARAMETER">state1_type</replaceable>
+   and the second of
+   type <replaceable class="PARAMETER">input_data_type</replaceable>.
+   The function must return a value of
+   type <replaceable class="PARAMETER">state1_type</replaceable>.
+   This function takes the current state value 1 and the current
+   input data item, and returns the next state value 1.
         </para>
        </listitem>
       </varlistentry>
  
       <varlistentry>
-      <term><replaceable class="PARAMETER">sfunc1_return_type</replaceable></term>
+      <term><replaceable class="PARAMETER">state1_type</replaceable></term>
        <listitem>
         <para>
-   The return type of the first transition function.
+   The data type for the first state value of the aggregate.
         </para>
        </listitem>
       </varlistentry>
@@ -86,20 +90,22 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASE
        <term><replaceable class="PARAMETER">sfunc2</replaceable></term>
        <listitem>
         <para>
-   The state transition function
-   to be called for every non-NULL field from the source column.
-   It takes a variable
-   of type <replaceable class="PARAMETER">sfunc2_return_type</replaceable>
-   as the only argument and returns a variable of the same type.
+   A state transition function
+   to be called for every non-NULL input data value.
+   This must be a function of one argument of
+   type <replaceable class="PARAMETER">state2_type</replaceable>,
+   returning a value of the same type.
+   This function takes the current state value 2 and
+   returns the next state value 2.
         </para>
        </listitem>
       </varlistentry>
  
       <varlistentry>
-      <term><replaceable class="PARAMETER">sfunc2_return_type</replaceable></term>
+      <term><replaceable class="PARAMETER">state2_type</replaceable></term>
        <listitem>
         <para>
-   The return type of the second transition function.
+   The data type for the second state value of the aggregate.
         </para>
        </listitem>
       </varlistentry>
@@ -108,12 +114,17 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASE
        <term><replaceable class="PARAMETER">ffunc</replaceable></term>
        <listitem>
         <para>
-   The final function
-   called after traversing all input fields. This function must
+   The final function called to compute the aggregate's result
+   after all input data has been traversed.
+   If both state values are used, the final function must
     take two arguments of types
-   <replaceable class="PARAMETER">sfunc1_return_type</replaceable>
+   <replaceable class="PARAMETER">state1_type</replaceable>
     and
-   <replaceable class="PARAMETER">sfunc2_return_type</replaceable>.
+   <replaceable class="PARAMETER">state2_type</replaceable>.
+   If only one state value is used, the final function must
+   take a single argument of that state value's type.
+   The output data type of the aggregate is defined as the return
+   type of this function.
         </para>
        </listitem>
       </varlistentry>
@@ -122,7 +133,7 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASE
        <term><replaceable class="PARAMETER">initial_condition1</replaceable></term>
        <listitem>
         <para>
-   The initial value for the first transition function argument.
+   The initial value for state value 1.
         </para>
        </listitem>
       </varlistentry>
@@ -131,7 +142,7 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> [ AS ] ( BASE
        <term><replaceable class="PARAMETER">initial_condition2</replaceable></term>
        <listitem>
         <para>
-   The initial value for the second transition function argument.
+   The initial value for state value 2.
         </para>
        </listitem>
       </varlistentry>
@@ -182,84 +193,66 @@ CREATE
     can be used to provide the desired features.
    </para>
    <para>
-   An  aggregate  function can require up to three functions, two
-   state transition functions, 
+   An aggregate function is identified by its name and input data type.
+   Two aggregates can have the same name if they operate on different
+   input types.  To avoid confusion, do not make an ordinary function
+   of the same name and input data type as an aggregate.
+  </para>
+  <para>
+   An aggregate function is made from between one and three ordinary
+   functions:
+   two state transition functions, 
     <replaceable class="PARAMETER">sfunc1</replaceable>
-   and <replaceable class="PARAMETER">sfunc2</replaceable>:
-   <programlisting>
-<replaceable class="PARAMETER">sfunc1</replaceable>( internal-state1, next-data_item ) ---> next-internal-state1 <replaceable class="PARAMETER">sfunc2</replaceable>( internal-state2 ) ---> next-internal-state2
-   </programlisting>
+   and <replaceable class="PARAMETER">sfunc2</replaceable>,
     and a final calculation function,
-   <replaceable class="PARAMETER">ffunc</replaceable>:
+   <replaceable class="PARAMETER">ffunc</replaceable>.
+   These are used as follows:
     <programlisting>
+<replaceable class="PARAMETER">sfunc1</replaceable>( internal-state1, next-data-item ) ---> next-internal-state1
+<replaceable class="PARAMETER">sfunc2</replaceable>( internal-state2 ) ---> next-internal-state2
  <replaceable class="PARAMETER">ffunc</replaceable>(internal-state1, internal-state2) ---> aggregate-value
     </programlisting>
    </para>
    <para>
-   <productname>Postgres</productname> creates up to two temporary variables
-   (referred to here as <replaceable class="PARAMETER">temp1</replaceable>
-   and <replaceable class="PARAMETER">temp2</replaceable>)
-   to hold intermediate results used as arguments to the transition functions.
+   <productname>Postgres</productname> creates one or two temporary variables
+   (of data types <replaceable class="PARAMETER">state1_type</replaceable> and/or
+   <replaceable class="PARAMETER">state2_type</replaceable>) to hold the
+   current internal states of the aggregate.  At each input data item,
+   the state transition function(s) are invoked to calculate new values
+   for the internal state values.  After all the data has been processed,
+   the final function is invoked once to calculate the aggregate's output
+   value.
    </para>
    <para>
-   These transition functions are required to have the following properties:
-   <itemizedlist>
-    <listitem>
-     <para>
-      The  arguments  to 
-      <replaceable class="PARAMETER">sfunc1</replaceable>
-      must be
-      <replaceable class="PARAMETER">temp1</replaceable>
-      of type
-      <replaceable class="PARAMETER">sfunc1_return_type</replaceable>
-      and
-      <replaceable class="PARAMETER">column_value</replaceable>
-      of type <replaceable class="PARAMETER">data_type</replaceable>.
-      The return value must  be of type
-      <replaceable class="PARAMETER">sfunc1_return_type</replaceable>
-      and will be used as the first argument in the next call to 
-      <replaceable class="PARAMETER">sfunc1</replaceable>.
-     </para>
-    </listitem>
-    
-    <listitem>
-     <para>
-      The  argument and return value of 
-      <replaceable class="PARAMETER">sfunc2</replaceable>
-      must be
-      <replaceable class="PARAMETER">temp2</replaceable>
-      of type
-      <replaceable class="PARAMETER">sfunc2_return_type</replaceable>.
-     </para>
-    </listitem>
-    <listitem>     
-     <para>
-      The  arguments  to  the  final-calculation-function
-      must  be
-      <replaceable class="PARAMETER">temp1</replaceable>
-      and
-      <replaceable class="PARAMETER">temp2</replaceable>
-      and its return value must
-      be a <productname>Postgres</productname>
-      base type (not necessarily
-      <replaceable class="PARAMETER">data_type</replaceable> 
-      which had been specified for BASETYPE).
-     </para>
-    </listitem>
-    <listitem>
-     <para>    
-      FINALFUNC should be specified
-      if and only if both state-transition functions  are
-      specified. 
-     </para></listitem>
-   </itemizedlist>
+   <replaceable class="PARAMETER">ffunc</replaceable> must be specified if
+   both transition functions are specified.  If only one transition function
+   is used, then <replaceable class="PARAMETER">ffunc</replaceable> is
+   optional.  The default behavior when
+   <replaceable class="PARAMETER">ffunc</replaceable> is not provided is
+   to return the ending value of the internal state value being used
+   (and, therefore, the aggregate's output type is the same as that
+   state value's type).
    </para>  
    
    <para>   
-   An aggregate function may also  require  one or two initial conditions,
- one for
-   each transition function.  These are specified and  stored
-   in the database as fields of type <type>text</type>.
+   An aggregate function may also provide one or two initial conditions,
+   that is, initial values for the internal state values being used.
+   These are specified and stored in the database as fields of type
+   <type>text</type>, but they must be valid external representations
+   of constants of the state value data types.  If
+   <replaceable class="PARAMETER">sfunc1</replaceable> is specified 
+   without an <replaceable class="PARAMETER">initial_condition1</replaceable> value,
+   then the system does not call
+   <replaceable class="PARAMETER">sfunc1</replaceable> 
+   at the first input item; instead, the internal state value 1 is
+   initialized with the first input value, and
+   <replaceable class="PARAMETER">sfunc1</replaceable> is called beginning 
+   at the second input item.  This is useful for aggregates like MIN and
+   MAX.  Note that an aggregate using this feature will return NULL when
+   called with no input values.  There is no comparable provision for
+   state value 2; if <replaceable class="PARAMETER">sfunc2</replaceable> is
+   specified then an <replaceable class="PARAMETER">initial_condition2</replaceable> is
+   required.
    </para>
    
    <refsect2 id="R2-SQL-CREATEAGGREGATE-3">
@@ -274,18 +267,32 @@ CREATE
      to drop aggregate functions.
     </para>
  
+   <para>
+    The parameters of <command>CREATE AGGREGATE</command> can be written
+    in any order, not just the order illustrated above.
+   </para>
+
     <para>
      It  is possible to specify aggregate functions
      that have varying combinations of state  and  final  functions. 
-    For example, the <function>count</function> aggregate requires SFUNC2
-    (an incrementing function) but not  SFUNC1  or  FINALFUNC,
-    whereas  the  <function>sum</function> aggregate requires SFUNC1 (an addition
-    function) but not SFUNC2 or FINALFUNC  and  the  <function>avg</function>
+    For example, the <function>count</function> aggregate requires
+    <replaceable class="PARAMETER">sfunc2</replaceable> 
+    (an incrementing function) but not
+    <replaceable class="PARAMETER">sfunc1</replaceable>  or  
+    <replaceable class="PARAMETER">ffunc</replaceable>,
+    whereas  the  <function>sum</function> aggregate requires
+    <replaceable class="PARAMETER">sfunc1</replaceable> (an addition
+    function) but not <replaceable class="PARAMETER">sfunc2</replaceable> or
+    <replaceable class="PARAMETER">ffunc</replaceable>,  and  the
+    <function>avg</function>
      aggregate  requires 
-    both  of the above state functions as
-    well as a FINALFUNC (a division function) to  produce  its
+    both state functions as
+    well as a <replaceable class="PARAMETER">ffunc</replaceable> (a division
+    function) to  produce  its 
      answer.   In any case, at least one state function must be
-    defined, and any SFUNC2 must have  a  corresponding  INITCOND2.
+    defined, and any <replaceable class="PARAMETER">sfunc2</replaceable> must
+    have  a  corresponding
+    <replaceable class="PARAMETER">initial_condition2</replaceable>.
     </para>
  
    </refsect2>
diff --git a/doc/src/sgml/xaggr.sgml b/doc/src/sgml/xaggr.sgml

index b0a5a48b28c920229983c09e2d5baf584565a323..d12cc78353d1d4688d9609fe0899f32680bdcb71 100644 (file)
--- a/doc/src/sgml/xaggr.sgml
+++ b/doc/src/sgml/xaggr.sgml
@@ -2,26 +2,57 @@
  <Title>Extending <Acronym>SQL</Acronym>: Aggregates</Title>
  
  <Para>
-     Aggregates  in <ProductName>Postgres</ProductName> 
-are expressed in terms of state
-     transition functions.  That is,  an  aggregate  can  be
+     Aggregate functions  in <ProductName>Postgres</ProductName> 
+     are expressed as <firstterm>state values</firstterm>
+     and <firstterm>state transition functions</firstterm>.
+     That is,  an  aggregate  can  be
       defined  in terms of state that is modified whenever an
-     instance is processed.  Some state functions look at  a
-     particular value in the instance when computing the new
-     state (<Acronym>sfunc1</Acronym> in the  
-create  aggregate  syntax)  while
-     others  only  keep  track  of  their own internal state
-     (<Acronym>sfunc2</Acronym>).
-     If we define an aggregate that  uses  only  
-<Acronym>sfunc1</Acronym>,  we
-     define an aggregate that computes a running function of
+     input item is processed.  To define a new aggregate
+     function, one selects a datatype for the state value,
+     an initial value for the state, and a state transition
+     function.  The state transition function is just an
+     ordinary function that could also be used outside the
+     context of the aggregate.
+</Para>
+
+<Para>
+     Actually, in order to make it easier to construct useful
+     aggregates from existing functions, an aggregate can have
+     one or two separate state values, one or two transition
+     functions to update those state values, and a
+     <firstterm>final function</firstterm> that computes the
+     actual aggregate result from the ending state values.
+</Para>
+
+<Para>
+     Thus there can be as many as four datatypes involved:
+     the type of the input data items, the type of the aggregate's
+     result, and the types of the two state values.  Only the
+     input and result datatypes are seen by a user of the aggregate.
+</Para>
+
+<Para>
+     Some state transition functions need to look at each successive
+     input to compute the next state value, while others ignore the
+     specific input value and simply update their internal state.
+     (The most useful example of the second kind is a running count
+     of the number of input items.)  The <ProductName>Postgres</ProductName>
+     aggregate machinery defines <Acronym>sfunc1</Acronym> for
+     an aggregate as a function that is passed both the old state
+     value and the current input value, while <Acronym>sfunc2</Acronym>
+     is a function that is passed only the old state value.
+</Para>
+
+<Para>
+     If we define an aggregate that  uses  only <Acronym>sfunc1</Acronym>,
+     we have an aggregate that computes a running function of
       the attribute values from each instance.  "Sum"  is  an
       example  of  this  kind  of aggregate.  "Sum" starts at
       zero and always adds the current  instance's  value  to
-     its  running  total.   We  will  use the 
-<Acronym>int4pl</Acronym> that is
-     built into <ProductName>Postgres</ProductName> 
-to perform this addition.
+     its  running  total.  For example, if we want to make a Sum
+     aggregate to work on a datatype for complex numbers,
+     we only need the addition function for that datatype.
+     The aggregate definition is:
       
  <ProgramListing>
  CREATE AGGREGATE complex_sum (
@@ -39,11 +70,15 @@ SELECT complex_sum(a) FROM test_complex;
           |(34,53.9)   |
           +------------+
  </ProgramListing>
+
+     (In practice, we'd just name the aggregate "sum", and rely on
+     <ProductName>Postgres</ProductName> to figure out which kind
+     of sum to apply to a complex column.)
  </Para>
  
  <Para>
       If we define only <Acronym>sfunc2</Acronym>, we are 
-specifying  an  aggregate  
+     specifying  an  aggregate  
       that computes a running function that is independent  of  
       the  attribute  values  from  each  instance.
       "Count"  is  the  most  common  example of this kind of
@@ -104,4 +139,10 @@ SELECT my_average(salary) as emp_average FROM EMP;
           +------------+
  </ProgramListing>
  </Para>
+
+<Para>
+    For further details see
+    <xref endterm="sql-createaggregate-title"
+    linkend="sql-createaggregate-title">.
+</Para>
  </Chapter>
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 26 Mar 2000 19:45:21 +0000 (19:45 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 26 Mar 2000 19:45:21 +0000 (19:45 +0000)
doc/src/sgml/ref/create_aggregate.sgml		patch \| blob \| blame \| history
doc/src/sgml/xaggr.sgml		patch \| blob \| blame \| history