Introduce a SQL-callable function array_sort(anyarray).
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 1 Apr 2025 22:03:55 +0000 (18:03 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 1 Apr 2025 22:03:55 +0000 (18:03 -0400)
Create a function that will sort the elements of an array
according to the element type's sort order.  If the array
has more than one dimension, the sub-arrays of the first
dimension are sorted per normal array-comparison rules,
leaving their contents alone.

In support of this, add pg_type.typarray to the set of fields
cached by the typcache.

Author: Junwang Zhao <zhjwpku@gmail.com>
Co-authored-by: Jian He <jian.universality@gmail.com>
Reviewed-by: Aleksander Alekseev <aleksander@timescale.com>
Discussion: https://postgr.es/m/CAEG8a3J41a4dpw_-F94fF-JPRXYxw-GfsgoGotKcjs9LVfEEvw@mail.gmail.com

doc/src/sgml/func.sgml
src/backend/utils/adt/array_userfuncs.c
src/backend/utils/cache/typcache.c
src/include/catalog/catversion.h
src/include/catalog/pg_proc.dat
src/include/utils/typcache.h
src/test/regress/expected/arrays.out
src/test/regress/expected/collate.icu.utf8.out
src/test/regress/sql/arrays.sql
src/test/regress/sql/collate.icu.utf8.sql
src/tools/pgindent/typedefs.list

index 5bf6656deca588ba20b0ce4a53dc678e74f34d9b..2488e9ba998250a6e5e75b611f2957da40927941 100644 (file)
@@ -20741,6 +20741,42 @@ SELECT NULLIF(value, '(none)') ...
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>array_sort</primary>
+        </indexterm>
+        <function>array_sort</function> (
+          <parameter>array</parameter> <type>anyarray</type>
+          <optional>, <parameter>descending</parameter> <type>boolean</type>
+          <optional>, <parameter>nulls_first</parameter> <type>boolean</type>
+          </optional></optional> )
+        <returnvalue>anyarray</returnvalue>
+       </para>
+       <para>
+        Sorts the first dimension of the array.
+        The sort order is determined by the default sort ordering of the
+        array's element type; however, if the element type is collatable,
+        the collation to use can be specified by adding
+        a <literal>COLLATE</literal> clause to
+        the <parameter>array</parameter> argument.
+       </para>
+       <para>
+        If <parameter>descending</parameter> is true then sort in
+        descending order, otherwise ascending order.  If omitted, the
+        default is ascending order.
+        If <parameter>nulls_first</parameter> is true then nulls appear
+        before non-null values, otherwise nulls appear after non-null
+        values.
+        If omitted, <parameter>nulls_first</parameter> is taken to have
+        the same value as <parameter>descending</parameter>.
+       </para>
+       <para>
+        <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+        <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm id="function-array-to-string">
index 2aae2f8ed935a2af73d217a9f460cf054880763e..8eb342e33823a19e300d59ed630b449abe5e94b8 100644 (file)
  */
 #include "postgres.h"
 
+#include "catalog/pg_operator_d.h"
 #include "catalog/pg_type.h"
 #include "common/int.h"
 #include "common/pg_prng.h"
 #include "libpq/pqformat.h"
+#include "miscadmin.h"
 #include "nodes/supportnodes.h"
 #include "port/pg_bitutils.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/datum.h"
 #include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
 #include "utils/typcache.h"
 
 /*
@@ -43,6 +46,18 @@ typedef struct DeserialIOData
    Oid         typioparam;
 } DeserialIOData;
 
+/*
+ * ArraySortCachedInfo
+ *     Used for caching catalog data in array_sort
+ */
+typedef struct ArraySortCachedInfo
+{
+   ArrayMetaState array_meta;  /* metadata for array_create_iterator */
+   Oid         elem_lt_opr;    /* "<" operator for element type */
+   Oid         elem_gt_opr;    /* ">" operator for element type */
+   Oid         array_type;     /* pg_type OID of array type */
+} ArraySortCachedInfo;
+
 static Datum array_position_common(FunctionCallInfo fcinfo);
 
 
@@ -1858,3 +1873,166 @@ array_reverse(PG_FUNCTION_ARGS)
 
    PG_RETURN_ARRAYTYPE_P(result);
 }
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array.
+ */
+static ArrayType *
+array_sort_internal(ArrayType *array, bool descending, bool nulls_first,
+                   FunctionCallInfo fcinfo)
+{
+   ArrayType  *newarray;
+   Oid         collation = PG_GET_COLLATION();
+   int         ndim,
+              *dims,
+              *lbs;
+   ArraySortCachedInfo *cache_info;
+   Oid         elmtyp;
+   Oid         sort_typ;
+   Oid         sort_opr;
+   Tuplesortstate *tuplesortstate;
+   ArrayIterator array_iterator;
+   Datum       value;
+   bool        isnull;
+   ArrayBuildStateAny *astate = NULL;
+
+   ndim = ARR_NDIM(array);
+   dims = ARR_DIMS(array);
+   lbs = ARR_LBOUND(array);
+
+   /* Quick exit if we don't need to sort */
+   if (ndim < 1 || dims[0] < 2)
+       return array;
+
+   /* Set up cache area if we didn't already */
+   cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
+   if (cache_info == NULL)
+   {
+       cache_info = (ArraySortCachedInfo *)
+           MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
+                                  sizeof(ArraySortCachedInfo));
+       fcinfo->flinfo->fn_extra = cache_info;
+   }
+
+   /* Fetch and cache required data if we don't have it */
+   elmtyp = ARR_ELEMTYPE(array);
+   if (elmtyp != cache_info->array_meta.element_type)
+   {
+       TypeCacheEntry *typentry;
+
+       typentry = lookup_type_cache(elmtyp,
+                                    TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
+       cache_info->array_meta.element_type = elmtyp;
+       cache_info->array_meta.typlen = typentry->typlen;
+       cache_info->array_meta.typbyval = typentry->typbyval;
+       cache_info->array_meta.typalign = typentry->typalign;
+       cache_info->elem_lt_opr = typentry->lt_opr;
+       cache_info->elem_gt_opr = typentry->gt_opr;
+       cache_info->array_type = typentry->typarray;
+   }
+
+   /* Identify the sort operator to use */
+   if (ndim == 1)
+   {
+       /* Need to sort the element type */
+       sort_typ = elmtyp;
+       sort_opr = (descending ? cache_info->elem_gt_opr : cache_info->elem_lt_opr);
+   }
+   else
+   {
+       /* Otherwise we're sorting arrays */
+       sort_typ = cache_info->array_type;
+       if (!OidIsValid(sort_typ))
+           ereport(ERROR,
+                   (errcode(ERRCODE_UNDEFINED_OBJECT),
+                    errmsg("could not find array type for data type %s",
+                           format_type_be(elmtyp))));
+       /* We know what operators to use for arrays */
+       sort_opr = (descending ? ARRAY_GT_OP : ARRAY_LT_OP);
+   }
+
+   /*
+    * Fail if we don't know how to sort.  The error message is chosen to
+    * match what array_lt()/array_gt() will say in the multidimensional case.
+    */
+   if (!OidIsValid(sort_opr))
+       ereport(ERROR,
+               errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+               errmsg("could not identify a comparison function for type %s",
+                      format_type_be(elmtyp)));
+
+   /* Put the things to be sorted (elements or sub-arrays) into a tuplesort */
+   tuplesortstate = tuplesort_begin_datum(sort_typ,
+                                          sort_opr,
+                                          collation,
+                                          nulls_first,
+                                          work_mem,
+                                          NULL,
+                                          TUPLESORT_NONE);
+
+   array_iterator = array_create_iterator(array, ndim - 1,
+                                          &cache_info->array_meta);
+   while (array_iterate(array_iterator, &value, &isnull))
+   {
+       tuplesort_putdatum(tuplesortstate, value, isnull);
+   }
+   array_free_iterator(array_iterator);
+
+   /* Do the sort */
+   tuplesort_performsort(tuplesortstate);
+
+   /* Extract results into a new array */
+   while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+   {
+       astate = accumArrayResultAny(astate, value, isnull,
+                                    sort_typ, CurrentMemoryContext);
+   }
+   tuplesort_end(tuplesortstate);
+
+   newarray = DatumGetArrayTypeP(makeArrayResultAny(astate,
+                                                    CurrentMemoryContext,
+                                                    true));
+
+   /* Adjust lower bound to match the input */
+   ARR_LBOUND(newarray)[0] = lbs[0];
+
+   return newarray;
+}
+
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+   ArrayType  *array = PG_GETARG_ARRAYTYPE_P(0);
+
+   PG_RETURN_ARRAYTYPE_P(array_sort_internal(array,
+                                             false,
+                                             false,
+                                             fcinfo));
+}
+
+Datum
+array_sort_order(PG_FUNCTION_ARGS)
+{
+   ArrayType  *array = PG_GETARG_ARRAYTYPE_P(0);
+   bool        descending = PG_GETARG_BOOL(1);
+
+   PG_RETURN_ARRAYTYPE_P(array_sort_internal(array,
+                                             descending,
+                                             descending,
+                                             fcinfo));
+}
+
+Datum
+array_sort_order_nulls_first(PG_FUNCTION_ARGS)
+{
+   ArrayType  *array = PG_GETARG_ARRAYTYPE_P(0);
+   bool        descending = PG_GETARG_BOOL(1);
+   bool        nulls_first = PG_GETARG_BOOL(2);
+
+   PG_RETURN_ARRAYTYPE_P(array_sort_internal(array,
+                                             descending,
+                                             nulls_first,
+                                             fcinfo));
+}
index 5a3b3788d0268d5240eb001f65ce2997672b6bac..ae65a1cce06c5e875e3060d3ed21a7ffa929c55e 100644 (file)
@@ -499,6 +499,7 @@ lookup_type_cache(Oid type_id, int flags)
        typentry->typrelid = typtup->typrelid;
        typentry->typsubscript = typtup->typsubscript;
        typentry->typelem = typtup->typelem;
+       typentry->typarray = typtup->typarray;
        typentry->typcollation = typtup->typcollation;
        typentry->flags |= TCFLAGS_HAVE_PG_TYPE_DATA;
 
@@ -544,6 +545,7 @@ lookup_type_cache(Oid type_id, int flags)
        typentry->typrelid = typtup->typrelid;
        typentry->typsubscript = typtup->typsubscript;
        typentry->typelem = typtup->typelem;
+       typentry->typarray = typtup->typarray;
        typentry->typcollation = typtup->typcollation;
        typentry->flags |= TCFLAGS_HAVE_PG_TYPE_DATA;
 
index 8b96f3b8bf2eb31da753260139d800c67f1e2302..8b4ee1fa13f08c9d34db2697bd0c9988be299179 100644 (file)
@@ -57,6 +57,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 202504011
+#define CATALOG_VERSION_NO 202504012
 
 #endif
index d9c41fa426b3aa58686eb87bee26b604b4c2f12d..6b57b7e18d9890149228298f605d0bf0d2c4fb66 100644 (file)
 { oid => '8686', descr => 'reverse array',
   proname => 'array_reverse', prorettype => 'anyarray',
   proargtypes => 'anyarray', prosrc => 'array_reverse' },
+{ oid => '8810', descr => 'sort array',
+  proname => 'array_sort', prorettype => 'anyarray', proargtypes => 'anyarray',
+  prosrc => 'array_sort' },
+{ oid => '8811', descr => 'sort array',
+  proname => 'array_sort', prorettype => 'anyarray',
+  proargtypes => 'anyarray bool', proargnames => '{array,descending}',
+  prosrc => 'array_sort_order' },
+{ oid => '8812', descr => 'sort array',
+  proname => 'array_sort', prorettype => 'anyarray',
+  proargtypes => 'anyarray bool bool',
+  proargnames => '{array,descending,nulls_first}',
+  prosrc => 'array_sort_order_nulls_first' },
 { oid => '3816', descr => 'array typanalyze',
   proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
   proargtypes => 'internal', prosrc => 'array_typanalyze' },
index 562a581333aa27f5d5f1f302f08955f6fd0dde05..1cb30f1818c2b8513e2b26414019477a24a3e5c4 100644 (file)
@@ -44,6 +44,7 @@ typedef struct TypeCacheEntry
    Oid         typrelid;
    Oid         typsubscript;
    Oid         typelem;
+   Oid         typarray;
    Oid         typcollation;
 
    /*
index 7afd7356bbebb66dad0dd71a985182ed6fc8651c..b815473f414b20d792fcbe358c143867b384d155 100644 (file)
@@ -2860,3 +2860,145 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
  {{7,8},{5,6},{3,4},{1,2}}
 (1 row)
 
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort 
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1}'::int[]);
+ array_sort 
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+  array_sort   
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+        array_sort         
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+      array_sort       
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+         array_sort         
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+SELECT array_sort(ARRAY(SELECT '1 4'::int2vector UNION ALL SELECT '1 2'::int2vector));
+  array_sort   
+---------------
+ {"1 2","1 4"}
+(1 row)
+
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+           array_sort           
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+           array_sort           
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+           array_sort           
+--------------------------------
+ {NULL,6.6,5.5,4.4,3.3,2.2,1.1}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+           array_sort           
+--------------------------------
+ {6.6,5.5,4.4,3.3,2.2,1.1,NULL}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+           array_sort           
+--------------------------------
+ {NULL,1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+           array_sort           
+--------------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+ array_sort 
+------------
+ {{1}}
+(1 row)
+
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+     array_sort      
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
+SELECT array_sort('{{"1 2","3 4"}, {"1 -2","-1 4"}}'::int2vector[]);
+           array_sort            
+---------------------------------
+ {{"1 -2","-1 4"},{"1 2","3 4"}}
+(1 row)
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]);  -- no error because no sort is required
+ array_sort 
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{1,2,3}'::xid[]);
+ERROR:  could not identify a comparison function for type xid
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+ERROR:  could not identify a comparison function for type xid
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+              array_sort              
+--------------------------------------
+ [10:12][20:21]={{1,2},{3,4},{10,20}}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+  array_sort  
+--------------
+ [-1:0]={1,7}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+              array_sort              
+--------------------------------------
+ [-2:0][20:21]={{1,-4},{1,2},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+    array_sort    
+------------------
+ {{1,-4},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort 
+------------
+ {{1},{10}}
+(1 row)
+
index aee4755c08349e3a38412f54a43c33aa5a7b4e4e..69805d4b9ec53203d5e63d96ed13cef03191869e 100644 (file)
@@ -1471,6 +1471,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
  t        | t
 (1 row)
 
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort 
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort 
+------------
+ {B,a}
+(1 row)
+
 -- test language tags
 CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
 SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
index 399a0797f3bd309dbd5168c6256e5bc7cac59a3c..47d62c1d38d28e536417e30a3a9f862c7f9e0807 100644 (file)
@@ -856,3 +856,39 @@ SELECT array_reverse('{1}'::int[]);
 SELECT array_reverse('{1,2}'::int[]);
 SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
 SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort(ARRAY(SELECT '1 4'::int2vector UNION ALL SELECT '1 2'::int2vector));
+
+-- array_sort with order specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false);
+
+-- array_sort with order and nullsfirst flag specified
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], true, false);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, true);
+SELECT array_sort('{1.1,3.3,5.5,2.2,null,4.4,6.6}'::float8[], false, false);
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+SELECT array_sort('{{"1 2","3 4"}, {"1 -2","-1 4"}}'::int2vector[]);
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]);  -- no error because no sort is required
+SELECT array_sort('{1,2,3}'::xid[]);
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
index 38ebcd99508422a20d44480abffb389ca5908e27..dbc190227d02220e543a6ec1ea350b19cf4726fc 100644 (file)
@@ -564,6 +564,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
 SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
 SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
 
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
 -- test language tags
 CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
 SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
index b66cecd8799109e4f3700b4b366ef213c441df74..449bafc123c5b7e4d114843a2c2c7bf3015498bd 100644 (file)
@@ -154,6 +154,7 @@ ArrayIOData
 ArrayIterator
 ArrayMapState
 ArrayMetaState
+ArraySortCachedInfo
 ArraySubWorkspace
 ArrayToken
 ArrayType