Collations with nondeterministic comparison
author Peter Eisentraut <peter@eisentraut.org>
Fri, 22 Mar 2019 11:09:32 +0000 (12:09 +0100)
committer Peter Eisentraut <peter@eisentraut.org>
Fri, 22 Mar 2019 11:12:43 +0000 (12:12 +0100)
This adds a flag "deterministic" to collations.  If that is false,
such a collation disables various optimizations that assume that
strings are equal only if they are byte-wise equal.  That then allows
use cases such as case-insensitive or accent-insensitive comparisons
or handling of strings with different Unicode normal forms.

This functionality is only supported with the ICU provider.  At least
glibc doesn't appear to have any locales that work in a
nondeterministic way, so it's not worth supporting this for the libc
provider.

The term "deterministic comparison" in this context is from Unicode
Technical Standard #10
(https://unicode.org/reports/tr10/#Deterministic_Comparison).

This patch makes changes in three areas:

- CREATE COLLATION DDL changes and system catalog changes to support
  this new flag.

- Many executor nodes and auxiliary code are extended to track
  collations.  Previously, this code would just throw away collation
  information, because the eventually-called user-defined functions
  didn't use it since they only cared about equality, which didn't
  need collation information.

- String data type functions that do equality comparisons and hashing
  are changed to take the (non-)deterministic flag into account.  For
  comparison, this just means skipping various shortcuts and tie
  breakers that use byte-wise comparison.  For hashing, we first need
  to convert the input string to a canonical "sort key" using the ICU
  analogue of strxfrm().

Reviewed-by: Daniel Verite <daniel@manitou-mail.org>
Reviewed-by: Peter Geoghegan <pg@bowt.ie>
Discussion: https://www.postgresql.org/message-id/flat/1ccc668f-4cbc-0bef-af67-450b47cdfee7@2ndquadrant.com

69 files changed:
contrib/bloom/bloom.h
contrib/bloom/blutils.c
doc/src/sgml/catalogs.sgml
doc/src/sgml/charset.sgml
doc/src/sgml/citext.sgml
doc/src/sgml/func.sgml
doc/src/sgml/ref/create_collation.sgml
src/backend/access/hash/hashfunc.c
src/backend/access/spgist/spgtextproc.c
src/backend/catalog/pg_collation.c
src/backend/commands/collationcmds.c
src/backend/commands/extension.c
src/backend/executor/execExpr.c
src/backend/executor/execGrouping.c
src/backend/executor/execPartition.c
src/backend/executor/execReplication.c
src/backend/executor/nodeAgg.c
src/backend/executor/nodeGroup.c
src/backend/executor/nodeHash.c
src/backend/executor/nodeHashjoin.c
src/backend/executor/nodeRecursiveunion.c
src/backend/executor/nodeSetOp.c
src/backend/executor/nodeSubplan.c
src/backend/executor/nodeUnique.c
src/backend/executor/nodeWindowAgg.c
src/backend/nodes/copyfuncs.c
src/backend/nodes/outfuncs.c
src/backend/nodes/readfuncs.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/util/tlist.c
src/backend/partitioning/partbounds.c
src/backend/partitioning/partprune.c
src/backend/regex/regc_pg_locale.c
src/backend/utils/adt/arrayfuncs.c
src/backend/utils/adt/like.c
src/backend/utils/adt/like_support.c
src/backend/utils/adt/name.c
src/backend/utils/adt/orderedsetaggs.c
src/backend/utils/adt/pg_locale.c
src/backend/utils/adt/ri_triggers.c
src/backend/utils/adt/varchar.c
src/backend/utils/adt/varlena.c
src/backend/utils/cache/catcache.c
src/backend/utils/cache/lsyscache.c
src/bin/initdb/initdb.c
src/bin/pg_dump/pg_dump.c
src/bin/psql/describe.c
src/include/catalog/catversion.h
src/include/catalog/pg_collation.h
src/include/executor/executor.h
src/include/executor/hashjoin.h
src/include/executor/nodeHash.h
src/include/nodes/execnodes.h
src/include/nodes/plannodes.h
src/include/optimizer/planmain.h
src/include/optimizer/tlist.h
src/include/partitioning/partbounds.h
src/include/utils/lsyscache.h
src/include/utils/pg_locale.h
src/test/regress/expected/collate.icu.utf8.out
src/test/regress/expected/collate.linux.utf8.out
src/test/regress/expected/collate.out
src/test/regress/expected/subselect.out
src/test/regress/sql/collate.icu.utf8.sql
src/test/regress/sql/collate.linux.utf8.sql
src/test/regress/sql/collate.sql
src/test/regress/sql/subselect.sql
src/test/subscription/Makefile
src/test/subscription/t/012_collation.pl [new file with mode: 0644]

index d641361aef1f1003c2c37485665a8aae6d2b22d0..7c18eaa50866193f4b882e27169d3f1e7ecca459 100644 (file)
@@ -137,6 +137,7 @@ typedef struct BloomMetaPageData
 typedef struct BloomState
 {
    FmgrInfo    hashFn[INDEX_MAX_KEYS];
+   Oid         collations[INDEX_MAX_KEYS];
    BloomOptions opts;          /* copy of options on index's metapage */
    int32       nColumns;
 
index 645837657871575628d92e1a83927b8341478a57..d078dfbd469bc470ba34179e1477efccf3084bad 100644 (file)
@@ -163,6 +163,7 @@ initBloomState(BloomState *state, Relation index)
        fmgr_info_copy(&(state->hashFn[i]),
                       index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
                       CurrentMemoryContext);
+       state->collations[i] = index->rd_indcollation[i];
    }
 
    /* Initialize amcache if needed with options from metapage */
@@ -267,7 +268,7 @@ signValue(BloomState *state, BloomSignatureWord *sign, Datum value, int attno)
     * different columns will be mapped into different bits because of step
     * above
     */
-   hashVal = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value));
+   hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value));
    mySrand(hashVal ^ myRand());
 
    for (j = 0; j < state->opts.bitSize[attno]; j++)
index 0fd792ff1a2f92adc451be4329df3a56a9c1b90d..45ed077654e3086a0587854ad2f50563198f6ebc 100644 (file)
@@ -2077,6 +2077,13 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
        default, <literal>c</literal> = libc, <literal>i</literal> = icu</entry>
      </row>
 
+     <row>
+      <entry><structfield>collisdeterministic</structfield></entry>
+      <entry><type>bool</type></entry>
+      <entry></entry>
+      <entry>Is the collation deterministic?</entry>
+     </row>
+
      <row>
       <entry><structfield>collencoding</structfield></entry>
       <entry><type>int4</type></entry>
index a6143ef8a744e8db8c9024e07902a14a47a3da59..555d1b4ac6316a9d06f1039340a552b9e0116b2b 100644 (file)
@@ -847,11 +847,13 @@ CREATE COLLATION german (provider = libc, locale = 'de_DE');
 
    <para>
     Note that while this system allows creating collations that <quote>ignore
-    case</quote> or <quote>ignore accents</quote> or similar (using
-    the <literal>ks</literal> key), PostgreSQL does not at the moment allow
-    such collations to act in a truly case- or accent-insensitive manner.  Any
-    strings that compare equal according to the collation but are not
-    byte-wise equal will be sorted according to their byte values.
+    case</quote> or <quote>ignore accents</quote> or similar (using the
+    <literal>ks</literal> key), in order for such collations to act in a
+    truly case- or accent-insensitive manner, they also need to be declared as not
+    <firstterm>deterministic</firstterm> in <command>CREATE COLLATION</command>;
+    see <xref linkend="collation-nondeterministic"/>.
+    Otherwise, any strings that compare equal according to the collation but
+    are not byte-wise equal will be sorted according to their byte values.
    </para>
 
    <note>
@@ -883,6 +885,55 @@ CREATE COLLATION french FROM "fr-x-icu";
    </para>
    </sect4>
    </sect3>
+
+   <sect3 id="collation-nondeterministic">
+    <title>Nondeterministic Collations</title>
+
+    <para>
+     A collation is either <firstterm>deterministic</firstterm> or
+     <firstterm>nondeterministic</firstterm>.  A deterministic collation uses
+     deterministic comparisons, which means that it considers strings to be
+     equal only if they consist of the same byte sequence.  Nondeterministic
+     comparison may determine strings to be equal even if they consist of
+     different bytes.  Typical situations include case-insensitive comparison,
+     accent-insensitive comparison, as well as comparison of strings in
+     different Unicode normal forms.  It is up to the collation provider to
+     actually implement such insensitive comparisons; the deterministic flag
+     only determines whether ties are to be broken using bytewise comparison.
+     See also <ulink url="https://unicode.org/reports/tr10">Unicode Technical
+     Standard 10</ulink> for more information on the terminology.
+    </para>
+
+    <para>
+     To create a nondeterministic collation, specify the property
+     <literal>deterministic = false</literal> to <command>CREATE
+     COLLATION</command>, for example:
+<programlisting>
+CREATE COLLATION ndcoll (provider = icu, locale = 'und', deterministic = false);
+</programlisting>
+     This example would use the standard Unicode collation in a
+     nondeterministic way.  In particular, this would allow strings in
+     different normal forms to be compared correctly.  More interesting
+     examples make use of the ICU customization facilities explained above.
+     For example:
+<programlisting>
+CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false);
+CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false);
+</programlisting>
+    </para>
+
+    <para>
+     All standard and predefined collations are deterministic, and all
+     user-defined collations are deterministic by default.  While
+     nondeterministic collations give a more <quote>correct</quote> behavior,
+     especially when considering the full power of Unicode and its many
+     special cases, they also have some drawbacks.  Foremost, their use leads
+     to a performance penalty.  Also, certain operations are not possible with
+     nondeterministic collations, such as pattern matching operations.
+     Therefore, they should be used only in cases where they are specifically
+     wanted.
+    </para>
+   </sect3>
   </sect2>
  </sect1>
 
index b1fe7101b20907cc315b3d1f8d352eaffae44df4..85aa339d8bafa742b2a6665937d498c4ab1d85df 100644 (file)
   exactly like <type>text</type>.
  </para>
 
+ <tip>
+  <para>
+   Consider using <firstterm>nondeterministic collations</firstterm> (see
+   <xref linkend="collation-nondeterministic"/>) instead of this module.  They
+   can be used for case-insensitive comparisons, accent-insensitive
+   comparisons, and other combinations, and they handle more Unicode special
+   cases correctly.
+  </para>
+ </tip>
+
  <sect2>
   <title>Rationale</title>
 
@@ -246,6 +256,17 @@ SELECT * FROM users WHERE nick = 'Larry';
       will be invoked instead.
     </para>
     </listitem>
+
+    <listitem>
+     <para>
+      The approach of lower-casing strings for comparison does not handle some
+      Unicode special cases correctly, for example when one upper-case letter
+      has two lower-case letter equivalents.  Unicode distinguishes between
+      <firstterm>case mapping</firstterm> and <firstterm>case
+      folding</firstterm> for this reason.  Use nondeterministic collations
+      instead of <type>citext</type> to handle that correctly.
+     </para>
+    </listitem>
    </itemizedlist>
  </sect2>
 
index 3a99e209a2b0b5df1679e435cf44a4f4b7630771..1a014732919b6ef03c73784424d43b036f86c325 100644 (file)
@@ -4065,6 +4065,12 @@ cast(-44 as bit(12))           <lineannotation>111111010100</lineannotation>
     </para>
    </caution>
 
+   <para>
+    The pattern matching operators of all three kinds do not support
+    nondeterministic collations.  If required, apply a different collation to
+    the expression to work around this limitation.
+   </para>
+
   <sect2 id="functions-like">
    <title><function>LIKE</function></title>
 
index 038797fce116520361337c18ec17a1623047f0a6..def4dda6e8899be946b6b88e044871304262f259 100644 (file)
@@ -23,6 +23,7 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> (
     [ LC_COLLATE = <replaceable>lc_collate</replaceable>, ]
     [ LC_CTYPE = <replaceable>lc_ctype</replaceable>, ]
     [ PROVIDER = <replaceable>provider</replaceable>, ]
+    [ DETERMINISTIC = <replaceable>boolean</replaceable>, ]
     [ VERSION = <replaceable>version</replaceable> ]
 )
 CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replaceable>existing_collation</replaceable>
@@ -124,6 +125,27 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replace
      </listitem>
     </varlistentry>
 
+    <varlistentry>
+     <term><literal>DETERMINISTIC</literal></term>
+
+     <listitem>
+      <para>
+       Specifies whether the collation should use deterministic comparisons.
+       The default is true.  A deterministic comparison considers strings that
+       are not byte-wise equal to be unequal even if they are considered
+       logically equal by the comparison.  PostgreSQL breaks ties using a
+       byte-wise comparison.  Comparison that is not deterministic can make the
+       collation be, say, case- or accent-insensitive.  For that, you need to
+       choose an appropriate <literal>LC_COLLATE</literal> setting
+       <emphasis>and</emphasis> set the collation to not deterministic here.
+      </para>
+
+      <para>
+       Nondeterministic collations are only supported with the ICU provider.
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry>
      <term><replaceable>version</replaceable></term>
 
index e5f3d42e0454859fff2c31cb52a6d5cc8b591f54..0bf15ae7236c559363ed9805248e0cf8d4f1275b 100644 (file)
 #include "postgres.h"
 
 #include "access/hash.h"
+#include "catalog/pg_collation.h"
 #include "utils/builtins.h"
 #include "utils/hashutils.h"
+#include "utils/pg_locale.h"
 
 /*
  * Datatype-specific hash functions.
@@ -243,15 +245,51 @@ Datum
 hashtext(PG_FUNCTION_ARGS)
 {
    text       *key = PG_GETARG_TEXT_PP(0);
+   Oid         collid = PG_GET_COLLATION();
+   pg_locale_t mylocale = 0;
    Datum       result;
 
-   /*
-    * Note: this is currently identical in behavior to hashvarlena, but keep
-    * it as a separate function in case we someday want to do something
-    * different in non-C locales.  (See also hashbpchar, if so.)
-    */
-   result = hash_any((unsigned char *) VARDATA_ANY(key),
-                     VARSIZE_ANY_EXHDR(key));
+   if (!collid)
+       ereport(ERROR,
+               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                errmsg("could not determine which collation to use for string hashing"),
+                errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+   if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       mylocale = pg_newlocale_from_collation(collid);
+
+   if (!mylocale || mylocale->deterministic)
+   {
+       result = hash_any((unsigned char *) VARDATA_ANY(key),
+                         VARSIZE_ANY_EXHDR(key));
+   }
+   else
+   {
+#ifdef USE_ICU
+       if (mylocale->provider == COLLPROVIDER_ICU)
+       {
+           int32_t     ulen = -1;
+           UChar      *uchar = NULL;
+           Size        bsize;
+           uint8_t    *buf;
+
+           ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+           bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+                                   uchar, ulen, NULL, 0);
+           buf = palloc(bsize);
+           ucol_getSortKey(mylocale->info.icu.ucol,
+                           uchar, ulen, buf, bsize);
+
+           result = hash_any(buf, bsize);
+
+           pfree(buf);
+       }
+       else
+#endif
+           /* shouldn't happen */
+           elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+   }
 
    /* Avoid leaking memory for toasted inputs */
    PG_FREE_IF_COPY(key, 0);
@@ -263,12 +301,52 @@ Datum
 hashtextextended(PG_FUNCTION_ARGS)
 {
    text       *key = PG_GETARG_TEXT_PP(0);
+   Oid         collid = PG_GET_COLLATION();
+   pg_locale_t mylocale = 0;
    Datum       result;
 
-   /* Same approach as hashtext */
-   result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
-                              VARSIZE_ANY_EXHDR(key),
-                              PG_GETARG_INT64(1));
+   if (!collid)
+       ereport(ERROR,
+               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                errmsg("could not determine which collation to use for string hashing"),
+                errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+   if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       mylocale = pg_newlocale_from_collation(collid);
+
+   if (!mylocale || mylocale->deterministic)
+   {
+       result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
+                                  VARSIZE_ANY_EXHDR(key),
+                                  PG_GETARG_INT64(1));
+   }
+   else
+   {
+#ifdef USE_ICU
+       if (mylocale->provider == COLLPROVIDER_ICU)
+       {
+           int32_t     ulen = -1;
+           UChar      *uchar = NULL;
+           Size        bsize;
+           uint8_t    *buf;
+
+           ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+           bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+                                   uchar, ulen, NULL, 0);
+           buf = palloc(bsize);
+           ucol_getSortKey(mylocale->info.icu.ucol,
+                           uchar, ulen, buf, bsize);
+
+           result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
+
+           pfree(buf);
+       }
+       else
+#endif
+           /* shouldn't happen */
+           elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+   }
 
    PG_FREE_IF_COPY(key, 0);
 
index 39cd391529c2194fda1c055aefbd9faad556b694..d22998c54bf4424d991bf0cfa2f5cea4deec5f68 100644 (file)
@@ -630,7 +630,8 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
             * query (prefix) string, so we don't need to check it again.
             */
            res = (level >= queryLen) ||
-               DatumGetBool(DirectFunctionCall2(text_starts_with,
+               DatumGetBool(DirectFunctionCall2Coll(text_starts_with,
+                                                    PG_GET_COLLATION(),
                                                 out->leafValue,
                                                 PointerGetDatum(query)));
 
index 74e1e82cb9cac03ebc06414e196b62a059f6fdec..dd99d53547f3563e71a123bdff50ce450e6f77a0 100644 (file)
@@ -46,6 +46,7 @@ Oid
 CollationCreate(const char *collname, Oid collnamespace,
                Oid collowner,
                char collprovider,
+               bool collisdeterministic,
                int32 collencoding,
                const char *collcollate, const char *collctype,
                const char *collversion,
@@ -160,6 +161,7 @@ CollationCreate(const char *collname, Oid collnamespace,
    values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace);
    values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner);
    values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider);
+   values[Anum_pg_collation_collisdeterministic - 1] = BoolGetDatum(collisdeterministic);
    values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding);
    namestrcpy(&name_collate, collcollate);
    values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate);
index ed3f1c12e57e1189f84abc6314bf2c3d62bf2402..919e092483ae7caf99a81549bc9a88140eb3666e 100644 (file)
@@ -59,10 +59,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
    DefElem    *lccollateEl = NULL;
    DefElem    *lcctypeEl = NULL;
    DefElem    *providerEl = NULL;
+   DefElem    *deterministicEl = NULL;
    DefElem    *versionEl = NULL;
    char       *collcollate = NULL;
    char       *collctype = NULL;
    char       *collproviderstr = NULL;
+   bool        collisdeterministic = true;
    int         collencoding = 0;
    char        collprovider = 0;
    char       *collversion = NULL;
@@ -91,6 +93,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
            defelp = &lcctypeEl;
        else if (strcmp(defel->defname, "provider") == 0)
            defelp = &providerEl;
+       else if (strcmp(defel->defname, "deterministic") == 0)
+           defelp = &deterministicEl;
        else if (strcmp(defel->defname, "version") == 0)
            defelp = &versionEl;
        else
@@ -125,6 +129,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
        collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
        collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype));
        collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
+       collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic;
        collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding;
 
        ReleaseSysCache(tp);
@@ -157,6 +162,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
    if (providerEl)
        collproviderstr = defGetString(providerEl);
 
+   if (deterministicEl)
+       collisdeterministic = defGetBoolean(deterministicEl);
+
    if (versionEl)
        collversion = defGetString(versionEl);
 
@@ -185,6 +193,16 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                 errmsg("parameter \"lc_ctype\" must be specified")));
 
+   /*
+    * Nondeterministic collations are currently only supported with ICU
+    * because that's the only case where it can actually make a difference.
+    * So we can save writing the code for the other providers.
+    */
+   if (!collisdeterministic && collprovider != COLLPROVIDER_ICU)
+       ereport(ERROR,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("nondeterministic collations not supported with this provider")));
+
    if (!fromEl)
    {
        if (collprovider == COLLPROVIDER_ICU)
@@ -203,6 +221,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
                             collNamespace,
                             GetUserId(),
                             collprovider,
+                            collisdeterministic,
                             collencoding,
                             collcollate,
                             collctype,
@@ -586,7 +605,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
             * about existing ones.
             */
            collid = CollationCreate(localebuf, nspid, GetUserId(),
-                                    COLLPROVIDER_LIBC, enc,
+                                    COLLPROVIDER_LIBC, true, enc,
                                     localebuf, localebuf,
                                     get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
                                     true, true);
@@ -647,7 +666,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
            int         enc = aliases[i].enc;
 
            collid = CollationCreate(alias, nspid, GetUserId(),
-                                    COLLPROVIDER_LIBC, enc,
+                                    COLLPROVIDER_LIBC, true, enc,
                                     locale, locale,
                                     get_collation_actual_version(COLLPROVIDER_LIBC, locale),
                                     true, true);
@@ -709,7 +728,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 
            collid = CollationCreate(psprintf("%s-x-icu", langtag),
                                     nspid, GetUserId(),
-                                    COLLPROVIDER_ICU, -1,
+                                    COLLPROVIDER_ICU, true, -1,
                                     collcollate, collcollate,
                                     get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
                                     true, true);
index daf3f516362d323b0e1e13ed3d12843f85e1de10..d4723fced898785d3f371315b457e340bb70749e 100644 (file)
@@ -901,7 +901,8 @@ execute_extension_script(Oid extensionOid, ExtensionControlFile *control,
        {
            const char *qSchemaName = quote_identifier(schemaName);
 
-           t_sql = DirectFunctionCall3(replace_text,
+           t_sql = DirectFunctionCall3Coll(replace_text,
+                                           C_COLLATION_OID,
                                        t_sql,
                                        CStringGetTextDatum("@extschema@"),
                                        CStringGetTextDatum(qSchemaName));
@@ -913,7 +914,8 @@ execute_extension_script(Oid extensionOid, ExtensionControlFile *control,
         */
        if (control->module_pathname)
        {
-           t_sql = DirectFunctionCall3(replace_text,
+           t_sql = DirectFunctionCall3Coll(replace_text,
+                                           C_COLLATION_OID,
                                        t_sql,
                                        CStringGetTextDatum("MODULE_PATHNAME"),
                                        CStringGetTextDatum(control->module_pathname));
index 7cbf9d3bc1c786d0ce10fc5e7c9f4486107ac5dc..0fb31f5c3d395be5416d601b15d7256af262212a 100644 (file)
@@ -3317,6 +3317,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
                       int numCols,
                       const AttrNumber *keyColIdx,
                       const Oid *eqfunctions,
+                      const Oid *collations,
                       PlanState *parent)
 {
    ExprState  *state = makeNode(ExprState);
@@ -3377,6 +3378,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
        Form_pg_attribute latt = TupleDescAttr(ldesc, attno - 1);
        Form_pg_attribute ratt = TupleDescAttr(rdesc, attno - 1);
        Oid         foid = eqfunctions[natt];
+       Oid         collid = collations[natt];
        FmgrInfo   *finfo;
        FunctionCallInfo fcinfo;
        AclResult   aclresult;
@@ -3394,7 +3396,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
        fmgr_info(foid, finfo);
        fmgr_info_set_expr(NULL, finfo);
        InitFunctionCallInfoData(*fcinfo, finfo, 2,
-                                InvalidOid, NULL, NULL);
+                                collid, NULL, NULL);
 
        /* left arg */
        scratch.opcode = EEOP_INNER_VAR;
index 417e971ec88db254254b67e3b6511396d01de1cf..14ee8db3f98b7d58396dbe52b19bf60b6bb1b960 100644 (file)
@@ -60,6 +60,7 @@ execTuplesMatchPrepare(TupleDesc desc,
                       int numCols,
                       const AttrNumber *keyColIdx,
                       const Oid *eqOperators,
+                      const Oid *collations,
                       PlanState *parent)
 {
    Oid        *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
@@ -75,7 +76,7 @@ execTuplesMatchPrepare(TupleDesc desc,
 
    /* build actual expression */
    expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
-                                 numCols, keyColIdx, eqFunctions,
+                                 numCols, keyColIdx, eqFunctions, collations,
                                  parent);
 
    return expr;
@@ -155,6 +156,7 @@ BuildTupleHashTableExt(PlanState *parent,
                       int numCols, AttrNumber *keyColIdx,
                       const Oid *eqfuncoids,
                       FmgrInfo *hashfunctions,
+                      Oid *collations,
                       long nbuckets, Size additionalsize,
                       MemoryContext metacxt,
                       MemoryContext tablecxt,
@@ -177,6 +179,7 @@ BuildTupleHashTableExt(PlanState *parent,
    hashtable->numCols = numCols;
    hashtable->keyColIdx = keyColIdx;
    hashtable->tab_hash_funcs = hashfunctions;
+   hashtable->tab_collations = collations;
    hashtable->tablecxt = tablecxt;
    hashtable->tempcxt = tempcxt;
    hashtable->entrysize = entrysize;
@@ -212,7 +215,7 @@ BuildTupleHashTableExt(PlanState *parent,
    hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
                                                    &TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
                                                    numCols,
-                                                   keyColIdx, eqfuncoids,
+                                                   keyColIdx, eqfuncoids, collations,
                                                    NULL);
 
    /*
@@ -240,6 +243,7 @@ BuildTupleHashTable(PlanState *parent,
                    int numCols, AttrNumber *keyColIdx,
                    const Oid *eqfuncoids,
                    FmgrInfo *hashfunctions,
+                   Oid *collations,
                    long nbuckets, Size additionalsize,
                    MemoryContext tablecxt,
                    MemoryContext tempcxt,
@@ -250,6 +254,7 @@ BuildTupleHashTable(PlanState *parent,
                                  numCols, keyColIdx,
                                  eqfuncoids,
                                  hashfunctions,
+                                 collations,
                                  nbuckets, additionalsize,
                                  tablecxt,
                                  tablecxt,
@@ -421,8 +426,9 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
        {
            uint32      hkey;
 
-           hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
-                                               attr));
+           hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
+                                                   hashtable->tab_collations[i],
+                                                   attr));
            hashkey ^= hkey;
        }
    }
index 37e96a6013bed2caa4f1cc65b31dec232db0791a..cfad8a38f0f0701c16543cecb72bbc22f8b30438 100644 (file)
@@ -1246,6 +1246,7 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
                greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
                rowHash = compute_partition_hash_value(key->partnatts,
                                                       key->partsupfunc,
+                                                      key->partcollation,
                                                       values, isnull);
 
                part_index = boundinfo->indexes[rowHash % greatest_modulus];
index 95dfc4987de753409cb12ead18d1a409209e6f2a..c539bb5a3f65e5cc65b5e0f1cd0bf15f4ebc391a 100644 (file)
@@ -96,6 +96,8 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel,
                    regop,
                    searchslot->tts_values[mainattno - 1]);
 
+       skey[attoff].sk_collation = idxrel->rd_indcollation[attoff];
+
        /* Check for null value. */
        if (searchslot->tts_isnull[mainattno - 1])
        {
@@ -262,7 +264,8 @@ tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2)
                     errmsg("could not identify an equality operator for type %s",
                            format_type_be(att->atttypid))));
 
-       if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo,
+       if (!DatumGetBool(FunctionCall2Coll(&typentry->eq_opr_finfo,
+                                           att->attcollation,
                                        slot1->tts_values[attrnum],
                                        slot2->tts_values[attrnum])))
            return false;
index bae7989a4227a3c3a13d2c4d809c987c1bfb9d3d..47161afbd42da1752cf32761c76be944ee52baec 100644 (file)
@@ -746,15 +746,14 @@ process_ordered_aggregate_single(AggState *aggstate,
 
        /*
         * If DISTINCT mode, and not distinct from prior, skip it.
-        *
-        * Note: we assume equality functions don't care about collation.
         */
        if (isDistinct &&
            haveOldVal &&
            ((oldIsNull && *isNull) ||
             (!oldIsNull && !*isNull &&
              oldAbbrevVal == newAbbrevVal &&
-             DatumGetBool(FunctionCall2(&pertrans->equalfnOne,
+             DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne,
+                                            pertrans->aggCollation,
                                         oldVal, *newVal)))))
        {
            /* equal to prior, so forget this one */
@@ -1287,6 +1286,7 @@ build_hash_table(AggState *aggstate)
                                                        perhash->hashGrpColIdxHash,
                                                        perhash->eqfuncoids,
                                                        perhash->hashfunctions,
+                                                       perhash->aggnode->grpCollations,
                                                        perhash->aggnode->numGroups,
                                                        additionalsize,
                                                        aggstate->ss.ps.state->es_query_cxt,
@@ -2381,6 +2381,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
                                               length,
                                               aggnode->grpColIdx,
                                               aggnode->grpOperators,
+                                              aggnode->grpCollations,
                                               (PlanState *) aggstate);
                }
 
@@ -2392,6 +2393,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
                                               aggnode->numCols,
                                               aggnode->grpColIdx,
                                               aggnode->grpOperators,
+                                              aggnode->grpCollations,
                                               (PlanState *) aggstate);
                }
            }
@@ -3155,6 +3157,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
                                       numDistinctCols,
                                       pertrans->sortColIdx,
                                       ops,
+                                      pertrans->sortCollations,
                                       &aggstate->ss.ps);
        pfree(ops);
    }
index 655084d7b564858b1a447b79be6905acd9352bf6..05f1d33150f84c6eae53ac9f0a62b1a72089b779 100644 (file)
@@ -212,6 +212,7 @@ ExecInitGroup(Group *node, EState *estate, int eflags)
                               node->numCols,
                               node->grpColIdx,
                               node->grpOperators,
+                              node->grpCollations,
                               &grpstate->ss.ps);
 
    return grpstate;
index 856daf6a7f31ed0d24d3e3056bf559df19c27a39..64eec91f8b8f970f9a8d3f2950b6832341b58f8f 100644 (file)
@@ -425,7 +425,7 @@ ExecEndHash(HashState *node)
  * ----------------------------------------------------------------
  */
 HashJoinTable
-ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
+ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls)
 {
    Hash       *node;
    HashJoinTable hashtable;
@@ -439,6 +439,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
    int         nkeys;
    int         i;
    ListCell   *ho;
+   ListCell   *hc;
    MemoryContext oldcxt;
 
    /*
@@ -541,8 +542,9 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
    hashtable->inner_hashfunctions =
        (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
    hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
+   hashtable->collations = (Oid *) palloc(nkeys * sizeof(Oid));
    i = 0;
-   foreach(ho, hashOperators)
+   forboth(ho, hashOperators, hc, hashCollations)
    {
        Oid         hashop = lfirst_oid(ho);
        Oid         left_hashfn;
@@ -554,6 +556,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
        fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
        fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
        hashtable->hashStrict[i] = op_strict(hashop);
+       hashtable->collations[i] = lfirst_oid(hc);
        i++;
    }
 
@@ -1847,7 +1850,7 @@ ExecHashGetHashValue(HashJoinTable hashtable,
            /* Compute the hash function */
            uint32      hkey;
 
-           hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], keyval));
+           hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], hashtable->collations[i], keyval));
            hashkey ^= hkey;
        }
 
@@ -2303,8 +2306,9 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
            uint32      hashvalue;
            int         bucket;
 
-           hashvalue = DatumGetUInt32(FunctionCall1(&hashfunctions[0],
-                                                    sslot.values[i]));
+           hashvalue = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[0],
+                                                        hashtable->collations[0],
+                                                        sslot.values[i]));
 
            /*
             * While we have not hit a hole in the hashtable and have not hit
index 209870886400645312e74c33ae6dbecb5149a3bb..aa43296e26cfe30bee27aa621cae9b44b92fedcb 100644 (file)
@@ -278,6 +278,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
                 */
                hashtable = ExecHashTableCreate(hashNode,
                                                node->hj_HashOperators,
+                                               node->hj_Collations,
                                                HJ_FILL_INNER(node));
                node->hj_HashTable = hashtable;
 
@@ -603,6 +604,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
    List       *rclauses;
    List       *rhclauses;
    List       *hoperators;
+   List       *hcollations;
    TupleDesc   outerDesc,
                innerDesc;
    ListCell   *l;
@@ -738,6 +740,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
    rclauses = NIL;
    rhclauses = NIL;
    hoperators = NIL;
+   hcollations = NIL;
    foreach(l, node->hashclauses)
    {
        OpExpr     *hclause = lfirst_node(OpExpr, l);
@@ -749,10 +752,12 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
        rhclauses = lappend(rhclauses, ExecInitExpr(lsecond(hclause->args),
                                                   innerPlanState(hjstate)));
        hoperators = lappend_oid(hoperators, hclause->opno);
+       hcollations = lappend_oid(hcollations, hclause->inputcollid);
    }
    hjstate->hj_OuterHashKeys = lclauses;
    hjstate->hj_InnerHashKeys = rclauses;
    hjstate->hj_HashOperators = hoperators;
+   hjstate->hj_Collations = hcollations;
    /* child Hash node needs to evaluate inner hash keys, too */
    ((HashState *) innerPlanState(hjstate))->hashkeys = rhclauses;
 
index 9b74ed3208c5f660132ca663b7f7a225db87f54d..9c5eed7def3971c537d57be575802438ee192cda 100644 (file)
@@ -43,6 +43,7 @@ build_hash_table(RecursiveUnionState *rustate)
                                                node->dupColIdx,
                                                rustate->eqfuncoids,
                                                rustate->hashfunctions,
+                                               node->dupCollations,
                                                node->numGroups,
                                                0,
                                                rustate->ps.state->es_query_cxt,
index 26aeaee08389b347aa67a76cd479b86c616e3d82..044246aa09ff34f109192569b922daf3b956094c 100644 (file)
@@ -132,6 +132,7 @@ build_hash_table(SetOpState *setopstate)
                                                   node->dupColIdx,
                                                   setopstate->eqfuncoids,
                                                   setopstate->hashfunctions,
+                                                  node->dupCollations,
                                                   node->numGroups,
                                                   0,
                                                   setopstate->ps.state->es_query_cxt,
@@ -554,6 +555,7 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
                                   node->numCols,
                                   node->dupColIdx,
                                   node->dupOperators,
+                                  node->dupCollations,
                                   &setopstate->ps);
 
    if (node->strategy == SETOP_HASHED)
index d7d076758c323a571420077d1f381db4ba02f9a7..749b4eced34c4aea222603b1c0fa8ab51ce1f6f8 100644 (file)
@@ -514,6 +514,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
                                                 node->keyColIdx,
                                                 node->tab_eq_funcoids,
                                                 node->tab_hash_funcs,
+                                                node->tab_collations,
                                                 nbuckets,
                                                 0,
                                                 node->planstate->state->es_query_cxt,
@@ -541,6 +542,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
                                                     node->keyColIdx,
                                                     node->tab_eq_funcoids,
                                                     node->tab_hash_funcs,
+                                                    node->tab_collations,
                                                     nbuckets,
                                                     0,
                                                     node->planstate->state->es_query_cxt,
@@ -642,6 +644,7 @@ execTuplesUnequal(TupleTableSlot *slot1,
                  int numCols,
                  AttrNumber *matchColIdx,
                  FmgrInfo *eqfunctions,
+                 const Oid *collations,
                  MemoryContext evalContext)
 {
    MemoryContext oldContext;
@@ -679,8 +682,8 @@ execTuplesUnequal(TupleTableSlot *slot1,
            continue;           /* can't prove anything here */
 
        /* Apply the type-specific equality function */
-
-       if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
+       if (!DatumGetBool(FunctionCall2Coll(&eqfunctions[i],
+                                           collations[i],
                                        attr1, attr2)))
        {
            result = true;      /* they are unequal */
@@ -722,6 +725,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot,
        if (!execTuplesUnequal(slot, hashtable->tableslot,
                               numCols, keyColIdx,
                               eqfunctions,
+                              hashtable->tab_collations,
                               hashtable->tempcxt))
        {
            TermTupleHashIterator(&hashiter);
@@ -817,6 +821,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
    sstate->tab_eq_funcoids = NULL;
    sstate->tab_hash_funcs = NULL;
    sstate->tab_eq_funcs = NULL;
+   sstate->tab_collations = NULL;
    sstate->lhs_hash_funcs = NULL;
    sstate->cur_eq_funcs = NULL;
 
@@ -915,6 +920,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
        sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid));
        sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
        sstate->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+       sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid));
        sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
        sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
        i = 1;
@@ -965,6 +971,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
            fmgr_info(left_hashfn, &sstate->lhs_hash_funcs[i - 1]);
            fmgr_info(right_hashfn, &sstate->tab_hash_funcs[i - 1]);
 
+           /* Set collation */
+           sstate->tab_collations[i - 1] = opexpr->inputcollid;
+
            i++;
        }
 
@@ -1001,6 +1010,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
                                                     ncols,
                                                     sstate->keyColIdx,
                                                     sstate->tab_eq_funcoids,
+                                                    sstate->tab_collations,
                                                     parent);
 
    }
index ad7039937d0650370a1790929690ad1e736971a8..c553f150b8d25febdb8d73d362b3e0ad1223aa6f 100644 (file)
@@ -152,6 +152,7 @@ ExecInitUnique(Unique *node, EState *estate, int eflags)
                               node->numCols,
                               node->uniqColIdx,
                               node->uniqOperators,
+                              node->uniqCollations,
                               &uniquestate->ps);
 
    return uniquestate;
index 157ac042b82a4ba648291ac8b4f677231ad24181..b090828c01ee59c5cf35d9e499fcc6af847b8766 100644 (file)
@@ -2370,6 +2370,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
                                   node->partNumCols,
                                   node->partColIdx,
                                   node->partOperators,
+                                  node->partCollations,
                                   &winstate->ss.ps);
 
    if (node->ordNumCols > 0)
@@ -2378,6 +2379,7 @@ ExecInitWindowAgg(WindowAgg *node, EState *estate, int eflags)
                                   node->ordNumCols,
                                   node->ordColIdx,
                                   node->ordOperators,
+                                  node->ordCollations,
                                   &winstate->ss.ps);
 
    /*
index c68bd7bcf73c2d5b1378ef49071255c31dd53e84..1ea6b845616f1bc7a8994a3afc58f0168c6c1150 100644 (file)
@@ -297,6 +297,7 @@ _copyRecursiveUnion(const RecursiveUnion *from)
    {
        COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
        COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
+       COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid));
    }
    COPY_SCALAR_FIELD(numGroups);
 
@@ -956,6 +957,7 @@ _copyGroup(const Group *from)
    COPY_SCALAR_FIELD(numCols);
    COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber));
    COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
+   COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid));
 
    return newnode;
 }
@@ -977,6 +979,7 @@ _copyAgg(const Agg *from)
    {
        COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber));
        COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
+       COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid));
    }
    COPY_SCALAR_FIELD(numGroups);
    COPY_BITMAPSET_FIELD(aggParams);
@@ -1002,12 +1005,14 @@ _copyWindowAgg(const WindowAgg *from)
    {
        COPY_POINTER_FIELD(partColIdx, from->partNumCols * sizeof(AttrNumber));
        COPY_POINTER_FIELD(partOperators, from->partNumCols * sizeof(Oid));
+       COPY_POINTER_FIELD(partCollations, from->partNumCols * sizeof(Oid));
    }
    COPY_SCALAR_FIELD(ordNumCols);
    if (from->ordNumCols > 0)
    {
        COPY_POINTER_FIELD(ordColIdx, from->ordNumCols * sizeof(AttrNumber));
        COPY_POINTER_FIELD(ordOperators, from->ordNumCols * sizeof(Oid));
+       COPY_POINTER_FIELD(ordCollations, from->ordNumCols * sizeof(Oid));
    }
    COPY_SCALAR_FIELD(frameOptions);
    COPY_NODE_FIELD(startOffset);
@@ -1040,6 +1045,7 @@ _copyUnique(const Unique *from)
    COPY_SCALAR_FIELD(numCols);
    COPY_POINTER_FIELD(uniqColIdx, from->numCols * sizeof(AttrNumber));
    COPY_POINTER_FIELD(uniqOperators, from->numCols * sizeof(Oid));
+   COPY_POINTER_FIELD(uniqCollations, from->numCols * sizeof(Oid));
 
    return newnode;
 }
@@ -1089,6 +1095,7 @@ _copySetOp(const SetOp *from)
    COPY_SCALAR_FIELD(numCols);
    COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
    COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
+   COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid));
    COPY_SCALAR_FIELD(flagColIdx);
    COPY_SCALAR_FIELD(firstFlag);
    COPY_SCALAR_FIELD(numGroups);
index 69179a07c375b252d4a35c854174521177ed1776..910a738c205de69d869d122c25e642af20947b30 100644 (file)
@@ -463,6 +463,7 @@ _outRecursiveUnion(StringInfo str, const RecursiveUnion *node)
    WRITE_INT_FIELD(numCols);
    WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols);
    WRITE_OID_ARRAY(dupOperators, node->numCols);
+   WRITE_OID_ARRAY(dupCollations, node->numCols);
    WRITE_LONG_FIELD(numGroups);
 }
 
@@ -774,6 +775,7 @@ _outAgg(StringInfo str, const Agg *node)
    WRITE_INT_FIELD(numCols);
    WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols);
    WRITE_OID_ARRAY(grpOperators, node->numCols);
+   WRITE_OID_ARRAY(grpCollations, node->numCols);
    WRITE_LONG_FIELD(numGroups);
    WRITE_BITMAPSET_FIELD(aggParams);
    WRITE_NODE_FIELD(groupingSets);
@@ -791,9 +793,11 @@ _outWindowAgg(StringInfo str, const WindowAgg *node)
    WRITE_INT_FIELD(partNumCols);
    WRITE_ATTRNUMBER_ARRAY(partColIdx, node->partNumCols);
    WRITE_OID_ARRAY(partOperators, node->partNumCols);
+   WRITE_OID_ARRAY(partCollations, node->partNumCols);
    WRITE_INT_FIELD(ordNumCols);
    WRITE_ATTRNUMBER_ARRAY(ordColIdx, node->ordNumCols);
    WRITE_OID_ARRAY(ordOperators, node->ordNumCols);
+   WRITE_OID_ARRAY(ordCollations, node->ordNumCols);
    WRITE_INT_FIELD(frameOptions);
    WRITE_NODE_FIELD(startOffset);
    WRITE_NODE_FIELD(endOffset);
@@ -814,6 +818,7 @@ _outGroup(StringInfo str, const Group *node)
    WRITE_INT_FIELD(numCols);
    WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols);
    WRITE_OID_ARRAY(grpOperators, node->numCols);
+   WRITE_OID_ARRAY(grpCollations, node->numCols);
 }
 
 static void
@@ -848,6 +853,7 @@ _outUnique(StringInfo str, const Unique *node)
    WRITE_INT_FIELD(numCols);
    WRITE_ATTRNUMBER_ARRAY(uniqColIdx, node->numCols);
    WRITE_OID_ARRAY(uniqOperators, node->numCols);
+   WRITE_OID_ARRAY(uniqCollations, node->numCols);
 }
 
 static void
@@ -875,6 +881,7 @@ _outSetOp(StringInfo str, const SetOp *node)
    WRITE_INT_FIELD(numCols);
    WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols);
    WRITE_OID_ARRAY(dupOperators, node->numCols);
+   WRITE_OID_ARRAY(dupCollations, node->numCols);
    WRITE_INT_FIELD(flagColIdx);
    WRITE_INT_FIELD(firstFlag);
    WRITE_LONG_FIELD(numGroups);
index 4b845b1bb71cb0ed833dfcef7639b8a76ac20b75..eff98febf1fa80748acf50e1b8817b1ddbaee9ab 100644 (file)
@@ -1677,6 +1677,7 @@ _readRecursiveUnion(void)
    READ_INT_FIELD(numCols);
    READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols);
    READ_OID_ARRAY(dupOperators, local_node->numCols);
+   READ_OID_ARRAY(dupCollations, local_node->numCols);
    READ_LONG_FIELD(numGroups);
 
    READ_DONE();
@@ -2143,6 +2144,7 @@ _readGroup(void)
    READ_INT_FIELD(numCols);
    READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
    READ_OID_ARRAY(grpOperators, local_node->numCols);
+   READ_OID_ARRAY(grpCollations, local_node->numCols);
 
    READ_DONE();
 }
@@ -2162,6 +2164,7 @@ _readAgg(void)
    READ_INT_FIELD(numCols);
    READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
    READ_OID_ARRAY(grpOperators, local_node->numCols);
+   READ_OID_ARRAY(grpCollations, local_node->numCols);
    READ_LONG_FIELD(numGroups);
    READ_BITMAPSET_FIELD(aggParams);
    READ_NODE_FIELD(groupingSets);
@@ -2184,9 +2187,11 @@ _readWindowAgg(void)
    READ_INT_FIELD(partNumCols);
    READ_ATTRNUMBER_ARRAY(partColIdx, local_node->partNumCols);
    READ_OID_ARRAY(partOperators, local_node->partNumCols);
+   READ_OID_ARRAY(partCollations, local_node->partNumCols);
    READ_INT_FIELD(ordNumCols);
    READ_ATTRNUMBER_ARRAY(ordColIdx, local_node->ordNumCols);
    READ_OID_ARRAY(ordOperators, local_node->ordNumCols);
+   READ_OID_ARRAY(ordCollations, local_node->ordNumCols);
    READ_INT_FIELD(frameOptions);
    READ_NODE_FIELD(startOffset);
    READ_NODE_FIELD(endOffset);
@@ -2212,6 +2217,7 @@ _readUnique(void)
    READ_INT_FIELD(numCols);
    READ_ATTRNUMBER_ARRAY(uniqColIdx, local_node->numCols);
    READ_OID_ARRAY(uniqOperators, local_node->numCols);
+   READ_OID_ARRAY(uniqCollations, local_node->numCols);
 
    READ_DONE();
 }
@@ -2290,6 +2296,7 @@ _readSetOp(void)
    READ_INT_FIELD(numCols);
    READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols);
    READ_OID_ARRAY(dupOperators, local_node->numCols);
+   READ_OID_ARRAY(dupCollations, local_node->numCols);
    READ_INT_FIELD(flagColIdx);
    READ_INT_FIELD(firstFlag);
    READ_LONG_FIELD(numGroups);
index 9fbe5b2a5fbedc2acd5f62ca1263cdc250d91ea7..93c56c657ce2e10aefd81ebbe4fdab453c3a7002 100644 (file)
@@ -260,14 +260,14 @@ static Sort *make_sort_from_groupcols(List *groupcls,
                         Plan *lefttree);
 static Material *make_material(Plan *lefttree);
 static WindowAgg *make_windowagg(List *tlist, Index winref,
-              int partNumCols, AttrNumber *partColIdx, Oid *partOperators,
-              int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators,
+              int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
+              int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
               int frameOptions, Node *startOffset, Node *endOffset,
               Oid startInRangeFunc, Oid endInRangeFunc,
               Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst,
               Plan *lefttree);
 static Group *make_group(List *tlist, List *qual, int numGroupCols,
-          AttrNumber *grpColIdx, Oid *grpOperators,
+          AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
           Plan *lefttree);
 static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList);
 static Unique *make_unique_from_pathkeys(Plan *lefttree,
@@ -1387,6 +1387,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
    bool        newitems;
    int         numGroupCols;
    AttrNumber *groupColIdx;
+   Oid        *groupCollations;
    int         groupColPos;
    ListCell   *l;
 
@@ -1453,6 +1454,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
    newtlist = subplan->targetlist;
    numGroupCols = list_length(uniq_exprs);
    groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber));
+   groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid));
 
    groupColPos = 0;
    foreach(l, uniq_exprs)
@@ -1463,7 +1465,9 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
        tle = tlist_member(uniqexpr, newtlist);
        if (!tle)               /* shouldn't happen */
            elog(ERROR, "failed to find unique expression in subplan tlist");
-       groupColIdx[groupColPos++] = tle->resno;
+       groupColIdx[groupColPos] = tle->resno;
+       groupCollations[groupColPos] = exprCollation((Node *) tle->expr);
+       groupColPos++;
    }
 
    if (best_path->umethod == UNIQUE_PATH_HASH)
@@ -1501,6 +1505,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags)
                                 numGroupCols,
                                 groupColIdx,
                                 groupOperators,
+                                groupCollations,
                                 NIL,
                                 NIL,
                                 best_path->path.rows,
@@ -1883,6 +1888,8 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path)
                      extract_grouping_cols(best_path->groupClause,
                                            subplan->targetlist),
                      extract_grouping_ops(best_path->groupClause),
+                     extract_grouping_collations(best_path->groupClause,
+                                                 subplan->targetlist),
                      subplan);
 
    copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -1949,6 +1956,8 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path)
                    extract_grouping_cols(best_path->groupClause,
                                          subplan->targetlist),
                    extract_grouping_ops(best_path->groupClause),
+                   extract_grouping_collations(best_path->groupClause,
+                                               subplan->targetlist),
                    NIL,
                    NIL,
                    best_path->numGroups,
@@ -2110,6 +2119,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
                                         list_length((List *) linitial(rollup->gsets)),
                                         new_grpColIdx,
                                         extract_grouping_ops(rollup->groupClause),
+                                        extract_grouping_collations(rollup->groupClause, subplan->targetlist),
                                         rollup->gsets,
                                         NIL,
                                         rollup->numGroups,
@@ -2147,6 +2157,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
                        numGroupCols,
                        top_grpColIdx,
                        extract_grouping_ops(rollup->groupClause),
+                       extract_grouping_collations(rollup->groupClause, subplan->targetlist),
                        rollup->gsets,
                        chain,
                        rollup->numGroups,
@@ -2246,9 +2257,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
    int         partNumCols;
    AttrNumber *partColIdx;
    Oid        *partOperators;
+   Oid        *partCollations;
    int         ordNumCols;
    AttrNumber *ordColIdx;
    Oid        *ordOperators;
+   Oid        *ordCollations;
    ListCell   *lc;
 
    /*
@@ -2270,6 +2283,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
     */
    partColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numPart);
    partOperators = (Oid *) palloc(sizeof(Oid) * numPart);
+   partCollations = (Oid *) palloc(sizeof(Oid) * numPart);
 
    partNumCols = 0;
    foreach(lc, wc->partitionClause)
@@ -2280,11 +2294,13 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
        Assert(OidIsValid(sgc->eqop));
        partColIdx[partNumCols] = tle->resno;
        partOperators[partNumCols] = sgc->eqop;
+       partCollations[partNumCols] = exprCollation((Node *) tle->expr);
        partNumCols++;
    }
 
    ordColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numOrder);
    ordOperators = (Oid *) palloc(sizeof(Oid) * numOrder);
+   ordCollations = (Oid *) palloc(sizeof(Oid) * numOrder);
 
    ordNumCols = 0;
    foreach(lc, wc->orderClause)
@@ -2295,6 +2311,7 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
        Assert(OidIsValid(sgc->eqop));
        ordColIdx[ordNumCols] = tle->resno;
        ordOperators[ordNumCols] = sgc->eqop;
+       ordCollations[ordNumCols] = exprCollation((Node *) tle->expr);
        ordNumCols++;
    }
 
@@ -2304,9 +2321,11 @@ create_windowagg_plan(PlannerInfo *root, WindowAggPath *best_path)
                          partNumCols,
                          partColIdx,
                          partOperators,
+                         partCollations,
                          ordNumCols,
                          ordColIdx,
                          ordOperators,
+                         ordCollations,
                          wc->frameOptions,
                          wc->startOffset,
                          wc->endOffset,
@@ -5326,10 +5345,12 @@ make_recursive_union(List *tlist,
        int         keyno = 0;
        AttrNumber *dupColIdx;
        Oid        *dupOperators;
+       Oid        *dupCollations;
        ListCell   *slitem;
 
        dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
        dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+       dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
 
        foreach(slitem, distinctList)
        {
@@ -5339,11 +5360,13 @@ make_recursive_union(List *tlist,
 
            dupColIdx[keyno] = tle->resno;
            dupOperators[keyno] = sortcl->eqop;
+           dupCollations[keyno] = exprCollation((Node *) tle->expr);
            Assert(OidIsValid(dupOperators[keyno]));
            keyno++;
        }
        node->dupColIdx = dupColIdx;
        node->dupOperators = dupOperators;
+       node->dupCollations = dupCollations;
    }
    node->numGroups = numGroups;
 
@@ -6015,7 +6038,7 @@ materialize_finished_plan(Plan *subplan)
 Agg *
 make_agg(List *tlist, List *qual,
         AggStrategy aggstrategy, AggSplit aggsplit,
-        int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators,
+        int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
         List *groupingSets, List *chain,
         double dNumGroups, Plan *lefttree)
 {
@@ -6031,6 +6054,7 @@ make_agg(List *tlist, List *qual,
    node->numCols = numGroupCols;
    node->grpColIdx = grpColIdx;
    node->grpOperators = grpOperators;
+   node->grpCollations = grpCollations;
    node->numGroups = numGroups;
    node->aggParams = NULL;     /* SS_finalize_plan() will fill this */
    node->groupingSets = groupingSets;
@@ -6046,8 +6070,8 @@ make_agg(List *tlist, List *qual,
 
 static WindowAgg *
 make_windowagg(List *tlist, Index winref,
-              int partNumCols, AttrNumber *partColIdx, Oid *partOperators,
-              int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators,
+              int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
+              int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
               int frameOptions, Node *startOffset, Node *endOffset,
               Oid startInRangeFunc, Oid endInRangeFunc,
               Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst,
@@ -6060,9 +6084,11 @@ make_windowagg(List *tlist, Index winref,
    node->partNumCols = partNumCols;
    node->partColIdx = partColIdx;
    node->partOperators = partOperators;
+   node->partCollations = partCollations;
    node->ordNumCols = ordNumCols;
    node->ordColIdx = ordColIdx;
    node->ordOperators = ordOperators;
+   node->ordCollations = ordCollations;
    node->frameOptions = frameOptions;
    node->startOffset = startOffset;
    node->endOffset = endOffset;
@@ -6087,6 +6113,7 @@ make_group(List *tlist,
           int numGroupCols,
           AttrNumber *grpColIdx,
           Oid *grpOperators,
+          Oid *grpCollations,
           Plan *lefttree)
 {
    Group      *node = makeNode(Group);
@@ -6095,6 +6122,7 @@ make_group(List *tlist,
    node->numCols = numGroupCols;
    node->grpColIdx = grpColIdx;
    node->grpOperators = grpOperators;
+   node->grpCollations = grpCollations;
 
    plan->qual = qual;
    plan->targetlist = tlist;
@@ -6118,6 +6146,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
    int         keyno = 0;
    AttrNumber *uniqColIdx;
    Oid        *uniqOperators;
+   Oid        *uniqCollations;
    ListCell   *slitem;
 
    plan->targetlist = lefttree->targetlist;
@@ -6132,6 +6161,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
    Assert(numCols > 0);
    uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
    uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+   uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
 
    foreach(slitem, distinctList)
    {
@@ -6140,6 +6170,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
 
        uniqColIdx[keyno] = tle->resno;
        uniqOperators[keyno] = sortcl->eqop;
+       uniqCollations[keyno] = exprCollation((Node *) tle->expr);
        Assert(OidIsValid(uniqOperators[keyno]));
        keyno++;
    }
@@ -6147,6 +6178,7 @@ make_unique_from_sortclauses(Plan *lefttree, List *distinctList)
    node->numCols = numCols;
    node->uniqColIdx = uniqColIdx;
    node->uniqOperators = uniqOperators;
+   node->uniqCollations = uniqCollations;
 
    return node;
 }
@@ -6162,6 +6194,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
    int         keyno = 0;
    AttrNumber *uniqColIdx;
    Oid        *uniqOperators;
+   Oid        *uniqCollations;
    ListCell   *lc;
 
    plan->targetlist = lefttree->targetlist;
@@ -6177,6 +6210,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
    Assert(numCols >= 0 && numCols <= list_length(pathkeys));
    uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
    uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+   uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
 
    foreach(lc, pathkeys)
    {
@@ -6245,6 +6279,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
 
        uniqColIdx[keyno] = tle->resno;
        uniqOperators[keyno] = eqop;
+       uniqCollations[keyno] = ec->ec_collation;
 
        keyno++;
    }
@@ -6252,6 +6287,7 @@ make_unique_from_pathkeys(Plan *lefttree, List *pathkeys, int numCols)
    node->numCols = numCols;
    node->uniqColIdx = uniqColIdx;
    node->uniqOperators = uniqOperators;
+   node->uniqCollations = uniqCollations;
 
    return node;
 }
@@ -6296,6 +6332,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
    int         keyno = 0;
    AttrNumber *dupColIdx;
    Oid        *dupOperators;
+   Oid        *dupCollations;
    ListCell   *slitem;
 
    plan->targetlist = lefttree->targetlist;
@@ -6309,6 +6346,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
     */
    dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
    dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+   dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
 
    foreach(slitem, distinctList)
    {
@@ -6317,6 +6355,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
 
        dupColIdx[keyno] = tle->resno;
        dupOperators[keyno] = sortcl->eqop;
+       dupCollations[keyno] = exprCollation((Node *) tle->expr);
        Assert(OidIsValid(dupOperators[keyno]));
        keyno++;
    }
@@ -6326,6 +6365,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
    node->numCols = numCols;
    node->dupColIdx = dupColIdx;
    node->dupOperators = dupOperators;
+   node->dupCollations = dupCollations;
    node->flagColIdx = flagColIdx;
    node->firstFlag = firstFlag;
    node->numGroups = numGroups;
index 14d1c67a940c837fdb1b0d01f0098884b39959c4..bb3b7969f26e4a1f7627f14a1622e293575f2376 100644 (file)
@@ -503,6 +503,31 @@ extract_grouping_ops(List *groupClause)
    return groupOperators;
 }
 
+/*
+ * extract_grouping_collations - build an array holding the collation of
+ *     each grouping column named by a SortGroupClause list
+ */
+Oid *
+extract_grouping_collations(List *groupClause, List *tlist)
+{
+   Oid        *collations;
+   Oid        *next;
+   ListCell   *lc;
+
+   collations = (Oid *) palloc(sizeof(Oid) * list_length(groupClause));
+   next = collations;
+
+   foreach(lc, groupClause)
+   {
+       SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
+       TargetEntry *entry = get_sortgroupclause_tle(sgc, tlist);
+
+       *next++ = exprCollation((Node *) entry->expr);
+   }
+
+   return collations;
+}
+
 /*
  * extract_grouping_cols - make an array of the grouping column resnos
  *     for a SortGroupClause list
index 5b897d50eed7ca254916bf7a7cb389d84968b36b..803c23aaf50eda1a6b1f77b59f3b0a68c6a27325 100644 (file)
@@ -2657,7 +2657,7 @@ get_range_nulltest(PartitionKey key)
  * Compute the hash value for given partition key values.
  */
 uint64
-compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc,
+compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
                             Datum *values, bool *isnull)
 {
    int         i;
@@ -2678,7 +2678,7 @@ compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc,
             * datatype-specific hash functions of each partition key
             * attribute.
             */
-           hash = FunctionCall2(&partsupfunc[i], values[i], seed);
+           hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i], values[i], seed);
 
            /* Form a single 64-bit hash value */
            rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
index b5c0889935b26b449ec65f7f3df8a8d52b78fe3d..31e0164ea99af4c33726ef9d1cddbad0bb86e326 100644 (file)
@@ -2159,6 +2159,7 @@ get_matching_hash_bounds(PartitionPruneContext *context,
    int         i;
    uint64      rowHash;
    int         greatest_modulus;
+   Oid        *partcollation = context->partcollation;
 
    Assert(context->strategy == PARTITION_STRATEGY_HASH);
 
@@ -2179,7 +2180,7 @@ get_matching_hash_bounds(PartitionPruneContext *context,
            isnull[i] = bms_is_member(i, nullkeys);
 
        greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
-