typedef struct BloomState
{
FmgrInfo hashFn[INDEX_MAX_KEYS];
+ Oid collations[INDEX_MAX_KEYS];
BloomOptions opts; /* copy of options on index's metapage */
int32 nColumns;
fmgr_info_copy(&(state->hashFn[i]),
index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
CurrentMemoryContext);
+ state->collations[i] = index->rd_indcollation[i];
}
/* Initialize amcache if needed with options from metapage */
* different columns will be mapped into different bits because of step
* above
*/
- hashVal = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value));
+ hashVal = DatumGetInt32(FunctionCall1Coll(&state->hashFn[attno], state->collations[attno], value));
mySrand(hashVal ^ myRand());
for (j = 0; j < state->opts.bitSize[attno]; j++)
default, <literal>c</literal> = libc, <literal>i</literal> = icu</entry>
</row>
+ <row>
+ <entry><structfield>collisdeterministic</structfield></entry>
+ <entry><type>bool</type></entry>
+ <entry></entry>
+ <entry>Is the collation deterministic?</entry>
+ </row>
+
<row>
<entry><structfield>collencoding</structfield></entry>
<entry><type>int4</type></entry>
<para>
Note that while this system allows creating collations that <quote>ignore
- case</quote> or <quote>ignore accents</quote> or similar (using
- the <literal>ks</literal> key), PostgreSQL does not at the moment allow
- such collations to act in a truly case- or accent-insensitive manner. Any
- strings that compare equal according to the collation but are not
- byte-wise equal will be sorted according to their byte values.
+ case</quote> or <quote>ignore accents</quote> or similar (using the
+ <literal>ks</literal> key), in order for such collations to act in a
+ truly case- or accent-insensitive manner, they also need to be declared as not
+ <firstterm>deterministic</firstterm> in <command>CREATE COLLATION</command>;
+ see <xref linkend="collation-nondeterministic"/>.
+ Otherwise, any strings that compare equal according to the collation but
+ are not byte-wise equal will be sorted according to their byte values.
</para>
<note>
</para>
</sect4>
</sect3>
+
+ <sect3 id="collation-nondeterministic">
+ <title>Nondeterminstic Collations</title>
+
+ <para>
+ A collation is either <firstterm>deterministic</firstterm> or
+ <firstterm>nondeterministic</firstterm>. A deterministic collation uses
+ deterministic comparisons, which means that it considers strings to be
+ equal only if they consist of the same byte sequence. Nondeterministic
+ comparison may determine strings to be equal even if they consist of
+ different bytes. Typical situations include case-insensitive comparison,
+ accent-insensitive comparison, as well as comparion of strings in
+ different Unicode normal forms. It is up to the collation provider to
+ actually implement such insensitive comparisons; the deterministic flag
+ only determines whether ties are to be broken using bytewise comparison.
+ See also <ulink url="https://unicode.org/reports/tr10">Unicode Technical
+ Standard 10</ulink> for more information on the terminology.
+ </para>
+
+ <para>
+ To create a nondeterministic collation, specify the property
+ <literal>deterministic = false</literal> to <command>CREATE
+ COLLATION</command>, for example:
+<programlisting>
+CREATE COLLATION ndcoll (provider = icu, locale = 'und', deterministic = false);
+</programlisting>
+ This example would use the standard Unicode collation in a
+ nondeterministic way. In particular, this would allow strings in
+ different normal forms to be compared correctly. More interesting
+ examples make use of the ICU customization facilities explained above.
+ For example:
+<programlisting>
+CREATE COLLATION case_insensitive (provider = icu, locale = 'und-u-ks-level2', deterministic = false);
+CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-true', deterministic = false);
+</programlisting>
+ </para>
+
+ <para>
+ All standard and predefined collations are deterministic, all
+ user-defined collations are deterministic by default. While
+ nondeterministic collations give a more <quote>correct</quote> behavior,
+ especially when considering the full power of Unicode and its many
+ special cases, they also have some drawbacks. Foremost, their use leads
+ to a performance penalty. Also, certain operations are not possible with
+ nondeterministic collations, such as pattern matching operations.
+ Therefore, they should be used only in cases where they are specifically
+ wanted.
+ </para>
+ </sect3>
</sect2>
</sect1>
exactly like <type>text</type>.
</para>
+ <tip>
+ <para>
+ Consider using <firstterm>nondeterministic collations</firstterm> (see
+ <xref linkend="collation-nondeterministic"/>) instead of this module. They
+ can be used for case-insensitive comparisons, accent-insensitive
+ comparisons, and other combinations, and they handle more Unicode special
+ cases correctly.
+ </para>
+ </tip>
+
<sect2>
<title>Rationale</title>
will be invoked instead.
</para>
</listitem>
+
+ <listitem>
+ <para>
+ The approach of lower-casing strings for comparison does not handle some
+ Unicode special cases correctly, for example when one upper-case letter
+ has two lower-case letter equivalents. Unicode distinguishes between
+ <firstterm>case mapping</firstterm> and <firstterm>case
+ folding</firstterm> for this reason. Use nondeterministic collations
+ instead of <type>citext</type> to handle that correctly.
+ </para>
+ </listitem>
</itemizedlist>
</sect2>
</para>
</caution>
+ <para>
+ The pattern matching operators of all three kinds do not support
+ nondeterministic collations. If required, apply a different collation to
+ the expression to work around this limitation.
+ </para>
+
<sect2 id="functions-like">
<title><function>LIKE</function></title>
[ LC_COLLATE = <replaceable>lc_collate</replaceable>, ]
[ LC_CTYPE = <replaceable>lc_ctype</replaceable>, ]
[ PROVIDER = <replaceable>provider</replaceable>, ]
+ [ DETERMINISTIC = <replaceable>boolean</replaceable>, ]
[ VERSION = <replaceable>version</replaceable> ]
)
CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replaceable>existing_collation</replaceable>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><literal>DETERMINISTIC</literal></term>
+
+ <listitem>
+ <para>
+ Specifies whether the collation should use deterministic comparisons.
+ The default is true. A deterministic comparison considers strings that
+ are not byte-wise equal to be unequal even if they are considered
+ logically equal by the comparison. PostgreSQL breaks ties using a
+ byte-wise comparison. Comparison that is not deterministic can make the
+ collation be, say, case- or accent-insensitive. For that, you need to
+ choose an appropriate <literal>LC_COLLATE</literal> setting
+ <emphasis>and</emphasis> set the collation to not deterministic here.
+ </para>
+
+ <para>
+ Nondeterministic collations are only supported with the ICU provider.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><replaceable>version</replaceable></term>
#include "postgres.h"
#include "access/hash.h"
+#include "catalog/pg_collation.h"
#include "utils/builtins.h"
#include "utils/hashutils.h"
+#include "utils/pg_locale.h"
/*
* Datatype-specific hash functions.
hashtext(PG_FUNCTION_ARGS)
{
text *key = PG_GETARG_TEXT_PP(0);
+ Oid collid = PG_GET_COLLATION();
+ pg_locale_t mylocale = 0;
Datum result;
- /*
- * Note: this is currently identical in behavior to hashvarlena, but keep
- * it as a separate function in case we someday want to do something
- * different in non-C locales. (See also hashbpchar, if so.)
- */
- result = hash_any((unsigned char *) VARDATA_ANY(key),
- VARSIZE_ANY_EXHDR(key));
+ if (!collid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string hashing"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+ if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (!mylocale || mylocale->deterministic)
+ {
+ result = hash_any((unsigned char *) VARDATA_ANY(key),
+ VARSIZE_ANY_EXHDR(key));
+ }
+ else
+ {
+#ifdef USE_ICU
+ if (mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t ulen = -1;
+ UChar *uchar = NULL;
+ Size bsize;
+ uint8_t *buf;
+
+ ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+ bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, NULL, 0);
+ buf = palloc(bsize);
+ ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, buf, bsize);
+
+ result = hash_any(buf, bsize);
+
+ pfree(buf);
+ }
+ else
+#endif
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+ }
/* Avoid leaking memory for toasted inputs */
PG_FREE_IF_COPY(key, 0);
hashtextextended(PG_FUNCTION_ARGS)
{
text *key = PG_GETARG_TEXT_PP(0);
+ Oid collid = PG_GET_COLLATION();
+ pg_locale_t mylocale = 0;
Datum result;
- /* Same approach as hashtext */
- result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
- VARSIZE_ANY_EXHDR(key),
- PG_GETARG_INT64(1));
+ if (!collid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string hashing"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+ if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (!mylocale || mylocale->deterministic)
+ {
+ result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
+ VARSIZE_ANY_EXHDR(key),
+ PG_GETARG_INT64(1));
+ }
+ else
+ {
+#ifdef USE_ICU
+ if (mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t ulen = -1;
+ UChar *uchar = NULL;
+ Size bsize;
+ uint8_t *buf;
+
+ ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+ bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, NULL, 0);
+ buf = palloc(bsize);
+ ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, buf, bsize);
+
+ result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
+
+ pfree(buf);
+ }
+ else
+#endif
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+ }
PG_FREE_IF_COPY(key, 0);
* query (prefix) string, so we don't need to check it again.
*/
res = (level >= queryLen) ||
- DatumGetBool(DirectFunctionCall2(text_starts_with,
+ DatumGetBool(DirectFunctionCall2Coll(text_starts_with,
+ PG_GET_COLLATION(),
out->leafValue,
PointerGetDatum(query)));
CollationCreate(const char *collname, Oid collnamespace,
Oid collowner,
char collprovider,
+ bool collisdeterministic,
int32 collencoding,
const char *collcollate, const char *collctype,
const char *collversion,
values[Anum_pg_collation_collnamespace - 1] = ObjectIdGetDatum(collnamespace);
values[Anum_pg_collation_collowner - 1] = ObjectIdGetDatum(collowner);
values[Anum_pg_collation_collprovider - 1] = CharGetDatum(collprovider);
+ values[Anum_pg_collation_collisdeterministic - 1] = BoolGetDatum(collisdeterministic);
values[Anum_pg_collation_collencoding - 1] = Int32GetDatum(collencoding);
namestrcpy(&name_collate, collcollate);
values[Anum_pg_collation_collcollate - 1] = NameGetDatum(&name_collate);
DefElem *lccollateEl = NULL;
DefElem *lcctypeEl = NULL;
DefElem *providerEl = NULL;
+ DefElem *deterministicEl = NULL;
DefElem *versionEl = NULL;
char *collcollate = NULL;
char *collctype = NULL;
char *collproviderstr = NULL;
+ bool collisdeterministic = true;
int collencoding = 0;
char collprovider = 0;
char *collversion = NULL;
defelp = &lcctypeEl;
else if (strcmp(defel->defname, "provider") == 0)
defelp = &providerEl;
+ else if (strcmp(defel->defname, "deterministic") == 0)
+ defelp = &deterministicEl;
else if (strcmp(defel->defname, "version") == 0)
defelp = &versionEl;
else
collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype));
collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
+ collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic;
collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding;
ReleaseSysCache(tp);
if (providerEl)
collproviderstr = defGetString(providerEl);
+ if (deterministicEl)
+ collisdeterministic = defGetBoolean(deterministicEl);
+
if (versionEl)
collversion = defGetString(versionEl);
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("parameter \"lc_ctype\" must be specified")));
+ /*
+ * Nondeterministic collations are currently only supported with ICU
+ * because that's the only case where it can actually make a difference.
+ * So we can save writing the code for the other providers.
+ */
+ if (!collisdeterministic && collprovider != COLLPROVIDER_ICU)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations not supported with this provider")));
+
if (!fromEl)
{
if (collprovider == COLLPROVIDER_ICU)
collNamespace,
GetUserId(),
collprovider,
+ collisdeterministic,
collencoding,
collcollate,
collctype,
* about existing ones.
*/
collid = CollationCreate(localebuf, nspid, GetUserId(),
- COLLPROVIDER_LIBC, enc,
+ COLLPROVIDER_LIBC, true, enc,
localebuf, localebuf,
get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
true, true);
int enc = aliases[i].enc;
collid = CollationCreate(alias, nspid, GetUserId(),
- COLLPROVIDER_LIBC, enc,
+ COLLPROVIDER_LIBC, true, enc,
locale, locale,
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
true, true);
collid = CollationCreate(psprintf("%s-x-icu", langtag),
nspid, GetUserId(),
- COLLPROVIDER_ICU, -1,
+ COLLPROVIDER_ICU, true, -1,
collcollate, collcollate,
get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
true, true);
{
const char *qSchemaName = quote_identifier(schemaName);
- t_sql = DirectFunctionCall3(replace_text,
+ t_sql = DirectFunctionCall3Coll(replace_text,
+ C_COLLATION_OID,
t_sql,
CStringGetTextDatum("@extschema@"),
CStringGetTextDatum(qSchemaName));
*/
if (control->module_pathname)
{
- t_sql = DirectFunctionCall3(replace_text,
+ t_sql = DirectFunctionCall3Coll(replace_text,
+ C_COLLATION_OID,
t_sql,
CStringGetTextDatum("MODULE_PATHNAME"),
CStringGetTextDatum(control->module_pathname));
int numCols,
const AttrNumber *keyColIdx,
const Oid *eqfunctions,
+ const Oid *collations,
PlanState *parent)
{
ExprState *state = makeNode(ExprState);
Form_pg_attribute latt = TupleDescAttr(ldesc, attno - 1);
Form_pg_attribute ratt = TupleDescAttr(rdesc, attno - 1);
Oid foid = eqfunctions[natt];
+ Oid collid = collations[natt];
FmgrInfo *finfo;
FunctionCallInfo fcinfo;
AclResult aclresult;
fmgr_info(foid, finfo);
fmgr_info_set_expr(NULL, finfo);
InitFunctionCallInfoData(*fcinfo, finfo, 2,
- InvalidOid, NULL, NULL);
+ collid, NULL, NULL);
/* left arg */
scratch.opcode = EEOP_INNER_VAR;
int numCols,
const AttrNumber *keyColIdx,
const Oid *eqOperators,
+ const Oid *collations,
PlanState *parent)
{
Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
/* build actual expression */
expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
- numCols, keyColIdx, eqFunctions,
+ numCols, keyColIdx, eqFunctions, collations,
parent);
return expr;
int numCols, AttrNumber *keyColIdx,
const Oid *eqfuncoids,
FmgrInfo *hashfunctions,
+ Oid *collations,
long nbuckets, Size additionalsize,
MemoryContext metacxt,
MemoryContext tablecxt,
hashtable->numCols = numCols;
hashtable->keyColIdx = keyColIdx;
hashtable->tab_hash_funcs = hashfunctions;
+ hashtable->tab_collations = collations;
hashtable->tablecxt = tablecxt;
hashtable->tempcxt = tempcxt;
hashtable->entrysize = entrysize;
hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
&TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
numCols,
- keyColIdx, eqfuncoids,
+ keyColIdx, eqfuncoids, collations,
NULL);
/*
int numCols, AttrNumber *keyColIdx,
const Oid *eqfuncoids,
FmgrInfo *hashfunctions,
+ Oid *collations,
long nbuckets, Size additionalsize,
MemoryContext tablecxt,
MemoryContext tempcxt,
numCols, keyColIdx,
eqfuncoids,
hashfunctions,
+ collations,
nbuckets, additionalsize,
tablecxt,
tablecxt,
{
uint32 hkey;
- hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
- attr));
+ hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
+ hashtable->tab_collations[i],
+ attr));
hashkey ^= hkey;
}
}
greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
rowHash = compute_partition_hash_value(key->partnatts,
key->partsupfunc,
+ key->partcollation,
values, isnull);
part_index = boundinfo->indexes[rowHash % greatest_modulus];
regop,
searchslot->tts_values[mainattno - 1]);
+ skey[attoff].sk_collation = idxrel->rd_indcollation[attoff];
+
/* Check for null value. */
if (searchslot->tts_isnull[mainattno - 1])
{
errmsg("could not identify an equality operator for type %s",
format_type_be(att->atttypid))));
- if (!DatumGetBool(FunctionCall2(&typentry->eq_opr_finfo,
+ if (!DatumGetBool(FunctionCall2Coll(&typentry->eq_opr_finfo,
+ att->attcollation,
slot1->tts_values[attrnum],
slot2->tts_values[attrnum])))
return false;
/*
* If DISTINCT mode, and not distinct from prior, skip it.
- *
- * Note: we assume equality functions don't care about collation.
*/
if (isDistinct &&
haveOldVal &&
((oldIsNull && *isNull) ||
(!oldIsNull && !*isNull &&
oldAbbrevVal == newAbbrevVal &&
- DatumGetBool(FunctionCall2(&pertrans->equalfnOne,
+ DatumGetBool(FunctionCall2Coll(&pertrans->equalfnOne,
+ pertrans->aggCollation,
oldVal, *newVal)))))
{
/* equal to prior, so forget this one */
perhash->hashGrpColIdxHash,
perhash->eqfuncoids,
perhash->hashfunctions,
+ perhash->aggnode->grpCollations,
perhash->aggnode->numGroups,
additionalsize,
aggstate->ss.ps.state->es_query_cxt,
length,
aggnode->grpColIdx,
aggnode->grpOperators,
+ aggnode->grpCollations,
(PlanState *) aggstate);
}
aggnode->numCols,
aggnode->grpColIdx,
aggnode->grpOperators,
+ aggnode->grpCollations,
(PlanState *) aggstate);
}
}
numDistinctCols,
pertrans->sortColIdx,
ops,
+ pertrans->sortCollations,
&aggstate->ss.ps);
pfree(ops);
}
node->numCols,
node->grpColIdx,
node->grpOperators,
+ node->grpCollations,
&grpstate->ss.ps);
return grpstate;
* ----------------------------------------------------------------
*/
HashJoinTable
-ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
+ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls)
{
Hash *node;
HashJoinTable hashtable;
int nkeys;
int i;
ListCell *ho;
+ ListCell *hc;
MemoryContext oldcxt;
/*
hashtable->inner_hashfunctions =
(FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
+ hashtable->collations = (Oid *) palloc(nkeys * sizeof(Oid));
i = 0;
- foreach(ho, hashOperators)
+ forboth(ho, hashOperators, hc, hashCollations)
{
Oid hashop = lfirst_oid(ho);
Oid left_hashfn;
fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
hashtable->hashStrict[i] = op_strict(hashop);
+ hashtable->collations[i] = lfirst_oid(hc);
i++;
}
/* Compute the hash function */
uint32 hkey;
- hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], keyval));
+ hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], hashtable->collations[i], keyval));
hashkey ^= hkey;
}
uint32 hashvalue;
int bucket;
- hashvalue = DatumGetUInt32(FunctionCall1(&hashfunctions[0],
- sslot.values[i]));
+ hashvalue = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[0],
+ hashtable->collations[0],
+ sslot.values[i]));
/*
* While we have not hit a hole in the hashtable and have not hit
*/
hashtable = ExecHashTableCreate(hashNode,
node->hj_HashOperators,
+ node->hj_Collations,
HJ_FILL_INNER(node));
node->hj_HashTable = hashtable;
List *rclauses;
List *rhclauses;
List *hoperators;
+ List *hcollations;
TupleDesc outerDesc,
innerDesc;
ListCell *l;
rclauses = NIL;
rhclauses = NIL;
hoperators = NIL;
+ hcollations = NIL;
foreach(l, node->hashclauses)
{
OpExpr *hclause = lfirst_node(OpExpr, l);
rhclauses = lappend(rhclauses, ExecInitExpr(lsecond(hclause->args),
innerPlanState(hjstate)));
hoperators = lappend_oid(hoperators, hclause->opno);
+ hcollations = lappend_oid(hcollations, hclause->inputcollid);
}
hjstate->hj_OuterHashKeys = lclauses;
hjstate->hj_InnerHashKeys = rclauses;
hjstate->hj_HashOperators = hoperators;
+ hjstate->hj_Collations = hcollations;
/* child Hash node needs to evaluate inner hash keys, too */
((HashState *) innerPlanState(hjstate))->hashkeys = rhclauses;
node->dupColIdx,
rustate->eqfuncoids,
rustate->hashfunctions,
+ node->dupCollations,
node->numGroups,
0,
rustate->ps.state->es_query_cxt,
node->dupColIdx,
setopstate->eqfuncoids,
setopstate->hashfunctions,
+ node->dupCollations,
node->numGroups,
0,
setopstate->ps.state->es_query_cxt,
node->numCols,
node->dupColIdx,
node->dupOperators,
+ node->dupCollations,
&setopstate->ps);
if (node->strategy == SETOP_HASHED)
node->keyColIdx,
node->tab_eq_funcoids,
node->tab_hash_funcs,
+ node->tab_collations,
nbuckets,
0,
node->planstate->state->es_query_cxt,
node->keyColIdx,
node->tab_eq_funcoids,
node->tab_hash_funcs,
+ node->tab_collations,
nbuckets,
0,
node->planstate->state->es_query_cxt,
int numCols,
AttrNumber *matchColIdx,
FmgrInfo *eqfunctions,
+ const Oid *collations,
MemoryContext evalContext)
{
MemoryContext oldContext;
continue; /* can't prove anything here */
/* Apply the type-specific equality function */
-
- if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
+ if (!DatumGetBool(FunctionCall2Coll(&eqfunctions[i],
+ collations[i],
attr1, attr2)))
{
result = true; /* they are unequal */
if (!execTuplesUnequal(slot, hashtable->tableslot,
numCols, keyColIdx,
eqfunctions,
+ hashtable->tab_collations,
hashtable->tempcxt))
{
TermTupleHashIterator(&hashiter);
sstate->tab_eq_funcoids = NULL;
sstate->tab_hash_funcs = NULL;
sstate->tab_eq_funcs = NULL;
+ sstate->tab_collations = NULL;
sstate->lhs_hash_funcs = NULL;
sstate->cur_eq_funcs = NULL;
sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid));
sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
sstate->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+ sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid));
sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
i = 1;
fmgr_info(left_hashfn, &sstate->lhs_hash_funcs[i - 1]);
fmgr_info(right_hashfn, &sstate->tab_hash_funcs[i - 1]);
+ /* Set collation */
+ sstate->tab_collations[i - 1] = opexpr->inputcollid;
+
i++;
}
ncols,
sstate->keyColIdx,
sstate->tab_eq_funcoids,
+ sstate->tab_collations,
parent);
}
node->numCols,
node->uniqColIdx,
node->uniqOperators,
+ node->uniqCollations,
&uniquestate->ps);
return uniquestate;
node->partNumCols,
node->partColIdx,
node->partOperators,
+ node->partCollations,
&winstate->ss.ps);
if (node->ordNumCols > 0)
node->ordNumCols,
node->ordColIdx,
node->ordOperators,
+ node->ordCollations,
&winstate->ss.ps);
/*
{
COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
+ COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid));
}
COPY_SCALAR_FIELD(numGroups);
COPY_SCALAR_FIELD(numCols);
COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
+ COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid));
return newnode;
}
{
COPY_POINTER_FIELD(grpColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
+ COPY_POINTER_FIELD(grpCollations, from->numCols * sizeof(Oid));
}
COPY_SCALAR_FIELD(numGroups);
COPY_BITMAPSET_FIELD(aggParams);
{
COPY_POINTER_FIELD(partColIdx, from->partNumCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(partOperators, from->partNumCols * sizeof(Oid));
+ COPY_POINTER_FIELD(partCollations, from->partNumCols * sizeof(Oid));
}
COPY_SCALAR_FIELD(ordNumCols);
if (from->ordNumCols > 0)
{
COPY_POINTER_FIELD(ordColIdx, from->ordNumCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(ordOperators, from->ordNumCols * sizeof(Oid));
+ COPY_POINTER_FIELD(ordCollations, from->ordNumCols * sizeof(Oid));
}
COPY_SCALAR_FIELD(frameOptions);
COPY_NODE_FIELD(startOffset);
COPY_SCALAR_FIELD(numCols);
COPY_POINTER_FIELD(uniqColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(uniqOperators, from->numCols * sizeof(Oid));
+ COPY_POINTER_FIELD(uniqCollations, from->numCols * sizeof(Oid));
return newnode;
}
COPY_SCALAR_FIELD(numCols);
COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
+ COPY_POINTER_FIELD(dupCollations, from->numCols * sizeof(Oid));
COPY_SCALAR_FIELD(flagColIdx);
COPY_SCALAR_FIELD(firstFlag);
COPY_SCALAR_FIELD(numGroups);
WRITE_INT_FIELD(numCols);
WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols);
WRITE_OID_ARRAY(dupOperators, node->numCols);
+ WRITE_OID_ARRAY(dupCollations, node->numCols);
WRITE_LONG_FIELD(numGroups);
}
WRITE_INT_FIELD(numCols);
WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols);
WRITE_OID_ARRAY(grpOperators, node->numCols);
+ WRITE_OID_ARRAY(grpCollations, node->numCols);
WRITE_LONG_FIELD(numGroups);
WRITE_BITMAPSET_FIELD(aggParams);
WRITE_NODE_FIELD(groupingSets);
WRITE_INT_FIELD(partNumCols);
WRITE_ATTRNUMBER_ARRAY(partColIdx, node->partNumCols);
WRITE_OID_ARRAY(partOperators, node->partNumCols);
+ WRITE_OID_ARRAY(partCollations, node->partNumCols);
WRITE_INT_FIELD(ordNumCols);
WRITE_ATTRNUMBER_ARRAY(ordColIdx, node->ordNumCols);
WRITE_OID_ARRAY(ordOperators, node->ordNumCols);
+ WRITE_OID_ARRAY(ordCollations, node->ordNumCols);
WRITE_INT_FIELD(frameOptions);
WRITE_NODE_FIELD(startOffset);
WRITE_NODE_FIELD(endOffset);
WRITE_INT_FIELD(numCols);
WRITE_ATTRNUMBER_ARRAY(grpColIdx, node->numCols);
WRITE_OID_ARRAY(grpOperators, node->numCols);
+ WRITE_OID_ARRAY(grpCollations, node->numCols);
}
static void
WRITE_INT_FIELD(numCols);
WRITE_ATTRNUMBER_ARRAY(uniqColIdx, node->numCols);
WRITE_OID_ARRAY(uniqOperators, node->numCols);
+ WRITE_OID_ARRAY(uniqCollations, node->numCols);
}
static void
WRITE_INT_FIELD(numCols);
WRITE_ATTRNUMBER_ARRAY(dupColIdx, node->numCols);
WRITE_OID_ARRAY(dupOperators, node->numCols);
+ WRITE_OID_ARRAY(dupCollations, node->numCols);
WRITE_INT_FIELD(flagColIdx);
WRITE_INT_FIELD(firstFlag);
WRITE_LONG_FIELD(numGroups);
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols);
READ_OID_ARRAY(dupOperators, local_node->numCols);
+ READ_OID_ARRAY(dupCollations, local_node->numCols);
READ_LONG_FIELD(numGroups);
READ_DONE();
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
READ_OID_ARRAY(grpOperators, local_node->numCols);
+ READ_OID_ARRAY(grpCollations, local_node->numCols);
READ_DONE();
}
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
READ_OID_ARRAY(grpOperators, local_node->numCols);
+ READ_OID_ARRAY(grpCollations, local_node->numCols);
READ_LONG_FIELD(numGroups);
READ_BITMAPSET_FIELD(aggParams);
READ_NODE_FIELD(groupingSets);
READ_INT_FIELD(partNumCols);
READ_ATTRNUMBER_ARRAY(partColIdx, local_node->partNumCols);
READ_OID_ARRAY(partOperators, local_node->partNumCols);
+ READ_OID_ARRAY(partCollations, local_node->partNumCols);
READ_INT_FIELD(ordNumCols);
READ_ATTRNUMBER_ARRAY(ordColIdx, local_node->ordNumCols);
READ_OID_ARRAY(ordOperators, local_node->ordNumCols);
+ READ_OID_ARRAY(ordCollations, local_node->ordNumCols);
READ_INT_FIELD(frameOptions);
READ_NODE_FIELD(startOffset);
READ_NODE_FIELD(endOffset);
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(uniqColIdx, local_node->numCols);
READ_OID_ARRAY(uniqOperators, local_node->numCols);
+ READ_OID_ARRAY(uniqCollations, local_node->numCols);
READ_DONE();
}
READ_INT_FIELD(numCols);
READ_ATTRNUMBER_ARRAY(dupColIdx, local_node->numCols);
READ_OID_ARRAY(dupOperators, local_node->numCols);
+ READ_OID_ARRAY(dupCollations, local_node->numCols);
READ_INT_FIELD(flagColIdx);
READ_INT_FIELD(firstFlag);
READ_LONG_FIELD(numGroups);
Plan *lefttree);
static Material *make_material(Plan *lefttree);
static WindowAgg *make_windowagg(List *tlist, Index winref,
- int partNumCols, AttrNumber *partColIdx, Oid *partOperators,
- int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators,
+ int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
+ int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
int frameOptions, Node *startOffset, Node *endOffset,
Oid startInRangeFunc, Oid endInRangeFunc,
Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst,
Plan *lefttree);
static Group *make_group(List *tlist, List *qual, int numGroupCols,
- AttrNumber *grpColIdx, Oid *grpOperators,
+ AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
Plan *lefttree);
static Unique *make_unique_from_sortclauses(Plan *lefttree, List *distinctList);
static Unique *make_unique_from_pathkeys(Plan *lefttree,
bool newitems;
int numGroupCols;
AttrNumber *groupColIdx;
+ Oid *groupCollations;
int groupColPos;
ListCell *l;
newtlist = subplan->targetlist;
numGroupCols = list_length(uniq_exprs);
groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber));
+ groupCollations = (Oid *) palloc(numGroupCols * sizeof(Oid));
groupColPos = 0;
foreach(l, uniq_exprs)
tle = tlist_member(uniqexpr, newtlist);
if (!tle) /* shouldn't happen */
elog(ERROR, "failed to find unique expression in subplan tlist");
- groupColIdx[groupColPos++] = tle->resno;
+ groupColIdx[groupColPos] = tle->resno;
+ groupCollations[groupColPos] = exprCollation((Node *) tle->expr);
+ groupColPos++;
}
if (best_path->umethod == UNIQUE_PATH_HASH)
numGroupCols,
groupColIdx,
groupOperators,
+ groupCollations,
NIL,
NIL,
best_path->path.rows,
extract_grouping_cols(best_path->groupClause,
subplan->targetlist),
extract_grouping_ops(best_path->groupClause),
+ extract_grouping_collations(best_path->groupClause,
+ subplan->targetlist),
subplan);
copy_generic_path_info(&plan->plan, (Path *) best_path);
extract_grouping_cols(best_path->groupClause,
subplan->targetlist),
extract_grouping_ops(best_path->groupClause),
+ extract_grouping_collations(best_path->groupClause,
+ subplan->targetlist),
NIL,
NIL,
best_path->numGroups,
list_length((List *) linitial(rollup->gsets)),
new_grpColIdx,
extract_grouping_ops(rollup->groupClause),
+ extract_grouping_collations(rollup->groupClause, subplan->targetlist),
rollup->gsets,
NIL,
rollup->numGroups,
numGroupCols,
top_grpColIdx,
extract_grouping_ops(rollup->groupClause),
+ extract_grouping_collations(rollup->groupClause, subplan->targetlist),
rollup->gsets,
chain,
rollup->numGroups,
int partNumCols;
AttrNumber *partColIdx;
Oid *partOperators;
+ Oid *partCollations;
int ordNumCols;
AttrNumber *ordColIdx;
Oid *ordOperators;
+ Oid *ordCollations;
ListCell *lc;
/*
*/
partColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numPart);
partOperators = (Oid *) palloc(sizeof(Oid) * numPart);
+ partCollations = (Oid *) palloc(sizeof(Oid) * numPart);
partNumCols = 0;
foreach(lc, wc->partitionClause)
Assert(OidIsValid(sgc->eqop));
partColIdx[partNumCols] = tle->resno;
partOperators[partNumCols] = sgc->eqop;
+ partCollations[partNumCols] = exprCollation((Node *) tle->expr);
partNumCols++;
}
ordColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numOrder);
ordOperators = (Oid *) palloc(sizeof(Oid) * numOrder);
+ ordCollations = (Oid *) palloc(sizeof(Oid) * numOrder);
ordNumCols = 0;
foreach(lc, wc->orderClause)
Assert(OidIsValid(sgc->eqop));
ordColIdx[ordNumCols] = tle->resno;
ordOperators[ordNumCols] = sgc->eqop;
+ ordCollations[ordNumCols] = exprCollation((Node *) tle->expr);
ordNumCols++;
}
partNumCols,
partColIdx,
partOperators,
+ partCollations,
ordNumCols,
ordColIdx,
ordOperators,
+ ordCollations,
wc->frameOptions,
wc->startOffset,
wc->endOffset,
int keyno = 0;
AttrNumber *dupColIdx;
Oid *dupOperators;
+ Oid *dupCollations;
ListCell *slitem;
dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+ dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
foreach(slitem, distinctList)
{
dupColIdx[keyno] = tle->resno;
dupOperators[keyno] = sortcl->eqop;
+ dupCollations[keyno] = exprCollation((Node *) tle->expr);
Assert(OidIsValid(dupOperators[keyno]));
keyno++;
}
node->dupColIdx = dupColIdx;
node->dupOperators = dupOperators;
+ node->dupCollations = dupCollations;
}
node->numGroups = numGroups;
Agg *
make_agg(List *tlist, List *qual,
AggStrategy aggstrategy, AggSplit aggsplit,
- int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators,
+ int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
List *groupingSets, List *chain,
double dNumGroups, Plan *lefttree)
{
node->numCols = numGroupCols;
node->grpColIdx = grpColIdx;
node->grpOperators = grpOperators;
+ node->grpCollations = grpCollations;
node->numGroups = numGroups;
node->aggParams = NULL; /* SS_finalize_plan() will fill this */
node->groupingSets = groupingSets;
static WindowAgg *
make_windowagg(List *tlist, Index winref,
- int partNumCols, AttrNumber *partColIdx, Oid *partOperators,
- int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators,
+ int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
+ int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
int frameOptions, Node *startOffset, Node *endOffset,
Oid startInRangeFunc, Oid endInRangeFunc,
Oid inRangeColl, bool inRangeAsc, bool inRangeNullsFirst,
node->partNumCols = partNumCols;
node->partColIdx = partColIdx;
node->partOperators = partOperators;
+ node->partCollations = partCollations;
node->ordNumCols = ordNumCols;
node->ordColIdx = ordColIdx;
node->ordOperators = ordOperators;
+ node->ordCollations = ordCollations;
node->frameOptions = frameOptions;
node->startOffset = startOffset;
node->endOffset = endOffset;
int numGroupCols,
AttrNumber *grpColIdx,
Oid *grpOperators,
+ Oid *grpCollations,
Plan *lefttree)
{
Group *node = makeNode(Group);
node->numCols = numGroupCols;
node->grpColIdx = grpColIdx;
node->grpOperators = grpOperators;
+ node->grpCollations = grpCollations;
plan->qual = qual;
plan->targetlist = tlist;
int keyno = 0;
AttrNumber *uniqColIdx;
Oid *uniqOperators;
+ Oid *uniqCollations;
ListCell *slitem;
plan->targetlist = lefttree->targetlist;
Assert(numCols > 0);
uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+ uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
foreach(slitem, distinctList)
{
uniqColIdx[keyno] = tle->resno;
uniqOperators[keyno] = sortcl->eqop;
+ uniqCollations[keyno] = exprCollation((Node *) tle->expr);
Assert(OidIsValid(uniqOperators[keyno]));
keyno++;
}
node->numCols = numCols;
node->uniqColIdx = uniqColIdx;
node->uniqOperators = uniqOperators;
+ node->uniqCollations = uniqCollations;
return node;
}
int keyno = 0;
AttrNumber *uniqColIdx;
Oid *uniqOperators;
+ Oid *uniqCollations;
ListCell *lc;
plan->targetlist = lefttree->targetlist;
Assert(numCols >= 0 && numCols <= list_length(pathkeys));
uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
uniqOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+ uniqCollations = (Oid *) palloc(sizeof(Oid) * numCols);
foreach(lc, pathkeys)
{
uniqColIdx[keyno] = tle->resno;
uniqOperators[keyno] = eqop;
+ uniqCollations[keyno] = ec->ec_collation;
keyno++;
}
node->numCols = numCols;
node->uniqColIdx = uniqColIdx;
node->uniqOperators = uniqOperators;
+ node->uniqCollations = uniqCollations;
return node;
}
int keyno = 0;
AttrNumber *dupColIdx;
Oid *dupOperators;
+ Oid *dupCollations;
ListCell *slitem;
plan->targetlist = lefttree->targetlist;
*/
dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+ dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
foreach(slitem, distinctList)
{
dupColIdx[keyno] = tle->resno;
dupOperators[keyno] = sortcl->eqop;
+ dupCollations[keyno] = exprCollation((Node *) tle->expr);
Assert(OidIsValid(dupOperators[keyno]));
keyno++;
}
node->numCols = numCols;
node->dupColIdx = dupColIdx;
node->dupOperators = dupOperators;
+ node->dupCollations = dupCollations;
node->flagColIdx = flagColIdx;
node->firstFlag = firstFlag;
node->numGroups = numGroups;
return groupOperators;
}
+/*
+ * extract_grouping_collations - make an array of the grouping column collations
+ * for a SortGroupClause list
+ */
+Oid *
+extract_grouping_collations(List *groupClause, List *tlist)
+{
+ int numCols = list_length(groupClause);
+ int colno = 0;
+ Oid *grpCollations;
+ ListCell *glitem;
+
+ grpCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+
+ foreach(glitem, groupClause)
+ {
+ SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
+ TargetEntry *tle = get_sortgroupclause_tle(groupcl, tlist);
+
+ grpCollations[colno++] = exprCollation((Node *) tle->expr);
+ }
+
+ return grpCollations;
+}
+
/*
* extract_grouping_cols - make an array of the grouping column resnos
* for a SortGroupClause list
* Compute the hash value for given partition key values.
*/
uint64
-compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc,
+compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
Datum *values, bool *isnull)
{
int i;
* datatype-specific hash functions of each partition key
* attribute.
*/
- hash = FunctionCall2(&partsupfunc[i], values[i], seed);
+ hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i], values[i], seed);
/* Form a single 64-bit hash value */
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
int i;
uint64 rowHash;
int greatest_modulus;
+ Oid *partcollation = context->partcollation;
Assert(context->strategy == PARTITION_STRATEGY_HASH);
isnull[i] = bms_is_member(i, nullkeys);
greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
-