summaryrefslogtreecommitdiff
path: root/src/include/nodes
diff options
context:
space:
mode:
authorPeter Eisentraut2019-03-22 11:09:32 +0000
committerPeter Eisentraut2019-03-22 11:12:43 +0000
commit5e1963fb764e9cc092e0f7b58b28985c311431d9 (patch)
tree544492f24e3d48d00bd2a19c11663f84f1e18ce4 /src/include/nodes
parent2ab6d28d233af17987ea323e3235b2bda89b4f2e (diff)
Collations with nondeterministic comparison
This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. This functionality is only supported with the ICU provider. At least glibc doesn't appear to have any locales that work in a nondeterministic way, so it's not worth supporting this for the libc provider. The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison). This patch makes changes in three areas: - CREATE COLLATION DDL changes and system catalog changes to support this new flag. - Many executor nodes and auxiliary code are extended to track collations. Previously, this code would just throw away collation information, because the eventually-called user-defined functions didn't use it since they only cared about equality, which didn't need collation information. - String data type functions that do equality comparisons and hashing are changed to take the (non-)deterministic flag into account. For comparison, this just means skipping various shortcuts and tie breakers that use byte-wise comparison. For hashing, we first need to convert the input string to a canonical "sort key" using the ICU analogue of strxfrm(). Reviewed-by: Daniel Verite <daniel@manitou-mail.org> Reviewed-by: Peter Geoghegan <pg@bowt.ie> Discussion: https://www.postgresql.org/message-id/flat/1ccc668f-4cbc-0bef-af67-450b47cdfee7@2ndquadrant.com
Diffstat (limited to 'src/include/nodes')
-rw-r--r--src/include/nodes/execnodes.h3
-rw-r--r--src/include/nodes/plannodes.h7
2 files changed, 10 insertions, 0 deletions
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 62eb1a06eef..869c303e157 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -693,6 +693,7 @@ typedef struct TupleHashTableData
AttrNumber *keyColIdx; /* attr numbers of key columns */
FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */
ExprState *tab_eq_func; /* comparator for table datatype(s) */
+ Oid *tab_collations; /* collations for hash and comparison */
MemoryContext tablecxt; /* memory context containing table */
MemoryContext tempcxt; /* context for function evaluations */
Size entrysize; /* actual size to make each hash entry */
@@ -862,6 +863,7 @@ typedef struct SubPlanState
AttrNumber *keyColIdx; /* control data for hash tables */
Oid *tab_eq_funcoids; /* equality func oids for table
* datatype(s) */
+ Oid *tab_collations; /* collations for hash and comparison */
FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */
FmgrInfo *tab_eq_funcs; /* equality functions for table datatype(s) */
FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */
@@ -1872,6 +1874,7 @@ typedef struct HashJoinState
List *hj_OuterHashKeys; /* list of ExprState nodes */
List *hj_InnerHashKeys; /* list of ExprState nodes */
List *hj_HashOperators; /* list of operator OIDs */
+ List *hj_Collations;
HashJoinTable hj_HashTable;
uint32 hj_CurHashValue;
int hj_CurBucketNo;
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index d66a187a530..24740c31e3d 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -297,6 +297,7 @@ typedef struct RecursiveUnion
* duplicate-ness */
AttrNumber *dupColIdx; /* their indexes in the target list */
Oid *dupOperators; /* equality operators to compare with */
+ Oid *dupCollations;
long numGroups; /* estimated number of groups in input */
} RecursiveUnion;
@@ -773,6 +774,7 @@ typedef struct Group
int numCols; /* number of grouping columns */
AttrNumber *grpColIdx; /* their indexes in the target list */
Oid *grpOperators; /* equality operators to compare with */
+ Oid *grpCollations;
} Group;
/* ---------------
@@ -797,6 +799,7 @@ typedef struct Agg
int numCols; /* number of grouping columns */
AttrNumber *grpColIdx; /* their indexes in the target list */
Oid *grpOperators; /* equality operators to compare with */
+ Oid *grpCollations;
long numGroups; /* estimated number of groups in input */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
/* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
@@ -815,9 +818,11 @@ typedef struct WindowAgg
int partNumCols; /* number of columns in partition clause */
AttrNumber *partColIdx; /* their indexes in the target list */
Oid *partOperators; /* equality operators for partition columns */
+ Oid *partCollations; /* collations for partition columns */
int ordNumCols; /* number of columns in ordering clause */
AttrNumber *ordColIdx; /* their indexes in the target list */
Oid *ordOperators; /* equality operators for ordering columns */
+ Oid *ordCollations; /* collations for ordering columns */
int frameOptions; /* frame_clause options, see WindowDef */
Node *startOffset; /* expression for starting bound, if any */
Node *endOffset; /* expression for ending bound, if any */
@@ -839,6 +844,7 @@ typedef struct Unique
int numCols; /* number of columns to check for uniqueness */
AttrNumber *uniqColIdx; /* their indexes in the target list */
Oid *uniqOperators; /* equality operators to compare with */
+ Oid *uniqCollations; /* collations for equality comparisons */
} Unique;
/* ------------
@@ -913,6 +919,7 @@ typedef struct SetOp
* duplicate-ness */
AttrNumber *dupColIdx; /* their indexes in the target list */
Oid *dupOperators; /* equality operators to compare with */
+ Oid *dupCollations;
AttrNumber flagColIdx; /* where is the flag column, if any */
int firstFlag; /* flag value for first input relation */
long numGroups; /* estimated number of groups in input */