summaryrefslogtreecommitdiff
path: root/src/include/mb
diff options
context:
space:
mode:
author: Jeff Davis, 2024-03-07 19:15:06 +0000
committer: Jeff Davis, 2024-03-07 19:15:06 +0000
commit: 5c40364dd6d9c6a260c8965dffe2e066642d6f79 (patch)
tree: 229ba6adf17935fe7f54f6cca3c3856267302d15 /src/include/mb
parent: 6d470211e54f7a617783b99b27c9d8056a890a57 (diff)
Unicode case mapping tables and functions.
Implements Unicode simple case mapping, in which all code points map to exactly one other code point unconditionally.

These tables are generated from UnicodeData.txt, which is already being used by other infrastructure in src/common/unicode. The tables are checked into the source tree, so they only need to be regenerated when we update the Unicode version.

In preparation for the builtin collation provider, and possibly useful for other callers.

Discussion: https://postgr.es/m/ff4c2f2f9c8fc7ca27c1c24ae37ecaeaeaff6b53.camel%40j-davis.com
Reviewed-by: Peter Eisentraut, Daniel Verite, Jeremy Schneider
Diffstat (limited to 'src/include/mb')
-rw-r--r-- src/include/mb/pg_wchar.h 15
1 files changed, 15 insertions, 0 deletions
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index 32e25a1a6ea..69a55b66f44 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -555,6 +555,21 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
}
+/*
+ * Number of bytes needed to represent the given char in UTF8; c is not validated here, so every value above 0xFFFF yields 4.
+ */
+static inline int
+unicode_utf8len(pg_wchar c)
+{
+ if (c <= 0x7F) /* up to 0x7F: one-byte (ASCII) form */
+ return 1;
+ else if (c <= 0x7FF) /* up to 0x7FF: two-byte form */
+ return 2;
+ else if (c <= 0xFFFF) /* up to 0xFFFF: three-byte form */
+ return 3;
+ else /* anything larger: four-byte form, no upper-bound check */
+ return 4;
+}
/*
* The functions in this list are exported by libpq, and we need to be sure