21#include <unicode/ucasemap.h>
22#include <unicode/uchar.h>
32static UCaseMap *
casemap = NULL;
35typedef size_t (*
TestFunc) (
char *dst,
size_t dstsize,
const char *src,
63 size_t prev_offset = wbstate->
offset;
89 pg_wchar icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT);
91 if (
lower != iculower || title != icutitle ||
upper != icuupper ||
94 printf(
"case_test: FAILURE for codepoint 0x%06x\n", code);
95 printf(
"case_test: Postgres lower/title/upper/fold: 0x%06x/0x%06x/0x%06x/0x%06x\n",
97 printf(
"case_test: ICU lower/title/upper/fold: 0x%06x/0x%06x/0x%06x/0x%06x\n",
98 iculower, icutitle, icuupper, icufold);
105icu_test_full(
char *
str)
111 char icu_lower[
BUFSZ];
112 char icu_title[
BUFSZ];
113 char icu_upper[
BUFSZ];
114 char icu_fold[
BUFSZ];
131 status = U_ZERO_ERROR;
133 status = U_ZERO_ERROR;
135 status = U_ZERO_ERROR;
137 status = U_ZERO_ERROR;
140 if (strcmp(
lower, icu_lower) != 0)
142 printf(
"case_test: str='%s' lower='%s' icu_lower='%s'\n",
str,
lower,
146 if (strcmp(title, icu_title) != 0)
148 printf(
"case_test: str='%s' title='%s' icu_title='%s'\n",
str, title,
152 if (strcmp(
upper, icu_upper) != 0)
154 printf(
"case_test: str='%s' upper='%s' icu_upper='%s'\n",
str,
upper,
158 if (strcmp(fold, icu_fold) != 0)
160 printf(
"case_test: str='%s' fold='%s' icu_fold='%s'\n",
str, fold,
173 int skipped_mismatch = 0;
175 for (
pg_wchar code = 0; code <= 0x10ffff; code++)
181 uint8_t icu_category = u_charType(code);
182 char code_str[5] = {0};
190 icu_test_simple(code);
192 icu_test_full(code_str);
198 if (skipped_mismatch > 0)
199 printf(
"case_test: skipped %d codepoints unassigned in ICU due to Unicode version mismatch\n",
202 printf(
"case_test: ICU simple mapping test: %d codepoints successful\n",
210 size_t src1len = strlen(test_string);
212 size_t dst1len = strlen(expected);
213 size_t dst2len = strlen(expected) + 1;
214 char *src1 =
malloc(src1len);
215 char *dst1 =
malloc(dst1len);
216 char *src2 = strdup(test_string);
217 char *dst2 =
malloc(dst2len);
220 memcpy(src1, test_string, src1len);
223 memset(dst1, 0x7F, dst1len);
224 needed = tfunc(dst1, dst1len, src1, src1len);
225 if (needed != strlen(expected))
227 printf(
"case_test: convert_case test1 FAILURE: '%s' needed %zu expected %zu\n",
228 test_string, needed, strlen(expected));
231 if (memcmp(dst1, expected, dst1len) != 0)
233 printf(
"case_test: convert_case test1 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n",
234 test_string, (
int) dst1len, dst1, expected);
239 memset(dst2, 0x7F, dst2len);
240 needed = tfunc(dst2, dst2len, src1, src1len);
241 if (needed != strlen(expected))
243 printf(
"case_test: convert_case test2 FAILURE: '%s' needed %zu expected %zu\n",
244 test_string, needed, strlen(expected));
247 if (strcmp(dst2, expected) != 0)
249 printf(
"case_test: convert_case test2 FAILURE: test: '%s' result: '%s' expected: '%s'\n",
250 test_string, dst2, expected);
255 memset(dst1, 0x7F, dst1len);
256 needed = tfunc(dst1, dst1len, src2, src2len);
257 if (needed != strlen(expected))
259 printf(
"case_test: convert_case test3 FAILURE: '%s' needed %zu expected %zu\n",
260 test_string, needed, strlen(expected));
261 printf(
"case_test: convert_case test3 FAILURE: needed %zu\n", needed);
264 if (memcmp(dst1, expected, dst1len) != 0)
266 printf(
"case_test: convert_case test3 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n",
267 test_string, (
int) dst1len, dst1, expected);
272 memset(dst2, 0x7F, dst2len);
273 needed = tfunc(dst2, dst2len, src2, src2len);
274 if (needed != strlen(expected))
276 printf(
"case_test: convert_case test4 FAILURE: '%s' needed %zu expected %zu\n",
277 test_string, needed, strlen(expected));
280 if (strcmp(dst2, expected) != 0)
282 printf(
"case_test: convert_case test4 FAILURE: test: '%s' result: '%s' expected: '%s'\n",
283 test_string, dst2, expected);
360 icu_test_full(
"ȺȺȺ");
361 icu_test_full(
"ßßß");
363 icu_test_full(
"a b");
364 icu_test_full(
"abc 123xyz");
365 icu_test_full(
"σςΣ ΣΣΣ");
366 icu_test_full(
"ıiIİ");
367 icu_test_full(
"\uFF11a");
369 icu_test_full(
"\u0391\u0345\u0301");
372 printf(
"case_test: convert_case: success\n");
379 UErrorCode status = U_ZERO_ERROR;
385 casemap = ucasemap_open(
"und", U_TITLECASE_NO_BREAK_ADJUSTMENT, &status);
386 if (U_FAILURE(status))
388 printf(
"case_test: failure opening UCaseMap: %s\n",
389 u_errorName(status));
396 printf(
"case_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION);
399 printf(
"case_test: ICU not available; skipping\n");
static void test_convert_case()
static void test_convert(TestFunc tfunc, const char *test_string, const char *expected)
int main(int argc, char **argv)
static size_t tfunc_lower(char *dst, size_t dstsize, const char *src, ssize_t srclen)
static size_t initcap_wbnext(void *state)
static size_t tfunc_title(char *dst, size_t dstsize, const char *src, ssize_t srclen)
static size_t tfunc_upper(char *dst, size_t dstsize, const char *src, ssize_t srclen)
static size_t tfunc_fold(char *dst, size_t dstsize, const char *src, ssize_t srclen)
size_t(* TestFunc)(char *dst, size_t dstsize, const char *src, ssize_t srclen)
static pg_wchar utf8_to_unicode(const unsigned char *c)
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
static int unicode_utf8len(pg_wchar c)
pg_wchar unicode_uppercase_simple(pg_wchar code)
pg_wchar unicode_titlecase_simple(pg_wchar code)
size_t unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
static enum CaseMapResult casemap(pg_wchar u1, CaseKind casekind, bool full, const char *src, size_t srclen, size_t srcoff, pg_wchar *simple, const pg_wchar **special)
size_t unicode_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
size_t unicode_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full, WordBoundaryNext wbnext, void *wbstate)
pg_wchar unicode_lowercase_simple(pg_wchar code)
size_t unicode_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, bool full)
pg_wchar unicode_casefold_simple(pg_wchar code)
bool pg_u_isalnum(pg_wchar code, bool posix)
pg_unicode_category unicode_category(pg_wchar code)
#define PG_UNICODE_VERSION