From 5bc429aacb3722e55638a776332eebfa88dd60e5 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Thu, 26 Aug 2021 13:07:34 -0400 Subject: [PATCH] Extend collection of Unicode combining characters to beyond the BMP The former limit was perhaps a carryover from an older hand-coded table. Since commit bab982161 we have enough space in mbinterval to store larger codepoints, so collect all combining characters. Discussion: https://www.postgresql.org/message-id/49ad1fa0-174e-c901-b14c-c484b60907f1%40enterprisedb.com --- .../generate-unicode_combining_table.pl | 2 - src/include/common/unicode_combining_table.h | 102 ++++++++++++++++++ 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/src/common/unicode/generate-unicode_combining_table.pl b/src/common/unicode/generate-unicode_combining_table.pl index 86aed78907..093a802f5f 100644 --- a/src/common/unicode/generate-unicode_combining_table.pl +++ b/src/common/unicode/generate-unicode_combining_table.pl @@ -25,8 +25,6 @@ foreach my $line () my @fields = split ';', $line; $codepoint = hex $fields[0]; - next if $codepoint > 0xFFFF; - if ($fields[2] eq 'Me' || $fields[2] eq 'Mn') { # combining character, save for start of range diff --git a/src/include/common/unicode_combining_table.h b/src/include/common/unicode_combining_table.h index a9f10c31bc..4dcaf45c3c 100644 --- a/src/include/common/unicode_combining_table.h +++ b/src/include/common/unicode_combining_table.h @@ -193,4 +193,106 @@ static const struct mbinterval combining[] = { {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, + {0x101FD, 0x101FD}, + {0x102E0, 0x102E0}, + {0x10376, 0x1037A}, + {0x10A01, 0x10A0F}, + {0x10A38, 0x10A3F}, + {0x10AE5, 0x10AE6}, + {0x10D24, 0x10D27}, + {0x10EAB, 0x10EAC}, + {0x10F46, 0x10F50}, + {0x11001, 0x11001}, + {0x11038, 0x11046}, + {0x1107F, 0x11081}, + {0x110B3, 0x110B6}, + {0x110B9, 0x110BA}, + {0x11100, 0x11102}, + {0x11127, 0x1112B}, + {0x1112D, 0x11134}, + {0x11173, 0x11173}, + {0x11180, 0x11181}, + {0x111B6, 0x111BE}, + {0x111C9, 0x111CC}, + {0x111CF, 0x111CF}, + {0x1122F, 0x11231}, + {0x11234, 0x11234}, + {0x11236, 0x11237}, + {0x1123E, 0x1123E}, + {0x112DF, 0x112DF}, + {0x112E3, 0x112EA}, + {0x11300, 0x11301}, + {0x1133B, 0x1133C}, + {0x11340, 0x11340}, + {0x11366, 0x11374}, + {0x11438, 0x1143F}, + {0x11442, 0x11444}, + {0x11446, 0x11446}, + {0x1145E, 0x1145E}, + {0x114B3, 0x114B8}, + {0x114BA, 0x114BA}, + {0x114BF, 0x114C0}, + {0x114C2, 0x114C3}, + {0x115B2, 0x115B5}, + {0x115BC, 0x115BD}, + {0x115BF, 0x115C0}, + {0x115DC, 0x115DD}, + {0x11633, 0x1163A}, + {0x1163D, 0x1163D}, + {0x1163F, 0x11640}, + {0x116AB, 0x116AB}, + {0x116AD, 0x116AD}, + {0x116B0, 0x116B5}, + {0x116B7, 0x116B7}, + {0x1171D, 0x1171F}, + {0x11722, 0x11725}, + {0x11727, 0x1172B}, + {0x1182F, 0x11837}, + {0x11839, 0x1183A}, + {0x1193B, 0x1193C}, + {0x1193E, 0x1193E}, + {0x11943, 0x11943}, + {0x119D4, 0x119DB}, + {0x119E0, 0x119E0}, + {0x11A01, 0x11A0A}, + {0x11A33, 0x11A38}, + {0x11A3B, 0x11A3E}, + {0x11A47, 0x11A47}, + {0x11A51, 0x11A56}, + {0x11A59, 0x11A5B}, + {0x11A8A, 0x11A96}, + {0x11A98, 0x11A99}, + {0x11C30, 0x11C3D}, + {0x11C3F, 0x11C3F}, + {0x11C92, 0x11CA7}, + {0x11CAA, 0x11CB0}, + {0x11CB2, 0x11CB3}, + {0x11CB5, 0x11CB6}, + {0x11D31, 0x11D45}, + {0x11D47, 0x11D47}, + {0x11D90, 0x11D91}, + {0x11D95, 0x11D95}, + {0x11D97, 0x11D97}, + {0x11EF3, 0x11EF4}, + {0x16AF0, 0x16AF4}, + {0x16B30, 0x16B36}, + {0x16F4F, 0x16F4F}, + {0x16F8F, 0x16F92}, + {0x16FE4, 0x16FE4}, + {0x1BC9D, 0x1BC9E}, + {0x1D167, 0x1D169}, + {0x1D17B, 0x1D182}, + {0x1D185, 0x1D18B}, + {0x1D1AA, 0x1D1AD}, + {0x1D242, 0x1D244}, + {0x1DA00, 0x1DA36}, + {0x1DA3B, 0x1DA6C}, + {0x1DA75, 0x1DA75}, + {0x1DA84, 0x1DA84}, + {0x1DA9B, 0x1E02A}, + {0x1E130, 0x1E136}, + {0x1E2EC, 0x1E2EF}, + {0x1E8D0, 0x1E8D6}, + {0x1E944, 0x1E94A}, + {0xE0100, 0xE01EF}, }; -- 2.39.5