Add support for automatically updating Unicode derived files
author Peter Eisentraut <peter@eisentraut.org>
Thu, 9 Jan 2020 08:54:47 +0000 (09:54 +0100)
committer Peter Eisentraut <peter@eisentraut.org>
Thu, 9 Jan 2020 09:08:14 +0000 (10:08 +0100)
We currently have several sets of files generated from data provided
by Unicode.  These all have ad hoc rules and instructions for updating
when new Unicode versions appear, and it's not done consistently.

This patch centralizes and automates the process and makes it part of
the release checklist.  The Unicode and CLDR versions are specified in
Makefile.global.in.  There is a new make target "update-unicode" that
downloads all the relevant files and runs the generation script.

There is also a new script for generating the table of combining
characters for ucs_wcwidth().  That table is now in a separate include
file rather than hardcoded into the middle of other code.  This is
based on the script that was used to generate the table in commit
d8594d123c155aeecd47fc2450f62f5100b2fbf0, but the script itself wasn't
committed at that time.

Reviewed-by: John Naylor <john.naylor@2ndquadrant.com>
Discussion: https://www.postgresql.org/message-id/flat/c8d05f42-443e-6c23-819b-05b31759a37c@2ndquadrant.com

13 files changed:
GNUmakefile.in
contrib/unaccent/.gitignore
contrib/unaccent/Makefile
contrib/unaccent/generate_unaccent_rules.py
src/Makefile.global.in
src/backend/utils/mb/Unicode/Makefile
src/backend/utils/mb/wchar.c
src/common/unicode/.gitignore
src/common/unicode/Makefile
src/common/unicode/README
src/common/unicode/generate-unicode_combining_table.pl [new file with mode: 0644]
src/include/common/unicode_combining_table.h [new file with mode: 0644]
src/tools/RELEASE_CHANGES

index 9dc373c79cc0894391fc6df49465ae53b415d769..ee636e3b5053b0aac0277b679b4bd456c6a938ef 100644 (file)
@@ -75,6 +75,10 @@ $(call recurse,installcheck-world,src/test src/pl src/interfaces/ecpg contrib sr
 GNUmakefile: GNUmakefile.in $(top_builddir)/config.status
    ./config.status $@
 
+update-unicode: | submake-generated-headers submake-libpgport
+   $(MAKE) -C src/common/unicode $@
+   $(MAKE) -C contrib/unaccent $@
+
 
 ##########################################################################
 
index 5dcb3ff9723501c3fe639bee1c1435e47a580a6f..bccda7317dc4e8e11f62e7a2383715647e2955e4 100644 (file)
@@ -2,3 +2,6 @@
 /log/
 /results/
 /tmp_check/
+
+# Downloaded files
+/Latin-ASCII.xml
index 92b7f9d78e7d8bb9d4d7564ed09edc53003bcdaf..9753bc6ad2912878c925d32e4e778877bac4e110 100644 (file)
@@ -26,3 +26,22 @@ top_builddir = ../..
 include $(top_builddir)/src/Makefile.global
 include $(top_srcdir)/contrib/contrib-global.mk
 endif
+
+update-unicode: unaccent.rules
+
+# Allow running this even without --with-python
+PYTHON ?= python
+
+unaccent.rules: generate_unaccent_rules.py ../../src/common/unicode/UnicodeData.txt Latin-ASCII.xml
+   $(PYTHON) $< --unicode-data-file $(word 2,$^) --latin-ascii-file $(word 3,$^) >$@
+
+# Only download it once; dependencies must match src/common/unicode/
+../../src/common/unicode/UnicodeData.txt: $(top_builddir)/src/Makefile.global
+   $(MAKE) -C $(@D) $(@F)
+
+# Dependency on Makefile.global is for CLDR_VERSION
+Latin-ASCII.xml: $(top_builddir)/src/Makefile.global
+   $(DOWNLOAD) https://raw.githubusercontent.com/unicode-org/cldr/release-$(subst .,-,$(CLDR_VERSION))/common/transforms/Latin-ASCII.xml
+
+distclean:
+   rm -f Latin-ASCII.xml
index acfb4f0b6868d280dc35e8062ef53c204d6534b7..a952de510c686de24bd5f82d6931c32a345b5afb 100644 (file)
 # option is enabled, the XML file of this transliterator [2] -- given as a
 # command line argument -- will be parsed and used.
 #
-# Ideally you should use the latest release for each data set.  For
-# Latin-ASCII.xml, the latest data sets released can be browsed directly
-# via [3].  Note that this script is compatible with at least release 29.
+# Ideally you should use the latest release for each data set.  This
+# script is compatible with at least CLDR release 29.
 #
-# [1] https://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt
-# [2] https://raw.githubusercontent.com/unicode-org/cldr/release-34/common/transforms/Latin-ASCII.xml
-# [3] https://github.com/unicode-org/cldr/tags
+# [1] https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/UnicodeData.txt
+# [2] https://raw.githubusercontent.com/unicode-org/cldr/${TAG}/common/transforms/Latin-ASCII.xml
 
 # BEGIN: Python 2/3 compatibility - remove when Python 2 compatibility dropped
 # The approach is to be Python3 compatible with Python2 "backports".
index 5002c4776410444c345e9c6e70f70686b46119ab..9b28b8af431e4d353fd22277e0d0d4b89a8801f0 100644 (file)
@@ -23,7 +23,7 @@ standard_targets = all install installdirs uninstall distprep clean distclean ma
 # these targets should recurse even into subdirectories not being built:
 standard_always_targets = distprep clean distclean maintainer-clean
 
-.PHONY: $(standard_targets) install-strip html man installcheck-parallel
+.PHONY: $(standard_targets) install-strip html man installcheck-parallel update-unicode
 
 # make `all' the default target
 all:
@@ -352,6 +352,22 @@ XGETTEXT = @XGETTEXT@
 GZIP   = gzip
 BZIP2  = bzip2
 
+DOWNLOAD = wget -O $@ --no-use-server-timestamps
+#DOWNLOAD = curl -o $@
+
+
+# Unicode data information
+
+# Before each major release, update these and run make update-unicode.
+
+# Pick a release from here: <https://www.unicode.org/Public/>.  Note
+# that the most recent release listed there is often a pre-release;
+# don't pick that one, except for testing.
+UNICODE_VERSION = 12.1.0
+
+# Pick a release from here: <http://cldr.unicode.org/index/downloads>
+CLDR_VERSION = 34
+
 
 # Tree-wide build support
 
index b43f294fd6893ea1fa98bc129b39473e4433d73d..9084f03009171079bad7c6d5d8433a27df1dd2a3 100644 (file)
@@ -115,9 +115,6 @@ maintainer-clean: distclean
    rm -f $(MAPS)
 
 
-DOWNLOAD = wget -O $@ --no-use-server-timestamps
-#DOWNLOAD = curl -o $@
-
 BIG5.TXT CNS11643.TXT:
    $(DOWNLOAD) https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/OTHER/$(@F)
 
index b2d598cbee88fe5db5dece1806f52257112578a6..02e2588ffe12d6dd3f89c0fcc395735d15653d8d 100644 (file)
@@ -643,73 +643,7 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
 static int
 ucs_wcwidth(pg_wchar ucs)
 {
-   /* sorted list of non-overlapping intervals of non-spacing characters */
-   static const struct mbinterval combining[] = {
-       {0x0300, 0x036F}, {0x0483, 0x0489}, {0x0591, 0x05BD},
-       {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C5},
-       {0x05C7, 0x05C7}, {0x0610, 0x061A}, {0x064B, 0x065F},
-       {0x0670, 0x0670}, {0x06D6, 0x06DC}, {0x06DF, 0x06E4},
-       {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x0711, 0x0711},
-       {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x07EB, 0x07F3},
-       {0x07FD, 0x07FD}, {0x0816, 0x0819}, {0x081B, 0x0823},
-       {0x0825, 0x0827}, {0x0829, 0x082D}, {0x0859, 0x085B},
-       {0x08D3, 0x08E1}, {0x08E3, 0x0902}, {0x093A, 0x093A},
-       {0x093C, 0x093C}, {0x0941, 0x0948}, {0x094D, 0x094D},
-       {0x0951, 0x0957}, {0x0962, 0x0963}, {0x0981, 0x0981},
-       {0x09BC, 0x09BC}, {0x09C1, 0x09C4}, {0x09CD, 0x09CD},
-       {0x09E2, 0x09E3}, {0x09FE, 0x0A02}, {0x0A3C, 0x0A3C},
-       {0x0A41, 0x0A51}, {0x0A70, 0x0A71}, {0x0A75, 0x0A75},
-       {0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC8},
-       {0x0ACD, 0x0ACD}, {0x0AE2, 0x0AE3}, {0x0AFA, 0x0B01},
-       {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B44},
-       {0x0B4D, 0x0B56}, {0x0B62, 0x0B63}, {0x0B82, 0x0B82},
-       {0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C00, 0x0C00},
-       {0x0C04, 0x0C04}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C56},
-       {0x0C62, 0x0C63}, {0x0C81, 0x0C81}, {0x0CBC, 0x0CBC},
-       {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
-       {0x0CE2, 0x0CE3}, {0x0D00, 0x0D01}, {0x0D3B, 0x0D3C},
-       {0x0D41, 0x0D44}, {0x0D4D, 0x0D4D}, {0x0D62, 0x0D63},
-       {0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD6}, {0x0E31, 0x0E31},
-       {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
-       {0x0EB4, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
-       {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
-       {0x0F71, 0x0F7E}, {0x0F80, 0x0F84}, {0x0F86, 0x0F87},
-       {0x0F8D, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102D, 0x1030},
-       {0x1032, 0x1037}, {0x1039, 0x103A}, {0x103D, 0x103E},
-       {0x1058, 0x1059}, {0x105E, 0x1060}, {0x1071, 0x1074},
-       {0x1082, 0x1082}, {0x1085, 0x1086}, {0x108D, 0x108D},
-       {0x109D, 0x109D}, {0x135D, 0x135F}, {0x1712, 0x1714},
-       {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0x1773},
-       {0x17B4, 0x17B5}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6},
-       {0x17C9, 0x17D3}, {0x17DD, 0x17DD}, {0x180B, 0x180D},
-       {0x1885, 0x1886}, {0x18A9, 0x18A9}, {0x1920, 0x1922},
-       {0x1927, 0x1928}, {0x1932, 0x1932}, {0x1939, 0x193B},
-       {0x1A17, 0x1A18}, {0x1A1B, 0x1A1B}, {0x1A56, 0x1A56},
-       {0x1A58, 0x1A60}, {0x1A62, 0x1A62}, {0x1A65, 0x1A6C},
-       {0x1A73, 0x1A7F}, {0x1AB0, 0x1B03}, {0x1B34, 0x1B34},
-       {0x1B36, 0x1B3A}, {0x1B3C, 0x1B3C}, {0x1B42, 0x1B42},
-       {0x1B6B, 0x1B73}, {0x1B80, 0x1B81}, {0x1BA2, 0x1BA5},
-       {0x1BA8, 0x1BA9}, {0x1BAB, 0x1BAD}, {0x1BE6, 0x1BE6},
-       {0x1BE8, 0x1BE9}, {0x1BED, 0x1BED}, {0x1BEF, 0x1BF1},
-       {0x1C2C, 0x1C33}, {0x1C36, 0x1C37}, {0x1CD0, 0x1CD2},
-       {0x1CD4, 0x1CE0}, {0x1CE2, 0x1CE8}, {0x1CED, 0x1CED},
-       {0x1CF4, 0x1CF4}, {0x1CF8, 0x1CF9}, {0x1DC0, 0x1DFF},
-       {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F},
-       {0x2DE0, 0x2DFF}, {0x302A, 0x302D}, {0x3099, 0x309A},
-       {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F},
-       {0xA6F0, 0xA6F1}, {0xA802, 0xA802}, {0xA806, 0xA806},
-       {0xA80B, 0xA80B}, {0xA825, 0xA826}, {0xA8C4, 0xA8C5},
-       {0xA8E0, 0xA8F1}, {0xA8FF, 0xA8FF}, {0xA926, 0xA92D},
-       {0xA947, 0xA951}, {0xA980, 0xA982}, {0xA9B3, 0xA9B3},
-       {0xA9B6, 0xA9B9}, {0xA9BC, 0xA9BD}, {0xA9E5, 0xA9E5},
-       {0xAA29, 0xAA2E}, {0xAA31, 0xAA32}, {0xAA35, 0xAA36},
-       {0xAA43, 0xAA43}, {0xAA4C, 0xAA4C}, {0xAA7C, 0xAA7C},
-       {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8},
-       {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEC, 0xAAED},
-       {0xAAF6, 0xAAF6}, {0xABE5, 0xABE5}, {0xABE8, 0xABE8},
-       {0xABED, 0xABED}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F},
-       {0xFE20, 0xFE2F},
-   };
+#include "common/unicode_combining_table.h"
 
    /* test for 8-bit control characters */
    if (ucs == 0)
index 5e583e2cccf7058655773efa106e50a87580a071..b5a4d8427420930e335caee923cd261a031c9b39 100644 (file)
@@ -1,7 +1,7 @@
 /norm_test
 /norm_test_table.h
 
-# Files downloaded from the Unicode Character Database
+# Downloaded files
 /CompositionExclusions.txt
 /NormalizationTest.txt
 /UnicodeData.txt
index 334859c98480a295c6800e20e09b452f7ea2ac3e..ec78aeec2aa356f550b1a245f01642d7e926d341 100644 (file)
@@ -18,18 +18,24 @@ LIBS += $(PTHREAD_LIBS)
 # By default, do nothing.
 all:
 
-DOWNLOAD = wget -O $@ --no-use-server-timestamps
+update-unicode: unicode_norm_table.h unicode_combining_table.h
+   $(MAKE) normalization-check
+   mv unicode_norm_table.h unicode_combining_table.h ../../../src/include/common/
 
 # These files are part of the Unicode Character Database. Download
-# them on demand.
-UnicodeData.txt CompositionExclusions.txt NormalizationTest.txt:
-   $(DOWNLOAD) https://www.unicode.org/Public/UNIDATA/$(@F)
+# them on demand.  The dependency on Makefile.global is for
+# UNICODE_VERSION.
+UnicodeData.txt CompositionExclusions.txt NormalizationTest.txt: $(top_builddir)/src/Makefile.global
+   $(DOWNLOAD) https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$(@F)
 
 # Generation of conversion tables used for string normalization with
 # UTF-8 strings.
 unicode_norm_table.h: generate-unicode_norm_table.pl UnicodeData.txt CompositionExclusions.txt
    $(PERL) generate-unicode_norm_table.pl
 
+unicode_combining_table.h: generate-unicode_combining_table.pl UnicodeData.txt
+   $(PERL) $^ >$@
+
 # Test suite
 normalization-check: norm_test
    ./norm_test
index 5aa79044d36ea4a8d652179b4e46d67c44b836b3..56956f6a65fc18d803891c7c87f14f335bccd5ab 100644 (file)
@@ -8,20 +8,11 @@ of Unicode.
 Generating unicode_norm_table.h
 -------------------------------
 
-1. Download the Unicode data file, UnicodeData.txt, from the Unicode
-consortium and place it to the current directory. Run the perl script
-"generate-unicode_norm_table.pl", to process it, and to generate the
-"unicode_norm_table.h" file. The Makefile contains a rule to download the
-data files if they don't exist.
-
-    make unicode_norm_table.h
-
-2. Inspect the resulting header file. Once you're happy with it, copy it to
-the right location.
-
-    cp unicode_norm_table.h ../../../src/include/common/
+Run
 
+    make update-unicode
 
+from the top level of the source tree and commit the result.
 
 Tests
 -----
@@ -33,3 +24,5 @@ normalization code with all the test strings in NormalizationTest.txt.
 To download NormalizationTest.txt and run the tests:
 
     make normalization-check
+
+This is also run as part of the update-unicode target.
diff --git a/src/common/unicode/generate-unicode_combining_table.pl b/src/common/unicode/generate-unicode_combining_table.pl
new file mode 100644 (file)
index 0000000..e468a5f
--- /dev/null
@@ -0,0 +1,52 @@
+#!/usr/bin/perl
+#
+# Generate sorted list of non-overlapping intervals of non-spacing
+# characters, using Unicode data files as input.  Pass UnicodeData.txt
+# as argument.  The output is on stdout.  Only code points up to 0xFFFF
+# are considered; the input must be sorted by code point.
+#
+# Copyright (c) 2019, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+# Bounds of the run of combining characters currently being collected;
+# both are undef while no run is open.
+my ($range_start, $range_end);
+
+print "/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */\n\n";
+
+print "static const struct mbinterval combining[] = {\n";
+
+# Read line by line instead of slurping the whole file into a list.
+while (my $line = <ARGV>)
+{
+    chomp $line;
+    my @fields = split ';', $line;
+
+    # Ignore blank or malformed lines with fewer than three fields.
+    next if @fields < 3;
+
+    my $codepoint = hex $fields[0];
+    next if $codepoint > 0xFFFF;
+
+    if ($fields[2] eq 'Me' || $fields[2] eq 'Mn')
+    {
+        # combining character: open a run if needed, then extend it
+        $range_start = $codepoint if !defined($range_start);
+        $range_end = $codepoint;
+    }
+    elsif (defined($range_start))
+    {
+        # not a combining character: print out the run that just ended
+        printf "\t{0x%04X, 0x%04X},\n", $range_start, $range_end;
+        $range_start = $range_end = undef;
+    }
+}
+
+# A run that is still open when the input ends has no following line to
+# close it, so print it here instead of silently dropping it.
+printf "\t{0x%04X, 0x%04X},\n", $range_start, $range_end
+  if defined($range_start);
+
+print "};\n";
diff --git a/src/include/common/unicode_combining_table.h b/src/include/common/unicode_combining_table.h
new file mode 100644 (file)
index 0000000..b4a8588
--- /dev/null
@@ -0,0 +1,194 @@
+/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */
+
+static const struct mbinterval combining[] = {
+   {0x0300, 0x036F},
+   {0x0483, 0x0489},
+   {0x0591, 0x05BD},
+   {0x05BF, 0x05BF},
+   {0x05C1, 0x05C2},
+   {0x05C4, 0x05C5},
+   {0x05C7, 0x05C7},
+   {0x0610, 0x061A},
+   {0x064B, 0x065F},
+   {0x0670, 0x0670},
+   {0x06D6, 0x06DC},
+   {0x06DF, 0x06E4},
+   {0x06E7, 0x06E8},
+   {0x06EA, 0x06ED},
+   {0x0711, 0x0711},
+   {0x0730, 0x074A},
+   {0x07A6, 0x07B0},
+   {0x07EB, 0x07F3},
+   {0x07FD, 0x07FD},
+   {0x0816, 0x0819},
+   {0x081B, 0x0823},
+   {0x0825, 0x0827},
+   {0x0829, 0x082D},
+   {0x0859, 0x085B},
+   {0x08D3, 0x08E1},
+   {0x08E3, 0x0902},
+   {0x093A, 0x093A},
+   {0x093C, 0x093C},
+   {0x0941, 0x0948},
+   {0x094D, 0x094D},
+   {0x0951, 0x0957},
+   {0x0962, 0x0963},
+   {0x0981, 0x0981},
+   {0x09BC, 0x09BC},
+   {0x09C1, 0x09C4},
+   {0x09CD, 0x09CD},
+   {0x09E2, 0x09E3},
+   {0x09FE, 0x0A02},
+   {0x0A3C, 0x0A3C},
+   {0x0A41, 0x0A51},
+   {0x0A70, 0x0A71},
+   {0x0A75, 0x0A75},
+   {0x0A81, 0x0A82},
+   {0x0ABC, 0x0ABC},
+   {0x0AC1, 0x0AC8},
+   {0x0ACD, 0x0ACD},
+   {0x0AE2, 0x0AE3},
+   {0x0AFA, 0x0B01},
+   {0x0B3C, 0x0B3C},
+   {0x0B3F, 0x0B3F},
+   {0x0B41, 0x0B44},
+   {0x0B4D, 0x0B56},
+   {0x0B62, 0x0B63},
+   {0x0B82, 0x0B82},
+   {0x0BC0, 0x0BC0},
+   {0x0BCD, 0x0BCD},
+   {0x0C00, 0x0C00},
+   {0x0C04, 0x0C04},
+   {0x0C3E, 0x0C40},
+   {0x0C46, 0x0C56},
+   {0x0C62, 0x0C63},
+   {0x0C81, 0x0C81},
+   {0x0CBC, 0x0CBC},
+   {0x0CBF, 0x0CBF},
+   {0x0CC6, 0x0CC6},
+   {0x0CCC, 0x0CCD},
+   {0x0CE2, 0x0CE3},
+   {0x0D00, 0x0D01},
+   {0x0D3B, 0x0D3C},
+   {0x0D41, 0x0D44},
+   {0x0D4D, 0x0D4D},
+   {0x0D62, 0x0D63},
+   {0x0DCA, 0x0DCA},
+   {0x0DD2, 0x0DD6},
+   {0x0E31, 0x0E31},
+   {0x0E34, 0x0E3A},
+   {0x0E47, 0x0E4E},
+   {0x0EB1, 0x0EB1},
+   {0x0EB4, 0x0EBC},
+   {0x0EC8, 0x0ECD},
+   {0x0F18, 0x0F19},
+   {0x0F35, 0x0F35},
+   {0x0F37, 0x0F37},
+   {0x0F39, 0x0F39},
+   {0x0F71, 0x0F7E},
+   {0x0F80, 0x0F84},
+   {0x0F86, 0x0F87},
+   {0x0F8D, 0x0FBC},
+   {0x0FC6, 0x0FC6},
+   {0x102D, 0x1030},
+   {0x1032, 0x1037},
+   {0x1039, 0x103A},
+   {0x103D, 0x103E},
+   {0x1058, 0x1059},
+   {0x105E, 0x1060},
+   {0x1071, 0x1074},
+   {0x1082, 0x1082},
+   {0x1085, 0x1086},
+   {0x108D, 0x108D},
+   {0x109D, 0x109D},
+   {0x135D, 0x135F},
+   {0x1712, 0x1714},
+   {0x1732, 0x1734},
+   {0x1752, 0x1753},
+   {0x1772, 0x1773},
+   {0x17B4, 0x17B5},
+   {0x17B7, 0x17BD},
+   {0x17C6, 0x17C6},
+   {0x17C9, 0x17D3},
+   {0x17DD, 0x17DD},
+   {0x180B, 0x180D},
+   {0x1885, 0x1886},
+   {0x18A9, 0x18A9},
+   {0x1920, 0x1922},
+   {0x1927, 0x1928},
+   {0x1932, 0x1932},
+   {0x1939, 0x193B},
+   {0x1A17, 0x1A18},
+   {0x1A1B, 0x1A1B},
+   {0x1A56, 0x1A56},
+   {0x1A58, 0x1A60},
+   {0x1A62, 0x1A62},
+   {0x1A65, 0x1A6C},
+   {0x1A73, 0x1A7F},
+   {0x1AB0, 0x1B03},
+   {0x1B34, 0x1B34},
+   {0x1B36, 0x1B3A},
+   {0x1B3C, 0x1B3C},
+   {0x1B42, 0x1B42},
+   {0x1B6B, 0x1B73},
+   {0x1B80, 0x1B81},
+   {0x1BA2, 0x1BA5},
+   {0x1BA8, 0x1BA9},
+   {0x1BAB, 0x1BAD},
+   {0x1BE6, 0x1BE6},
+   {0x1BE8, 0x1BE9},
+   {0x1BED, 0x1BED},
+   {0x1BEF, 0x1BF1},
+   {0x1C2C, 0x1C33},
+   {0x1C36, 0x1C37},
+   {0x1CD0, 0x1CD2},
+   {0x1CD4, 0x1CE0},
+   {0x1CE2, 0x1CE8},
+   {0x1CED, 0x1CED},
+   {0x1CF4, 0x1CF4},
+   {0x1CF8, 0x1CF9},
+   {0x1DC0, 0x1DFF},
+   {0x20D0, 0x20F0},
+   {0x2CEF, 0x2CF1},
+   {0x2D7F, 0x2D7F},
+   {0x2DE0, 0x2DFF},
+   {0x302A, 0x302D},
+   {0x3099, 0x309A},
+   {0xA66F, 0xA672},
+   {0xA674, 0xA67D},
+   {0xA69E, 0xA69F},
+   {0xA6F0, 0xA6F1},
+   {0xA802, 0xA802},
+   {0xA806, 0xA806},
+   {0xA80B, 0xA80B},
+   {0xA825, 0xA826},
+   {0xA8C4, 0xA8C5},
+   {0xA8E0, 0xA8F1},
+   {0xA8FF, 0xA8FF},
+   {0xA926, 0xA92D},
+   {0xA947, 0xA951},
+   {0xA980, 0xA982},
+   {0xA9B3, 0xA9B3},
+   {0xA9B6, 0xA9B9},
+   {0xA9BC, 0xA9BD},
+   {0xA9E5, 0xA9E5},
+   {0xAA29, 0xAA2E},
+   {0xAA31, 0xAA32},
+   {0xAA35, 0xAA36},
+   {0xAA43, 0xAA43},
+   {0xAA4C, 0xAA4C},
+   {0xAA7C, 0xAA7C},
+   {0xAAB0, 0xAAB0},
+   {0xAAB2, 0xAAB4},
+   {0xAAB7, 0xAAB8},
+   {0xAABE, 0xAABF},
+   {0xAAC1, 0xAAC1},
+   {0xAAEC, 0xAAED},
+   {0xAAF6, 0xAAF6},
+   {0xABE5, 0xABE5},
+   {0xABE8, 0xABE8},
+   {0xABED, 0xABED},
+   {0xFB1E, 0xFB1E},
+   {0xFE00, 0xFE0F},
+   {0xFE20, 0xFE2F},
+};
index 46139877ed423430df3e0da56ee886e54aed59f0..a7bff76b768ab505a5f4bb03fe6be2b33306715b 100644 (file)
@@ -77,6 +77,9 @@ but there may be reasons to do them at other times as well.
 
 * Update inet/cidr data types with newest Bind patches
 
+* Update Unicode data: Edit UNICODE_VERSION and CLDR_VERSION in
+  src/Makefile.global.in, run make update-unicode, and commit.
+
 
 Starting a New Development Cycle
 ================================