diff options
author | Jeff Davis | 2024-03-14 06:33:44 +0000 |
---|---|---|
committer | Jeff Davis | 2024-03-14 06:33:44 +0000 |
commit | 2d819a08a1cbc11364e36f816b02e33e8dcc030b (patch) | |
tree | 1a8d3b459866d7df936faffa0e64f5e339e6a6c2 /src/bin | |
parent | 6ab2e8385d55e0b73bb8bbc41d9c286f5f7f357f (diff) |
Introduce "builtin" collation provider.
New provider for collations, like "libc" or "icu", but without any
external dependency.
Initially, the only locale supported by the builtin provider is "C",
which is identical to the libc provider's "C" locale. The libc
provider's "C" locale has always been treated as a special case that
uses an internal implementation, without using libc at all -- so the
new builtin provider uses the same implementation.
The builtin provider's locale is independent of the server environment
variables LC_COLLATE and LC_CTYPE. Using the builtin provider, the
database collation locale can be "C" while LC_COLLATE and LC_CTYPE are
set to "en_US", which is impossible with the libc provider.
Offering a new builtin provider makes it clear that the semantics of
a collation using this provider will never depend on libc, and makes
the behavior easier to document.
Discussion: https://postgr.es/m/ab925f69-5f9d-f85e-b87c-bd2a44798659@joeconway.com
Discussion: https://postgr.es/m/dd9261f4-7a98-4565-93ec-336c1c110d90@manitou-mail.org
Discussion: https://postgr.es/m/ff4c2f2f9c8fc7ca27c1c24ae37ecaeaeaff6b53.camel%40j-davis.com
Reviewed-by: Daniel Vérité, Peter Eisentraut, Jeremy Schneider
Diffstat (limited to 'src/bin')
-rw-r--r-- | src/bin/initdb/initdb.c | 53 | ||||
-rw-r--r-- | src/bin/initdb/t/001_initdb.pl | 40 | ||||
-rw-r--r-- | src/bin/pg_dump/pg_dump.c | 23 | ||||
-rw-r--r-- | src/bin/pg_upgrade/t/002_pg_upgrade.pl | 81 | ||||
-rw-r--r-- | src/bin/psql/describe.c | 4 | ||||
-rw-r--r-- | src/bin/scripts/createdb.c | 19 | ||||
-rw-r--r-- | src/bin/scripts/t/020_createdb.pl | 60 |
7 files changed, 233 insertions, 47 deletions
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index de58002a5d4..8d53ef4a1fc 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -145,7 +145,9 @@ static char *lc_numeric = NULL; static char *lc_time = NULL; static char *lc_messages = NULL; static char locale_provider = COLLPROVIDER_LIBC; +static bool builtin_locale_specified = false; static char *datlocale = NULL; +static bool icu_locale_specified = false; static char *icu_rules = NULL; static const char *default_text_search_config = NULL; static char *username = NULL; @@ -2368,7 +2370,7 @@ setlocales(void) lc_monetary = locale; if (!lc_messages) lc_messages = locale; - if (!datlocale && locale_provider == COLLPROVIDER_ICU) + if (!datlocale && locale_provider != COLLPROVIDER_LIBC) datlocale = locale; } @@ -2395,14 +2397,20 @@ setlocales(void) lc_messages = canonname; #endif - if (locale_provider == COLLPROVIDER_ICU) + if (locale_provider != COLLPROVIDER_LIBC && datlocale == NULL) + pg_fatal("locale must be specified if provider is %s", + collprovider_name(locale_provider)); + + if (locale_provider == COLLPROVIDER_BUILTIN) + { + if (strcmp(datlocale, "C") != 0) + pg_fatal("invalid locale name \"%s\" for builtin provider", + datlocale); + } + else if (locale_provider == COLLPROVIDER_ICU) { char *langtag; - /* acquire default locale from the environment, if not specified */ - if (datlocale == NULL) - pg_fatal("ICU locale must be specified"); - /* canonicalize to a language tag */ langtag = icu_language_tag(datlocale); printf(_("Using language tag \"%s\" for ICU locale \"%s\".\n"), @@ -2447,7 +2455,8 @@ usage(const char *progname) " set default locale in the respective category for\n" " new databases (default taken from environment)\n")); printf(_(" --no-locale equivalent to --locale=C\n")); - printf(_(" --locale-provider={libc|icu}\n" + printf(_(" --builtin-locale=LOCALE set builtin locale name for new databases\n")); + printf(_(" --locale-provider={builtin|libc|icu}\n" " set default 
locale provider for new databases\n")); printf(_(" --pwfile=FILE read password for the new superuser from file\n")); printf(_(" -T, --text-search-config=CFG\n" @@ -2609,9 +2618,9 @@ setup_locale_encoding(void) else { printf(_("The database cluster will be initialized with this locale configuration:\n")); - printf(_(" provider: %s\n"), collprovider_name(locale_provider)); - if (datlocale) - printf(_(" ICU locale: %s\n"), datlocale); + printf(_(" default collation provider: %s\n"), collprovider_name(locale_provider)); + if (locale_provider != COLLPROVIDER_LIBC) + printf(_(" default collation locale: %s\n"), datlocale); printf(_(" LC_COLLATE: %s\n" " LC_CTYPE: %s\n" " LC_MESSAGES: %s\n" @@ -3104,9 +3113,10 @@ main(int argc, char *argv[]) {"allow-group-access", no_argument, NULL, 'g'}, {"discard-caches", no_argument, NULL, 14}, {"locale-provider", required_argument, NULL, 15}, - {"icu-locale", required_argument, NULL, 16}, - {"icu-rules", required_argument, NULL, 17}, - {"sync-method", required_argument, NULL, 18}, + {"builtin-locale", required_argument, NULL, 16}, + {"icu-locale", required_argument, NULL, 17}, + {"icu-rules", required_argument, NULL, 18}, + {"sync-method", required_argument, NULL, 19}, {NULL, 0, NULL, 0} }; @@ -3274,7 +3284,9 @@ main(int argc, char *argv[]) "-c debug_discard_caches=1"); break; case 15: - if (strcmp(optarg, "icu") == 0) + if (strcmp(optarg, "builtin") == 0) + locale_provider = COLLPROVIDER_BUILTIN; + else if (strcmp(optarg, "icu") == 0) locale_provider = COLLPROVIDER_ICU; else if (strcmp(optarg, "libc") == 0) locale_provider = COLLPROVIDER_LIBC; @@ -3283,11 +3295,16 @@ main(int argc, char *argv[]) break; case 16: datlocale = pg_strdup(optarg); + builtin_locale_specified = true; break; case 17: - icu_rules = pg_strdup(optarg); + datlocale = pg_strdup(optarg); + icu_locale_specified = true; break; case 18: + icu_rules = pg_strdup(optarg); + break; + case 19: if (!parse_sync_method(optarg, &sync_method)) exit(1); break; @@ -3317,7 
+3334,11 @@ main(int argc, char *argv[]) exit(1); } - if (datlocale && locale_provider != COLLPROVIDER_ICU) + if (builtin_locale_specified && locale_provider != COLLPROVIDER_BUILTIN) + pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen", + "--builtin-locale", "builtin"); + + if (icu_locale_specified && locale_provider != COLLPROVIDER_ICU) pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen", "--icu-locale", "icu"); diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index 594b20cc743..e719f70dae2 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -117,7 +117,7 @@ if ($ENV{with_icu} eq 'yes') { command_fails_like( [ 'initdb', '--no-sync', '--locale-provider=icu', "$tempdir/data2" ], - qr/initdb: error: ICU locale must be specified/, + qr/initdb: error: locale must be specified if provider is icu/, 'locale provider ICU requires --icu-locale'); command_ok( @@ -138,7 +138,7 @@ if ($ENV{with_icu} eq 'yes') '--lc-monetary=C', '--lc-time=C', "$tempdir/data4" ], - qr/^\s+ICU locale:\s+und\n/ms, + qr/^\s+default collation locale:\s+und\n/ms, 'options --locale-provider=icu --locale=und --lc-*=C'); command_fails_like( @@ -185,6 +185,42 @@ else } command_fails( + [ 'initdb', '--no-sync', '--locale-provider=builtin', "$tempdir/data6" ], + 'locale provider builtin fails without --locale'); + +command_ok( + [ + 'initdb', '--no-sync', + '--locale-provider=builtin', '--locale=C', + "$tempdir/data7" + ], + 'locale provider builtin with --locale'); + +command_ok( + [ + 'initdb', '--no-sync', + '--locale-provider=builtin', '--lc-ctype=C', + '--locale=C', "$tempdir/data10" + ], + 'locale provider builtin with --lc-ctype'); + +command_fails( + [ + 'initdb', '--no-sync', + '--locale-provider=builtin', '--icu-locale=en', + "$tempdir/dataX" + ], + 'fails for locale provider builtin with ICU locale'); + +command_fails( + [ + 'initdb', '--no-sync', + '--locale-provider=builtin', 
'--icu-rules=""', + "$tempdir/dataX" + ], + 'fails for locale provider builtin with ICU rules'); + +command_fails( [ 'initdb', '--no-sync', '--locale-provider=xyz', "$tempdir/dataX" ], 'fails for invalid locale provider'); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 23e6217b73f..171e5916965 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -3114,7 +3114,9 @@ dumpDatabase(Archive *fout) } appendPQExpBufferStr(creaQry, " LOCALE_PROVIDER = "); - if (datlocprovider[0] == 'c') + if (datlocprovider[0] == 'b') + appendPQExpBufferStr(creaQry, "builtin"); + else if (datlocprovider[0] == 'c') appendPQExpBufferStr(creaQry, "libc"); else if (datlocprovider[0] == 'i') appendPQExpBufferStr(creaQry, "icu"); @@ -3142,7 +3144,11 @@ dumpDatabase(Archive *fout) } if (locale) { - appendPQExpBufferStr(creaQry, " ICU_LOCALE = "); + if (datlocprovider[0] == 'b') + appendPQExpBufferStr(creaQry, " BUILTIN_LOCALE = "); + else + appendPQExpBufferStr(creaQry, " ICU_LOCALE = "); + appendStringLiteralAH(creaQry, locale, fout); } @@ -13870,7 +13876,9 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) fmtQualifiedDumpable(collinfo)); appendPQExpBufferStr(q, "provider = "); - if (collprovider[0] == 'c') + if (collprovider[0] == 'b') + appendPQExpBufferStr(q, "builtin"); + else if (collprovider[0] == 'c') appendPQExpBufferStr(q, "libc"); else if (collprovider[0] == 'i') appendPQExpBufferStr(q, "icu"); @@ -13891,6 +13899,15 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) /* no locale -- the default collation cannot be reloaded anyway */ } + else if (collprovider[0] == 'b') + { + if (collcollate || collctype || !colllocale || collicurules) + pg_log_warning("invalid collation \"%s\"", qcollname); + + appendPQExpBufferStr(q, ", locale = "); + appendStringLiteralAH(q, colllocale ? 
colllocale : "", + fout); + } else if (collprovider[0] == 'i') { if (fout->remoteVersion >= 150000) diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 34a459496e1..ed79c0930b0 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -104,19 +104,13 @@ if ($oldnode->pg_version >= 11) push @custom_opts, '--allow-group-access'; } -# Set up the locale settings for the original cluster, so that we -# can test that pg_upgrade copies the locale settings of template0 -# from the old to the new cluster. +my $old_provider_field; +my $old_datlocale_field; -my $original_encoding = "6"; # UTF-8 -my $original_provider = "c"; -my $original_locale = "C"; -my $original_datlocale = ""; -my $provider_field = "'c' AS datlocprovider"; -my $old_datlocale_field = "NULL AS datlocale"; -if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes') +# account for field additions and changes +if ($oldnode->pg_version >= 15) { - $provider_field = "datlocprovider"; + $old_provider_field = "datlocprovider"; if ($oldnode->pg_version >= '17devel') { $old_datlocale_field = "datlocale"; @@ -125,18 +119,65 @@ if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes') { $old_datlocale_field = "daticulocale AS datlocale"; } +} +else +{ + $old_provider_field = "'c' AS datlocprovider"; + $old_datlocale_field = "NULL AS datlocale"; +} + +# Set up the locale settings for the original cluster, so that we +# can test that pg_upgrade copies the locale settings of template0 +# from the old to the new cluster. 
+ +my $original_enc_name; +my $original_provider; +my $original_datcollate = "C"; +my $original_datctype = "C"; +my $original_datlocale; + +if ($oldnode->pg_version >= '17devel') +{ + $original_enc_name = "UTF-8"; + $original_provider = "b"; + $original_datlocale = "C"; +} +elsif ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes') +{ + $original_enc_name = "UTF-8"; $original_provider = "i"; $original_datlocale = "fr-CA"; } +else +{ + $original_enc_name = "SQL_ASCII"; + $original_provider = "c"; + $original_datlocale = ""; +} + +my %encodings = ('UTF-8' => 6, 'SQL_ASCII' => 0); +my $original_encoding = $encodings{$original_enc_name}; my @initdb_params = @custom_opts; -push @initdb_params, ('--encoding', 'UTF-8'); -push @initdb_params, ('--locale', $original_locale); -if ($original_provider eq "i") +push @initdb_params, ('--encoding', $original_enc_name); +push @initdb_params, ('--lc-collate', $original_datcollate); +push @initdb_params, ('--lc-ctype', $original_datctype); + +# add --locale-provider, if supported +my %provider_name = ('b' => 'builtin', 'i' => 'icu', 'c' => 'libc'); +if ($oldnode->pg_version >= 15) { - push @initdb_params, ('--locale-provider', 'icu'); - push @initdb_params, ('--icu-locale', 'fr-CA'); + push @initdb_params, + ('--locale-provider', $provider_name{$original_provider}); + if ($original_provider eq 'b') + { + push @initdb_params, ('--builtin-locale', $original_datlocale); + } + elsif ($original_provider eq 'i') + { + push @initdb_params, ('--icu-locale', $original_datlocale); + } } $node_params{extra} = \@initdb_params; @@ -146,10 +187,10 @@ $oldnode->start; my $result; $result = $oldnode->safe_psql( 'postgres', - "SELECT encoding, $provider_field, datcollate, datctype, $old_datlocale_field + "SELECT encoding, $old_provider_field, datcollate, datctype, $old_datlocale_field FROM pg_database WHERE datname='template0'"); is( $result, - "$original_encoding|$original_provider|$original_locale|$original_locale|$original_datlocale", + 
"$original_encoding|$original_provider|$original_datcollate|$original_datctype|$original_datlocale", "check locales in original cluster"); # The default location of the source code is the root of this directory. @@ -433,10 +474,10 @@ if (-d $log_path) # Test that upgraded cluster has original locale settings. $result = $newnode->safe_psql( 'postgres', - "SELECT encoding, $provider_field, datcollate, datctype, datlocale + "SELECT encoding, datlocprovider, datcollate, datctype, datlocale FROM pg_database WHERE datname='template0'"); is( $result, - "$original_encoding|$original_provider|$original_locale|$original_locale|$original_datlocale", + "$original_encoding|$original_provider|$original_datcollate|$original_datctype|$original_datlocale", "check that locales in new cluster match original cluster"); # Second dump from the upgraded instance. diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 68b2ea8872a..1ab80eb7cac 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -926,7 +926,7 @@ listAllDbs(const char *pattern, bool verbose) gettext_noop("Encoding")); if (pset.sversion >= 150000) appendPQExpBuffer(&buf, - " CASE d.datlocprovider WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", + " CASE d.datlocprovider WHEN 'b' THEN 'builtin' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", gettext_noop("Locale Provider")); else appendPQExpBuffer(&buf, @@ -4974,7 +4974,7 @@ listCollations(const char *pattern, bool verbose, bool showSystem) if (pset.sversion >= 100000) appendPQExpBuffer(&buf, - " CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", + " CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'b' THEN 'builtin' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", gettext_noop("Provider")); else appendPQExpBuffer(&buf, diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c index 14970a6a5f8..007061e756f 100644 --- a/src/bin/scripts/createdb.c 
+++ b/src/bin/scripts/createdb.c @@ -40,8 +40,9 @@ main(int argc, char *argv[]) {"locale", required_argument, NULL, 'l'}, {"maintenance-db", required_argument, NULL, 3}, {"locale-provider", required_argument, NULL, 4}, - {"icu-locale", required_argument, NULL, 5}, - {"icu-rules", required_argument, NULL, 6}, + {"builtin-locale", required_argument, NULL, 5}, + {"icu-locale", required_argument, NULL, 6}, + {"icu-rules", required_argument, NULL, 7}, {NULL, 0, NULL, 0} }; @@ -67,6 +68,7 @@ main(int argc, char *argv[]) char *lc_ctype = NULL; char *locale = NULL; char *locale_provider = NULL; + char *builtin_locale = NULL; char *icu_locale = NULL; char *icu_rules = NULL; @@ -134,9 +136,12 @@ main(int argc, char *argv[]) locale_provider = pg_strdup(optarg); break; case 5: - icu_locale = pg_strdup(optarg); + builtin_locale = pg_strdup(optarg); break; case 6: + icu_locale = pg_strdup(optarg); + break; + case 7: icu_rules = pg_strdup(optarg); break; default: @@ -216,6 +221,11 @@ main(int argc, char *argv[]) appendPQExpBufferStr(&sql, " LOCALE "); appendStringLiteralConn(&sql, locale, conn); } + if (builtin_locale) + { + appendPQExpBufferStr(&sql, " BUILTIN_LOCALE "); + appendStringLiteralConn(&sql, builtin_locale, conn); + } if (lc_collate) { appendPQExpBufferStr(&sql, " LC_COLLATE "); @@ -294,9 +304,10 @@ help(const char *progname) printf(_(" -l, --locale=LOCALE locale settings for the database\n")); printf(_(" --lc-collate=LOCALE LC_COLLATE setting for the database\n")); printf(_(" --lc-ctype=LOCALE LC_CTYPE setting for the database\n")); + printf(_(" --builtin-locale=LOCALE builtin locale setting for the database\n")); printf(_(" --icu-locale=LOCALE ICU locale setting for the database\n")); printf(_(" --icu-rules=RULES ICU rules setting for the database\n")); - printf(_(" --locale-provider={libc|icu}\n" + printf(_(" --locale-provider={builtin|libc|icu}\n" " locale provider for the database's default collation\n")); printf(_(" -O, --owner=OWNER database user to own the new 
database\n")); printf(_(" -S, --strategy=STRATEGY database creation strategy wal_log or file_copy\n")); diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl index 37e47b00782..dfd635bfab2 100644 --- a/src/bin/scripts/t/020_createdb.pl +++ b/src/bin/scripts/t/020_createdb.pl @@ -105,6 +105,66 @@ else 'create database with ICU fails since no ICU support'); } +$node->command_fails( + [ + 'createdb', '-T', + 'template0', '--locale-provider=builtin', + 'tbuiltin1' + ], + 'create database with provider "builtin" fails without --locale'); + +$node->command_ok( + [ + 'createdb', '-T', + 'template0', '--locale-provider=builtin', + '--locale=C', 'tbuiltin2' + ], + 'create database with provider "builtin" and locale "C"'); + +$node->command_ok( + [ + 'createdb', '-T', + 'template0', '--locale-provider=builtin', + '--locale=C', '--lc-collate=C', + 'tbuiltin3' + ], + 'create database with provider "builtin" and LC_COLLATE=C'); + +$node->command_ok( + [ + 'createdb', '-T', + 'template0', '--locale-provider=builtin', + '--locale=C', '--lc-ctype=C', + 'tbuiltin4' + ], + 'create database with provider "builtin" and LC_CTYPE=C'); + +$node->command_fails( + [ + 'createdb', '-T', + 'template0', '--locale-provider=builtin', + '--locale=C', '--icu-locale=en', + 'tbuiltin7' + ], + 'create database with provider "builtin" and ICU_LOCALE="en"'); + +$node->command_fails( + [ + 'createdb', '-T', + 'template0', '--locale-provider=builtin', + '--locale=C', '--icu-rules=""', + 'tbuiltin8' + ], + 'create database with provider "builtin" and ICU_RULES=""'); + +$node->command_fails( + [ + 'createdb', '-T', + 'template1', '--locale-provider=builtin', + '--locale=C', 'tbuiltin9' + ], + 'create database with provider "builtin" not matching template'); + $node->command_fails([ 'createdb', 'foobar1' ], 'fails if database already exists'); |