summaryrefslogtreecommitdiff
path: root/src/backend/utils
diff options
context:
space:
mode:
author Jeff Davis 2023-03-28 23:15:59 +0000
committer Jeff Davis 2023-03-28 23:34:29 +0000
commit 1671f990dd669c0b72e45c7bef0fd579a10676ed (patch)
tree 002e9ca8c4cd35b2f641de6c308f78b701f3b21a /src/backend/utils
parent b7cea58822c67724effc711ae28e4077a01a7cd6 (diff)
Validate ICU locales.
For ICU collations, ensure that the locale's language exists in ICU,
and that the locale can be opened.

Basic validation helps avoid minor mistakes and misspellings, which
often fall back to the root locale instead of the intended locale.
It's even more important to avoid such mistakes in ICU versions 54
and earlier, where the same (misspelled) locale string could fall
back to different locales depending on the environment.

Discussion: https://postgr.es/m/11b1eeb7e7667fdd4178497aeb796c48d26e69b9.camel@j-davis.com
Discussion: https://postgr.es/m/df2efad0cae7c65180df8e5ebb709e5eb4f2a82b.camel@j-davis.com
Reviewed-by: Peter Eisentraut
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- src/backend/utils/adt/pg_locale.c 70
-rw-r--r-- src/backend/utils/misc/guc_tables.c 26
-rw-r--r-- src/backend/utils/misc/postgresql.conf.sample 3
3 files changed, 92 insertions, 7 deletions
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 8345c4602f..9497c20d12 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -58,6 +58,7 @@
#include "catalog/pg_collation.h"
#include "catalog/pg_control.h"
#include "mb/pg_wchar.h"
+#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/formatting.h"
#include "utils/guc_hooks.h"
@@ -95,6 +96,8 @@ char *locale_monetary;
char *locale_numeric;
char *locale_time;
+int icu_validation_level = ERROR;
+
/*
* lc_time localization cache.
*
@@ -2821,24 +2824,77 @@ icu_set_collation_attributes(UCollator *collator, const char *loc,
pfree(lower_str);
}
-#endif /* USE_ICU */
+#endif
/*
- * Check if the given locale ID is valid, and ereport(ERROR) if it isn't.
+ * Perform best-effort check that the locale is a valid one.
*/
void
-check_icu_locale(const char *icu_locale)
+icu_validate_locale(const char *loc_str)
{
#ifdef USE_ICU
- UCollator *collator;
+ UCollator *collator;
+ UErrorCode status;
+ char lang[ULOC_LANG_CAPACITY];
+ bool found = false;
+ int elevel = icu_validation_level;
+
+ /* no validation */
+ if (elevel < 0)
+ return;
+
+ /* downgrade to WARNING during pg_upgrade */
+ if (IsBinaryUpgrade && elevel > WARNING)
+ elevel = WARNING;
+
+ /* validate that we can extract the language */
+ status = U_ZERO_ERROR;
+ uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
+ if (U_FAILURE(status))
+ {
+ ereport(elevel,
+ (errmsg("could not get language from ICU locale \"%s\": %s",
+ loc_str, u_errorName(status)),
+ errhint("To disable ICU locale validation, set parameter icu_validation_level to DISABLED.")));
+ return;
+ }
+
+ /* check for special language name */
+ if (strcmp(lang, "") == 0 ||
+ strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
+ strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
+ found = true;
- collator = pg_ucol_open(icu_locale);
+ /* search for matching language within ICU */
+ for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
+ {
+ const char *otherloc = uloc_getAvailable(i);
+ char otherlang[ULOC_LANG_CAPACITY];
+
+ status = U_ZERO_ERROR;
+ uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
+ if (U_FAILURE(status))
+ continue;
+
+ if (strcmp(lang, otherlang) == 0)
+ found = true;
+ }
+
+ if (!found)
+ ereport(elevel,
+ (errmsg("ICU locale \"%s\" has unknown language \"%s\"",
+ loc_str, lang),
+ errhint("To disable ICU locale validation, set parameter icu_validation_level to DISABLED.")));
+
+ /* check that it can be opened */
+ collator = pg_ucol_open(loc_str);
ucol_close(collator);
-#else
+#else /* not USE_ICU */
+ /* could get here if a collation was created by a build with ICU */
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("ICU is not supported in this build")));
-#endif
+#endif /* not USE_ICU */
}
/*
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index a60bd48499..8062589efd 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -166,6 +166,22 @@ static const struct config_enum_entry intervalstyle_options[] = {
{NULL, 0, false}
};
+static const struct config_enum_entry icu_validation_level_options[] = {
+ {"disabled", -1, false},
+ {"debug5", DEBUG5, false},
+ {"debug4", DEBUG4, false},
+ {"debug3", DEBUG3, false},
+ {"debug2", DEBUG2, false},
+ {"debug1", DEBUG1, false},
+ {"debug", DEBUG2, true},
+ {"log", LOG, false},
+ {"info", INFO, true},
+ {"notice", NOTICE, false},
+ {"warning", WARNING, false},
+ {"error", ERROR, false},
+ {NULL, 0, false}
+};
+
StaticAssertDecl(lengthof(intervalstyle_options) == (INTSTYLE_ISO_8601 + 2),
"array length mismatch");
@@ -4644,6 +4660,16 @@ struct config_enum ConfigureNamesEnum[] =
},
{
+ {"icu_validation_level", PGC_USERSET, CLIENT_CONN_LOCALE,
+ gettext_noop("Log level for reporting invalid ICU locale strings."),
+ NULL
+ },
+ &icu_validation_level,
+ ERROR, icu_validation_level_options,
+ NULL, NULL, NULL
+ },
+
+ {
{"log_error_verbosity", PGC_SUSET, LOGGING_WHAT,
gettext_noop("Sets the verbosity of logged messages."),
NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index fc831565d9..ee49ca3937 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -731,6 +731,9 @@
#lc_numeric = 'C' # locale for number formatting
#lc_time = 'C' # locale for time formatting
+#icu_validation_level = ERROR # report ICU locale validation
+ # errors at the given level
+
# default configuration for text search
#default_text_search_config = 'pg_catalog.simple'