Call pg_newlocale_from_collation() also with default collation
author Peter Eisentraut <peter@eisentraut.org>
Thu, 20 Jan 2022 08:38:05 +0000 (09:38 +0100)
committer Peter Eisentraut <peter@eisentraut.org>
Thu, 20 Jan 2022 08:50:18 +0000 (09:50 +0100)
Previously, callers of pg_newlocale_from_collation() did not call it
if the collation was DEFAULT_COLLATION_OID and instead proceeded with
a pg_locale_t of 0.  Now we call it anyway and have it return 0 if the
default collation was passed.  It already did this, so we just have to
adjust the callers.  This simplifies all the call sites and also makes
future enhancements easier.

After discussion and testing, it appears that the previous comment in
pg_locale.c about avoiding this call for performance reasons may have
been mistaken, since it was based on testing of a very different patch
version way back when.

Reviewed-by: Julien Rouhaud <rjuju123@gmail.com>
Discussion: https://www.postgresql.org/message-id/ed3baa81-7fac-7788-cc12-41e3f7917e34@enterprisedb.com

src/backend/access/hash/hashfunc.c
src/backend/regex/regc_pg_locale.c
src/backend/utils/adt/formatting.c
src/backend/utils/adt/like.c
src/backend/utils/adt/like_support.c
src/backend/utils/adt/pg_locale.c
src/backend/utils/adt/varchar.c
src/backend/utils/adt/varlena.c

index 0521c69dd57a2d2d3a3a8e359af5afbe85ba96eb..b57ed946c42bb54ede800e95045aa937a8dbad85 100644 (file)
@@ -278,7 +278,7 @@ hashtext(PG_FUNCTION_ARGS)
                                 errmsg("could not determine which collation to use for string hashing"),
                                 errhint("Use the COLLATE clause to set the collation explicitly.")));
 
-       if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       if (!lc_collate_is_c(collid))
                mylocale = pg_newlocale_from_collation(collid);
 
        if (!mylocale || mylocale->deterministic)
@@ -334,7 +334,7 @@ hashtextextended(PG_FUNCTION_ARGS)
                                 errmsg("could not determine which collation to use for string hashing"),
                                 errhint("Use the COLLATE clause to set the collation explicitly.")));
 
-       if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       if (!lc_collate_is_c(collid))
                mylocale = pg_newlocale_from_collation(collid);
 
        if (!mylocale || mylocale->deterministic)
index e0d93eab3216bd7ac898903537008f3f9bc78381..6e84f42cb24f661b7dd1c68d83d4883c68e4708d 100644 (file)
@@ -231,6 +231,18 @@ static const unsigned char pg_char_properties[128] = {
 void
 pg_set_regex_collation(Oid collation)
 {
+       if (!OidIsValid(collation))
+       {
+               /*
+                * This typically means that the parser could not resolve a
+                * conflict of implicit collations, so report it that way.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                errmsg("could not determine which collation to use for regular expression"),
+                                errhint("Use the COLLATE clause to set the collation explicitly.")));
+       }
+
        if (lc_ctype_is_c(collation))
        {
                /* C/POSIX collations use this path regardless of database encoding */
@@ -240,28 +252,12 @@ pg_set_regex_collation(Oid collation)
        }
        else
        {
-               if (collation == DEFAULT_COLLATION_OID)
-                       pg_regex_locale = 0;
-               else if (OidIsValid(collation))
-               {
-                       /*
-                        * NB: pg_newlocale_from_collation will fail if not HAVE_LOCALE_T;
-                        * the case of pg_regex_locale != 0 but not HAVE_LOCALE_T does not
-                        * have to be considered below.
-                        */
-                       pg_regex_locale = pg_newlocale_from_collation(collation);
-               }
-               else
-               {
-                       /*
-                        * This typically means that the parser could not resolve a
-                        * conflict of implicit collations, so report it that way.
-                        */
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                        errmsg("could not determine which collation to use for regular expression"),
-                                        errhint("Use the COLLATE clause to set the collation explicitly.")));
-               }
+               /*
+                * NB: pg_newlocale_from_collation will fail if not HAVE_LOCALE_T;
+                * the case of pg_regex_locale != 0 but not HAVE_LOCALE_T does not
+                * have to be considered below.
+                */
+               pg_regex_locale = pg_newlocale_from_collation(collation);
 
                if (pg_regex_locale && !pg_regex_locale->deterministic)
                        ereport(ERROR,
index e8f996ac83d11b7b7410d02f44237277ee67bb82..d4c2e7b0692024f57998898f0df34e9a46e0c43b 100644 (file)
@@ -1641,6 +1641,19 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
        if (!buff)
                return NULL;
 
+       if (!OidIsValid(collid))
+       {
+               /*
+                * This typically means that the parser could not resolve a
+                * conflict of implicit collations, so report it that way.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                errmsg("could not determine which collation to use for %s function",
+                                               "lower()"),
+                                errhint("Use the COLLATE clause to set the collation explicitly.")));
+       }
+
        /* C/POSIX collations use this path regardless of database encoding */
        if (lc_ctype_is_c(collid))
        {
@@ -1648,24 +1661,9 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
        }
        else
        {
-               pg_locale_t mylocale = 0;
+               pg_locale_t mylocale;
 
-               if (collid != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collid))
-                       {
-                               /*
-                                * This typically means that the parser could not resolve a
-                                * conflict of implicit collations, so report it that way.
-                                */
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine which collation to use for %s function",
-                                                               "lower()"),
-                                                errhint("Use the COLLATE clause to set the collation explicitly.")));
-                       }
-                       mylocale = pg_newlocale_from_collation(collid);
-               }
+               mylocale = pg_newlocale_from_collation(collid);
 
 #ifdef USE_ICU
                if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
@@ -1765,6 +1763,19 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
        if (!buff)
                return NULL;
 
+       if (!OidIsValid(collid))
+       {
+               /*
+                * This typically means that the parser could not resolve a
+                * conflict of implicit collations, so report it that way.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                errmsg("could not determine which collation to use for %s function",
+                                               "upper()"),
+                                errhint("Use the COLLATE clause to set the collation explicitly.")));
+       }
+
        /* C/POSIX collations use this path regardless of database encoding */
        if (lc_ctype_is_c(collid))
        {
@@ -1772,24 +1783,9 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
        }
        else
        {
-               pg_locale_t mylocale = 0;
+               pg_locale_t mylocale;
 
-               if (collid != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collid))
-                       {
-                               /*
-                                * This typically means that the parser could not resolve a
-                                * conflict of implicit collations, so report it that way.
-                                */
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine which collation to use for %s function",
-                                                               "upper()"),
-                                                errhint("Use the COLLATE clause to set the collation explicitly.")));
-                       }
-                       mylocale = pg_newlocale_from_collation(collid);
-               }
+               mylocale = pg_newlocale_from_collation(collid);
 
 #ifdef USE_ICU
                if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
@@ -1890,6 +1886,19 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
        if (!buff)
                return NULL;
 
+       if (!OidIsValid(collid))
+       {
+               /*
+                * This typically means that the parser could not resolve a
+                * conflict of implicit collations, so report it that way.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                errmsg("could not determine which collation to use for %s function",
+                                               "initcap()"),
+                                errhint("Use the COLLATE clause to set the collation explicitly.")));
+       }
+
        /* C/POSIX collations use this path regardless of database encoding */
        if (lc_ctype_is_c(collid))
        {
@@ -1897,24 +1906,9 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
        }
        else
        {
-               pg_locale_t mylocale = 0;
+               pg_locale_t mylocale;
 
-               if (collid != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collid))
-                       {
-                               /*
-                                * This typically means that the parser could not resolve a
-                                * conflict of implicit collations, so report it that way.
-                                */
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine which collation to use for %s function",
-                                                               "initcap()"),
-                                                errhint("Use the COLLATE clause to set the collation explicitly.")));
-                       }
-                       mylocale = pg_newlocale_from_collation(collid);
-               }
+               mylocale = pg_newlocale_from_collation(collid);
 
 #ifdef USE_ICU
                if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
index 9f241dc7c664ad22742c18a4696632c7a2891f04..833ee8f814c875dc4404e2a9035b7bc1d837d1e9 100644 (file)
@@ -150,7 +150,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
 static inline int
 GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
 {
-       if (collation && !lc_ctype_is_c(collation) && collation != DEFAULT_COLLATION_OID)
+       if (collation && !lc_ctype_is_c(collation))
        {
                pg_locale_t locale = pg_newlocale_from_collation(collation);
 
@@ -178,28 +178,27 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
        pg_locale_t locale = 0;
        bool            locale_is_c = false;
 
+       if (!OidIsValid(collation))
+       {
+               /*
+                * This typically means that the parser could not resolve a
+                * conflict of implicit collations, so report it that way.
+                */
+               ereport(ERROR,
+                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                errmsg("could not determine which collation to use for ILIKE"),
+                                errhint("Use the COLLATE clause to set the collation explicitly.")));
+       }
+
        if (lc_ctype_is_c(collation))
                locale_is_c = true;
-       else if (collation != DEFAULT_COLLATION_OID)
-       {
-               if (!OidIsValid(collation))
-               {
-                       /*
-                        * This typically means that the parser could not resolve a
-                        * conflict of implicit collations, so report it that way.
-                        */
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                        errmsg("could not determine which collation to use for ILIKE"),
-                                        errhint("Use the COLLATE clause to set the collation explicitly.")));
-               }
+       else
                locale = pg_newlocale_from_collation(collation);
 
-               if (locale && !locale->deterministic)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                        errmsg("nondeterministic collations are not supported for ILIKE")));
-       }
+       if (locale && !locale->deterministic)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("nondeterministic collations are not supported for ILIKE")));
 
        /*
         * For efficiency reasons, in the single byte case we don't call lower()
index 7ca2a01e492b89eba46ea4d3e8d74b310ee3df24..65a57fc3c461d615f7607ae9bc51f5690c6627d9 100644 (file)
@@ -1012,24 +1012,23 @@ like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
                                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                         errmsg("case insensitive matching not supported on type bytea")));
 
+               if (!OidIsValid(collation))
+               {
+                       /*
+                        * This typically means that the parser could not resolve a
+                        * conflict of implicit collations, so report it that way.
+                        */
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INDETERMINATE_COLLATION),
+                                        errmsg("could not determine which collation to use for ILIKE"),
+                                        errhint("Use the COLLATE clause to set the collation explicitly.")));
+               }
+
                /* If case-insensitive, we need locale info */
                if (lc_ctype_is_c(collation))
                        locale_is_c = true;
-               else if (collation != DEFAULT_COLLATION_OID)
-               {
-                       if (!OidIsValid(collation))
-                       {
-                               /*
-                                * This typically means that the parser could not resolve a
-                                * conflict of implicit collations, so report it that way.
-                                */
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INDETERMINATE_COLLATION),
-                                                errmsg("could not determine which collation to use for ILIKE"),
-                                                errhint("Use the COLLATE clause to set the collation explicitly.")));
-                       }
+               else
                        locale = pg_newlocale_from_collation(collation);
-               }
        }
 
        if (typeid != BYTEAOID)
index 18f3afdc62e4f6844d71699ee74cfc6061a36fe8..33cccc5c6c726510383812dd1e91192ca68d9f37 100644 (file)
@@ -1454,8 +1454,6 @@ report_newlocale_failure(const char *localename)
  *
  * As a special optimization, the default/database collation returns 0.
  * Callers should then revert to the non-locale_t-enabled code path.
- * In fact, they shouldn't call this function at all when they are dealing
- * with the default locale.  That can save quite a bit in hotspots.
  * Also, callers should avoid calling this before going down a C/POSIX
  * fastpath, because such a fastpath should work even on platforms without
  * locale_t support in the C library.
@@ -1472,7 +1470,6 @@ pg_newlocale_from_collation(Oid collid)
        /* Callers must pass a valid OID */
        Assert(OidIsValid(collid));
 
-       /* Return 0 for "default" collation, just in case caller forgets */
        if (collid == DEFAULT_COLLATION_OID)
                return (pg_locale_t) 0;
 
index 95f768c8843c41e2cf757ec973d46a6a9dc2f35b..8b5b30ed714812886e98af74cc2a6e4fbe7d978c 100644 (file)
@@ -743,15 +743,20 @@ bpchareq(PG_FUNCTION_ARGS)
                                len2;
        bool            result;
        Oid                     collid = PG_GET_COLLATION();
+       bool            locale_is_c = false;
+       pg_locale_t     mylocale = 0;
 
        check_collation_set(collid);
 
        len1 = bcTruelen(arg1);
        len2 = bcTruelen(arg2);
 
-       if (lc_collate_is_c(collid) ||
-               collid == DEFAULT_COLLATION_OID ||
-               pg_newlocale_from_collation(collid)->deterministic)
+       if (lc_collate_is_c(collid))
+               locale_is_c = true;
+       else
+               mylocale = pg_newlocale_from_collation(collid);
+
+       if (locale_is_c || !mylocale || mylocale->deterministic)
        {
                /*
                 * Since we only care about equality or not-equality, we can avoid all
@@ -783,15 +788,20 @@ bpcharne(PG_FUNCTION_ARGS)
                                len2;
        bool            result;
        Oid                     collid = PG_GET_COLLATION();
+       bool            locale_is_c = false;
+       pg_locale_t     mylocale = 0;
 
        check_collation_set(collid);
 
        len1 = bcTruelen(arg1);
        len2 = bcTruelen(arg2);
 
-       if (lc_collate_is_c(collid) ||
-               collid == DEFAULT_COLLATION_OID ||
-               pg_newlocale_from_collation(collid)->deterministic)
+       if (lc_collate_is_c(collid))
+               locale_is_c = true;
+       else
+               mylocale = pg_newlocale_from_collation(collid);
+
+       if (locale_is_c || !mylocale || mylocale->deterministic)
        {
                /*
                 * Since we only care about equality or not-equality, we can avoid all
@@ -996,7 +1006,7 @@ hashbpchar(PG_FUNCTION_ARGS)
        keydata = VARDATA_ANY(key);
        keylen = bcTruelen(key);
 
-       if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       if (!lc_collate_is_c(collid))
                mylocale = pg_newlocale_from_collation(collid);
 
        if (!mylocale || mylocale->deterministic)
@@ -1056,7 +1066,7 @@ hashbpcharextended(PG_FUNCTION_ARGS)
        keydata = VARDATA_ANY(key);
        keylen = bcTruelen(key);
 
-       if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       if (!lc_collate_is_c(collid))
                mylocale = pg_newlocale_from_collation(collid);
 
        if (!mylocale || mylocale->deterministic)
index b3eb39761d378cfa55b1b6ddf6f53d4c5b84fdb8..a8db8080e29e8ccc7d9762f9a24bb972af001efa 100644 (file)
@@ -1200,7 +1200,7 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
 
        check_collation_set(collid);
 
-       if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       if (!lc_collate_is_c(collid))
                mylocale = pg_newlocale_from_collation(collid);
 
        if (mylocale && !mylocale->deterministic)
@@ -1556,10 +1556,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
                char            a2buf[TEXTBUFLEN];
                char       *a1p,
                                   *a2p;
-               pg_locale_t mylocale = 0;
+               pg_locale_t mylocale;
 
-               if (collid != DEFAULT_COLLATION_OID)
-                       mylocale = pg_newlocale_from_collation(collid);
+               mylocale = pg_newlocale_from_collation(collid);
 
                /*
                 * memcmp() can't tell us which of two unequal strings sorts first,
@@ -1776,13 +1775,18 @@ Datum
 texteq(PG_FUNCTION_ARGS)
 {
        Oid                     collid = PG_GET_COLLATION();
+       bool            locale_is_c = false;
+       pg_locale_t     mylocale = 0;
        bool            result;
 
        check_collation_set(collid);
 
-       if (lc_collate_is_c(collid) ||
-               collid == DEFAULT_COLLATION_OID ||
-               pg_newlocale_from_collation(collid)->deterministic)
+       if (lc_collate_is_c(collid))
+               locale_is_c = true;
+       else
+               mylocale = pg_newlocale_from_collation(collid);
+
+       if (locale_is_c || !mylocale || mylocale->deterministic)
        {
                Datum           arg1 = PG_GETARG_DATUM(0);
                Datum           arg2 = PG_GETARG_DATUM(1);
@@ -1830,13 +1834,18 @@ Datum
 textne(PG_FUNCTION_ARGS)
 {
        Oid                     collid = PG_GET_COLLATION();
+       bool            locale_is_c = false;
+       pg_locale_t     mylocale = 0;
        bool            result;
 
        check_collation_set(collid);
 
-       if (lc_collate_is_c(collid) ||
-               collid == DEFAULT_COLLATION_OID ||
-               pg_newlocale_from_collation(collid)->deterministic)
+       if (lc_collate_is_c(collid))
+               locale_is_c = true;
+       else
+               mylocale = pg_newlocale_from_collation(collid);
+
+       if (locale_is_c || !mylocale || mylocale->deterministic)
        {
                Datum           arg1 = PG_GETARG_DATUM(0);
                Datum           arg2 = PG_GETARG_DATUM(1);
@@ -1947,7 +1956,7 @@ text_starts_with(PG_FUNCTION_ARGS)
 
        check_collation_set(collid);
 
-       if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
+       if (!lc_collate_is_c(collid))
                mylocale = pg_newlocale_from_collation(collid);
 
        if (mylocale && !mylocale->deterministic)
@@ -2061,8 +2070,7 @@ varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
                 * we'll figure out the collation based on the locale id and cache the
                 * result.
                 */
-               if (collid != DEFAULT_COLLATION_OID)
-                       locale = pg_newlocale_from_collation(collid);
+               locale = pg_newlocale_from_collation(collid);
 
                /*
                 * There is a further exception on Windows.  When the database