LCOV - code coverage report
Current view: top level - src/backend/utils/adt - pg_locale_builtin.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 74 86 86.0 %
Date: 2025-07-09 01:17:29 Functions: 15 19 78.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-----------------------------------------------------------------------
       2             :  *
       3             :  * PostgreSQL locale utilities for builtin provider
       4             :  *
       5             :  * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group
       6             :  *
       7             :  * src/backend/utils/adt/pg_locale_builtin.c
       8             :  *
       9             :  *-----------------------------------------------------------------------
      10             :  */
      11             : 
      12             : #include "postgres.h"
      13             : 
      14             : #include "catalog/pg_database.h"
      15             : #include "catalog/pg_collation.h"
      16             : #include "common/unicode_case.h"
      17             : #include "common/unicode_category.h"
      18             : #include "mb/pg_wchar.h"
      19             : #include "miscadmin.h"
      20             : #include "utils/builtins.h"
      21             : #include "utils/pg_locale.h"
      22             : #include "utils/syscache.h"
      23             : 
/*
 * Prototypes for the two functions this file defines with external linkage.
 * NOTE(review): presumably repeated here because no shared header declares
 * them -- confirm against utils/pg_locale.h before relying on that.
 */
extern pg_locale_t create_pg_locale_builtin(Oid collid,
                                            MemoryContext context);
extern char *get_collation_actual_version_builtin(const char *collcollate);
      27             : 
/*
 * Iteration state for initcap_wbnext(), the word-boundary callback that
 * strtitle_builtin() hands to unicode_strtitle().
 */
struct WordBoundaryState
{
    const char *str;            /* text being scanned */
    size_t      len;            /* upper bound on offset; scanning also
                                 * stops at a NUL byte */
    size_t      offset;         /* byte offset of the next character to
                                 * examine */
    bool        posix;          /* forwarded to pg_u_isalnum() */
    bool        init;           /* false until the first boundary has been
                                 * reported */
    bool        prev_alnum;     /* pg_u_isalnum() result recorded at the
                                 * most recently reported boundary */
};
      37             : 
      38             : /*
      39             :  * Simple word boundary iterator that draws boundaries each time the result of
      40             :  * pg_u_isalnum() changes.
      41             :  */
      42             : static size_t
      43         824 : initcap_wbnext(void *state)
      44             : {
      45         824 :     struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
      46             : 
      47        1700 :     while (wbstate->offset < wbstate->len &&
      48        1506 :            wbstate->str[wbstate->offset] != '\0')
      49             :     {
      50        1506 :         pg_wchar    u = utf8_to_unicode((unsigned char *) wbstate->str +
      51        1506 :                                         wbstate->offset);
      52        1506 :         bool        curr_alnum = pg_u_isalnum(u, wbstate->posix);
      53             : 
      54        1506 :         if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
      55             :         {
      56         630 :             size_t      prev_offset = wbstate->offset;
      57             : 
      58         630 :             wbstate->init = true;
      59         630 :             wbstate->offset += unicode_utf8len(u);
      60         630 :             wbstate->prev_alnum = curr_alnum;
      61         630 :             return prev_offset;
      62             :         }
      63             : 
      64         876 :         wbstate->offset += unicode_utf8len(u);
      65             :     }
      66             : 
      67         194 :     return wbstate->len;
      68             : }
      69             : 
      70             : static size_t
      71       13022 : strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
      72             :                  pg_locale_t locale)
      73             : {
      74       26044 :     return unicode_strlower(dest, destsize, src, srclen,
      75       13022 :                             locale->info.builtin.casemap_full);
      76             : }
      77             : 
      78             : static size_t
      79         194 : strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
      80             :                  pg_locale_t locale)
      81             : {
      82         194 :     struct WordBoundaryState wbstate = {
      83             :         .str = src,
      84             :         .len = srclen,
      85             :         .offset = 0,
      86         194 :         .posix = !locale->info.builtin.casemap_full,
      87             :         .init = false,
      88             :         .prev_alnum = false,
      89             :     };
      90             : 
      91         388 :     return unicode_strtitle(dest, destsize, src, srclen,
      92         194 :                             locale->info.builtin.casemap_full,
      93             :                             initcap_wbnext, &wbstate);
      94             : }
      95             : 
      96             : static size_t
      97      316882 : strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
      98             :                  pg_locale_t locale)
      99             : {
     100      633764 :     return unicode_strupper(dest, destsize, src, srclen,
     101      316882 :                             locale->info.builtin.casemap_full);
     102             : }
     103             : 
     104             : static size_t
     105          12 : strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
     106             :                 pg_locale_t locale)
     107             : {
     108          24 :     return unicode_strfold(dest, destsize, src, srclen,
     109          12 :                            locale->info.builtin.casemap_full);
     110             : }
     111             : 
     112             : static bool
     113       65660 : wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
     114             : {
     115       65660 :     return pg_u_isdigit(wc, !locale->info.builtin.casemap_full);
     116             : }
     117             : 
     118             : static bool
     119        4118 : wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
     120             : {
     121        4118 :     return pg_u_isalpha(wc);
     122             : }
     123             : 
     124             : static bool
     125       36860 : wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
     126             : {
     127       36860 :     return pg_u_isalnum(wc, !locale->info.builtin.casemap_full);
     128             : }
     129             : 
     130             : static bool
     131       24576 : wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
     132             : {
     133       24576 :     return pg_u_isupper(wc);
     134             : }
     135             : 
     136             : static bool
     137           0 : wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
     138             : {
     139           0 :     return pg_u_islower(wc);
     140             : }
     141             : 
     142             : static bool
     143           0 : wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
     144             : {
     145           0 :     return pg_u_isgraph(wc);
     146             : }
     147             : 
     148             : static bool
     149           0 : wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
     150             : {
     151           0 :     return pg_u_isprint(wc);
     152             : }
     153             : 
     154             : static bool
     155       24576 : wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
     156             : {
     157       24576 :     return pg_u_ispunct(wc, !locale->info.builtin.casemap_full);
     158             : }
     159             : 
     160             : static bool
     161       16398 : wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
     162             : {
     163       16398 :     return pg_u_isspace(wc);
     164             : }
     165             : 
     166             : static bool
     167           0 : char_is_cased_builtin(char ch, pg_locale_t locale)
     168             : {
     169           0 :     return IS_HIGHBIT_SET(ch) ||
     170           0 :         (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
     171             : }
     172             : 
     173             : static pg_wchar
     174         528 : wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
     175             : {
     176         528 :     return unicode_uppercase_simple(wc);
     177             : }
     178             : 
     179             : static pg_wchar
     180         528 : wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
     181             : {
     182         528 :     return unicode_lowercase_simple(wc);
     183             : }
     184             : 
     185             : static const struct ctype_methods ctype_methods_builtin = {
     186             :     .strlower = strlower_builtin,
     187             :     .strtitle = strtitle_builtin,
     188             :     .strupper = strupper_builtin,
     189             :     .strfold = strfold_builtin,
     190             :     .wc_isdigit = wc_isdigit_builtin,
     191             :     .wc_isalpha = wc_isalpha_builtin,
     192             :     .wc_isalnum = wc_isalnum_builtin,
     193             :     .wc_isupper = wc_isupper_builtin,
     194             :     .wc_islower = wc_islower_builtin,
     195             :     .wc_isgraph = wc_isgraph_builtin,
     196             :     .wc_isprint = wc_isprint_builtin,
     197             :     .wc_ispunct = wc_ispunct_builtin,
     198             :     .wc_isspace = wc_isspace_builtin,
     199             :     .char_is_cased = char_is_cased_builtin,
     200             :     .wc_tolower = wc_tolower_builtin,
     201             :     .wc_toupper = wc_toupper_builtin,
     202             : };
     203             : 
     204             : pg_locale_t
     205        1804 : create_pg_locale_builtin(Oid collid, MemoryContext context)
     206             : {
     207             :     const char *locstr;
     208             :     pg_locale_t result;
     209             : 
     210        1804 :     if (collid == DEFAULT_COLLATION_OID)
     211             :     {
     212             :         HeapTuple   tp;
     213             :         Datum       datum;
     214             : 
     215        1748 :         tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
     216        1748 :         if (!HeapTupleIsValid(tp))
     217           0 :             elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
     218        1748 :         datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
     219             :                                        Anum_pg_database_datlocale);
     220        1748 :         locstr = TextDatumGetCString(datum);
     221        1748 :         ReleaseSysCache(tp);
     222             :     }
     223             :     else
     224             :     {
     225             :         HeapTuple   tp;
     226             :         Datum       datum;
     227             : 
     228          56 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
     229          56 :         if (!HeapTupleIsValid(tp))
     230           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
     231          56 :         datum = SysCacheGetAttrNotNull(COLLOID, tp,
     232             :                                        Anum_pg_collation_colllocale);
     233          56 :         locstr = TextDatumGetCString(datum);
     234          56 :         ReleaseSysCache(tp);
     235             :     }
     236             : 
     237        1804 :     builtin_validate_locale(GetDatabaseEncoding(), locstr);
     238             : 
     239        1804 :     result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
     240             : 
     241        1804 :     result->info.builtin.locale = MemoryContextStrdup(context, locstr);
     242        1804 :     result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
     243        1804 :     result->deterministic = true;
     244        1804 :     result->collate_is_c = true;
     245        1804 :     result->ctype_is_c = (strcmp(locstr, "C") == 0);
     246        1804 :     if (!result->ctype_is_c)
     247        1782 :         result->ctype = &ctype_methods_builtin;
     248             : 
     249        1804 :     return result;
     250             : }
     251             : 
     252             : char *
     253        1880 : get_collation_actual_version_builtin(const char *collcollate)
     254             : {
     255             :     /*
     256             :      * The only two supported locales (C and C.UTF-8) are both based on memcmp
     257             :      * and are not expected to change, but track the version anyway.
     258             :      *
     259             :      * Note that the character semantics may change for some locales, but the
     260             :      * collation version only tracks changes to sort order.
     261             :      */
     262        1880 :     if (strcmp(collcollate, "C") == 0)
     263          48 :         return "1";
     264        1832 :     else if (strcmp(collcollate, "C.UTF-8") == 0)
     265        1808 :         return "1";
     266          24 :     else if (strcmp(collcollate, "PG_UNICODE_FAST") == 0)
     267          24 :         return "1";
     268             :     else
     269           0 :         ereport(ERROR,
     270             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     271             :                  errmsg("invalid locale name \"%s\" for builtin provider",
     272             :                         collcollate)));
     273             : 
     274             :     return NULL;                /* keep compiler quiet */
     275             : }

Generated by: LCOV version 1.16