Add function to import operating system collations
author Peter Eisentraut <peter_e@gmx.net>
Wed, 18 Jan 2017 17:00:00 +0000 (12:00 -0500)
committer Peter Eisentraut <peter_e@gmx.net>
Wed, 18 Jan 2017 14:35:56 +0000 (09:35 -0500)
Move this logic out of initdb into a user-callable function.  This
simplifies the code and makes it possible to update the standard
collations later on if additional operating system collations appear.

Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Euler Taveira <euler@timbira.com.br>
doc/src/sgml/charset.sgml
doc/src/sgml/func.sgml
src/backend/catalog/pg_collation.c
src/backend/commands/collationcmds.c
src/bin/initdb/initdb.c
src/include/catalog/catversion.h
src/include/catalog/pg_collation_fn.h
src/include/catalog/pg_proc.h

index f8c7ac3b1694cdad624c9926a365d8d2860c3d00..2aba0fc5282fc35263d50f58601f352f819b5d05 100644 (file)
@@ -496,7 +496,7 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
    </para>
   </sect2>
 
-  <sect2>
+  <sect2 id="collation-managing">
    <title>Managing Collations</title>
 
    <para>
index 10e31868baf24908a3112e36502c1bb3803bdda2..eb1b6984bf05871d2438339efb271620540eb394 100644 (file)
@@ -19190,6 +19190,46 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
     in the database's default tablespace, the tablespace can be specified as 0.
    </para>
 
+   <para>
+    <xref linkend="functions-admin-collation"> lists functions used to manage
+    collations.
+   </para>
+
+   <table id="functions-admin-collation">
+    <title>Collation Management Functions</title>
+    <tgroup cols="3">
+     <thead>
+      <row><entry>Name</entry> <entry>Return Type</entry> <entry>Description</entry></row>
+     </thead>
+
+     <tbody>
+      <row>
+       <entry>
+        <indexterm><primary>pg_import_system_collations</primary></indexterm>
+        <literal><function>pg_import_system_collations(<parameter>if_not_exists</> <type>boolean</>, <parameter>schema</> <type>regnamespace</>)</function></literal>
+       </entry>
+       <entry><type>void</type></entry>
+       <entry>Import operating system collations</entry>
+      </row>
+     </tbody>
+    </tgroup>
+   </table>
+
+   <para>
+    <function>pg_import_system_collations</> populates the system
+    catalog <literal>pg_collation</literal> with collations based on all the
+    locales it finds on the operating system.  This is
+    what <command>initdb</command> uses;
+    see <xref linkend="collation-managing"> for more details.  If additional
+    locales are installed into the operating system later on, this function
+    can be run again to add collations for the new locales.  In that case, the
+    parameter <parameter>if_not_exists</parameter> should be set to true to
+    skip over existing collations.  The <parameter>schema</parameter>
+    parameter would typically be <literal>pg_catalog</literal>, but that is
+    not a requirement.  (Collation objects based on locales that are no longer
+    present on the operating system are never removed by this function.)
+   </para>
+
   </sect2>
 
   <sect2 id="functions-admin-index">
index fa42ad5ec1b50b3d26e84ec1eb0f1d44e4e147f8..694c0f67f55432f18d100b05d8fc81907871412f 100644 (file)
@@ -41,7 +41,8 @@ Oid
 CollationCreate(const char *collname, Oid collnamespace,
                Oid collowner,
                int32 collencoding,
-               const char *collcollate, const char *collctype)
+               const char *collcollate, const char *collctype,
+               bool if_not_exists)
 {
    Relation    rel;
    TupleDesc   tupDesc;
@@ -72,10 +73,21 @@ CollationCreate(const char *collname, Oid collnamespace,
                              PointerGetDatum(collname),
                              Int32GetDatum(collencoding),
                              ObjectIdGetDatum(collnamespace)))
-       ereport(ERROR,
+   {
+       if (if_not_exists)
+       {
+           ereport(NOTICE,
                (errcode(ERRCODE_DUPLICATE_OBJECT),
-                errmsg("collation \"%s\" for encoding \"%s\" already exists",
+                errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
                        collname, pg_encoding_to_char(collencoding))));
+           return InvalidOid;
+       }
+       else
+           ereport(ERROR,
+                   (errcode(ERRCODE_DUPLICATE_OBJECT),
+                    errmsg("collation \"%s\" for encoding \"%s\" already exists",
+                           collname, pg_encoding_to_char(collencoding))));
+   }
 
    /*
     * Also forbid matching an any-encoding entry.  This test of course is not
@@ -86,10 +98,21 @@ CollationCreate(const char *collname, Oid collnamespace,
                              PointerGetDatum(collname),
                              Int32GetDatum(-1),
                              ObjectIdGetDatum(collnamespace)))
-       ereport(ERROR,
+   {
+       if (if_not_exists)
+       {
+           ereport(NOTICE,
+               (errcode(ERRCODE_DUPLICATE_OBJECT),
+                errmsg("collation \"%s\" already exists, skipping",
+                       collname)));
+           return InvalidOid;
+       }
+       else
+           ereport(ERROR,
                (errcode(ERRCODE_DUPLICATE_OBJECT),
                 errmsg("collation \"%s\" already exists",
                        collname)));
+   }
 
    /* open pg_collation */
    rel = heap_open(CollationRelationId, RowExclusiveLock);
index ccadfc2e47476658c0b75cb53fa0af106e89834f..5cb3e2bb282e39bd453ad0dbff15f0cd2e3d5d72 100644 (file)
@@ -136,7 +136,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters)
                             GetUserId(),
                             GetDatabaseEncoding(),
                             collcollate,
-                            collctype);
+                            collctype,
+                            false);
+
+   if (!OidIsValid(newoid))
+       return InvalidObjectAddress;
 
    ObjectAddressSet(address, CollationRelationId, newoid);
 
@@ -177,3 +181,151 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid)
                 errmsg("collation \"%s\" already exists in schema \"%s\"",
                        collname, get_namespace_name(nspOid))));
 }
+
+
+/*
+ * "Normalize" a locale name: copy "old" into "new" minus any encoding tag,
+ * i.e. a '.' and the ASCII letters, digits and '-' after it ("en_US.utf8" ->
+ * "en_US", "br_FR.iso885915@euro" -> "br_FR@euro").  "new" needs room for
+ * strlen(old) + 1 bytes.  Returns true if a tag was stripped.
+ */
+pg_attribute_unused()          /* presumably: the caller is compiled conditionally */
+static bool
+normalize_locale_name(char *new, const char *old)
+{
+   char       *n = new;
+   const char *o = old;
+   bool        changed = false;
+
+   while (*o)
+   {
+       if (*o == '.')
+       {
+           /* skip the '.' and the tag after it, such as ".utf8" or ".UTF-8" */
+           o++;
+           while ((*o >= 'A' && *o <= 'Z')
+                  || (*o >= 'a' && *o <= 'z')
+                  || (*o >= '0' && *o <= '9')
+                  || (*o == '-'))
+               o++;
+           changed = true;
+       }
+       else
+           *n++ = *o++;
+   }
+   *n = '\0';
+
+   return changed;
+}
+
+/* SQL function: add a collation for each usable locale "locale -a" lists. */
+Datum
+pg_import_system_collations(PG_FUNCTION_ARGS)
+{
+#if defined(HAVE_LOCALE_T) && !defined(WIN32)
+   bool        if_not_exists = PG_GETARG_BOOL(0);  /* passed on to CollationCreate() */
+   Oid         nspid = PG_GETARG_OID(1);   /* schema the collations are created in */
+
+   FILE       *locale_a_handle;
+   char        localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
+   int         count = 0;
+#endif
+
+   if (!superuser())
+       ereport(ERROR,
+               (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                (errmsg("must be superuser to import system collations"))));
+
+#if defined(HAVE_LOCALE_T) && !defined(WIN32)
+   locale_a_handle = OpenPipeStream("locale -a", "r");
+   if (locale_a_handle == NULL)
+       ereport(ERROR,
+               (errcode_for_file_access(),
+                errmsg("could not execute command \"%s\": %m",
+                       "locale -a")));
+
+   while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
+   {
+       int         i;
+       size_t      len;
+       int         enc;
+       bool        skip;
+       char        alias[NAMEDATALEN];
+
+       len = strlen(localebuf);
+
+       if (len == 0 || localebuf[len - 1] != '\n')
+       {
+           elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
+           continue;       /* NOTE(review): next fgets() returns the rest of this line */
+       }
+       localebuf[len - 1] = '\0';
+
+       /*
+        * Some systems have locale names that don't consist entirely of ASCII
+        * letters (such as "bokm&aring;l" or "fran&ccedil;ais").  This is
+        * pretty silly, since we need the locale itself to interpret the
+        * non-ASCII characters. We can't do much with those, so we filter
+        * them out.
+        */
+       skip = false;
+       for (i = 0; i < len; i++)
+       {
+           if (IS_HIGHBIT_SET(localebuf[i]))
+           {
+               skip = true;
+               break;
+           }
+       }
+       if (skip)
+       {
+           elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
+           continue;
+       }
+
+       enc = pg_get_encoding_from_locale(localebuf, false);
+       if (enc < 0)
+       {
+           /* skip; NOTE(review): the "false" above may suppress any message */
+           continue;
+       }
+       if (!PG_VALID_BE_ENCODING(enc))
+           continue;           /* ignore locales for client-only encodings */
+       if (enc == PG_SQL_ASCII)
+           continue;           /* C/POSIX are already in the catalog */
+
+       count++;
+
+       CollationCreate(localebuf, nspid, GetUserId(), enc,
+                       localebuf, localebuf, if_not_exists);
+
+       CommandCounterIncrement();
+
+       /*
+        * Generate aliases such as "en_US" in addition to "en_US.utf8" for
+        * ease of use.  Note that collation names are unique per encoding
+        * only, so this doesn't clash with "en_US" for LATIN1, say.
+        *
+        * This always runs in "if not exists" mode, to skip aliases that
+        * conflict with an existing locale name for the same encoding.  For
+        * example, "br_FR.iso88591" is normalized to "br_FR", both for
+        * encoding LATIN1.  But the unnormalized locale "br_FR" already
+        * exists for LATIN1.
+        */
+       if (normalize_locale_name(alias, localebuf))
+       {
+           CollationCreate(alias, nspid, GetUserId(), enc,
+                           localebuf, localebuf, true);
+           CommandCounterIncrement();
+       }
+   }
+
+   ClosePipeStream(locale_a_handle);
+
+   if (count == 0)
+       ereport(ERROR,
+               (errmsg("no usable system locales were found")));
+#endif /* HAVE_LOCALE_T && !WIN32 */
+
+   PG_RETURN_VOID();
+}
index 1e7d67724478205e9a483b89b1b187a7ddff6510..eb1be100c85326d4c4002a7e89d9d7eb5157f4d2 100644 (file)
@@ -1608,178 +1608,16 @@ setup_description(FILE *cmdfd)
    PG_CMD_PUTS("DROP TABLE tmp_pg_shdescription;\n\n");
 }
 
-#ifdef HAVE_LOCALE_T
-/*
- * "Normalize" a locale name, stripping off encoding tags such as
- * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
- * -> "br_FR@euro").  Return true if a new, different name was
- * generated.
- */
-static bool
-normalize_locale_name(char *new, const char *old)
-{
-   char       *n = new;
-   const char *o = old;
-   bool        changed = false;
-
-   while (*o)
-   {
-       if (*o == '.')
-       {
-           /* skip over encoding tag such as ".utf8" or ".UTF-8" */
-           o++;
-           while ((*o >= 'A' && *o <= 'Z')
-                  || (*o >= 'a' && *o <= 'z')
-                  || (*o >= '0' && *o <= '9')
-                  || (*o == '-'))
-               o++;
-           changed = true;
-       }
-       else
-           *n++ = *o++;
-   }
-   *n = '\0';
-
-   return changed;
-}
-#endif   /* HAVE_LOCALE_T */
-
 /*
  * populate pg_collation
  */
 static void
 setup_collation(FILE *cmdfd)
 {
-#if defined(HAVE_LOCALE_T) && !defined(WIN32)
-   int         i;
-   FILE       *locale_a_handle;
-   char        localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
-   int         count = 0;
-
-   locale_a_handle = popen_check("locale -a", "r");
-   if (!locale_a_handle)
-       return;                 /* complaint already printed */
-
-   PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( "
-               "   collname name, "
-               "   locale name, "
-               "   encoding int) WITHOUT OIDS;\n\n");
-
-   while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
-   {
-       size_t      len;
-       int         enc;
-       bool        skip;
-       char       *quoted_locale;
-       char        alias[NAMEDATALEN];
-
-       len = strlen(localebuf);
-
-       if (len == 0 || localebuf[len - 1] != '\n')
-       {
-           if (debug)
-               fprintf(stderr, _("%s: locale name too long, skipped: \"%s\"\n"),
-                       progname, localebuf);
-           continue;
-       }
-       localebuf[len - 1] = '\0';
-
-       /*
-        * Some systems have locale names that don't consist entirely of ASCII
-        * letters (such as "bokm&aring;l" or "fran&ccedil;ais").  This is
-        * pretty silly, since we need the locale itself to interpret the
-        * non-ASCII characters. We can't do much with those, so we filter
-        * them out.
-        */
-       skip = false;
-       for (i = 0; i < len; i++)
-       {
-           if (IS_HIGHBIT_SET(localebuf[i]))
-           {
-               skip = true;
-               break;
-           }
-       }
-       if (skip)
-       {
-           if (debug)
-               fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: \"%s\"\n"),
-                       progname, localebuf);
-           continue;
-       }
-
-       enc = pg_get_encoding_from_locale(localebuf, debug);
-       if (enc < 0)
-       {
-           /* error message printed by pg_get_encoding_from_locale() */
-           continue;
-       }
-       if (!PG_VALID_BE_ENCODING(enc))
-           continue;           /* ignore locales for client-only encodings */
-       if (enc == PG_SQL_ASCII)
-           continue;           /* C/POSIX are already in the catalog */
-
-       count++;
-
-       quoted_locale = escape_quotes(localebuf);
-
-       PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
-                      quoted_locale, quoted_locale, enc);
-
-       /*
-        * Generate aliases such as "en_US" in addition to "en_US.utf8" for
-        * ease of use.  Note that collation names are unique per encoding
-        * only, so this doesn't clash with "en_US" for LATIN1, say.
-        */
-       if (normalize_locale_name(alias, localebuf))
-       {
-           char       *quoted_alias = escape_quotes(alias);
-
-           PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
-                          quoted_alias, quoted_locale, enc);
-           free(quoted_alias);
-       }
-       free(quoted_locale);
-   }
+   PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n");
 
    /* Add an SQL-standard name */
-   PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n\n", PG_UTF8);
-
-   /*
-    * When copying collations to the final location, eliminate aliases that
-    * conflict with an existing locale name for the same encoding.  For
-    * example, "br_FR.iso88591" is normalized to "br_FR", both for encoding
-    * LATIN1.  But the unnormalized locale "br_FR" already exists for LATIN1.
-    * Prefer the alias that matches the OS locale name, else the first locale
-    * name by sort order (arbitrary choice to be deterministic).
-    *
-    * Also, eliminate any aliases that conflict with pg_collation's
-    * hard-wired entries for "C" etc.
-    */
-   PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) "
-               " SELECT DISTINCT ON (collname, encoding)"
-               "   collname, "
-               "   (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, "
-               "   (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, "
-               "   encoding, locale, locale "
-               "  FROM tmp_pg_collation"
-               "  WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)"
-    "  ORDER BY collname, encoding, (collname = locale) DESC, locale;\n\n");
-
-   /*
-    * Even though the table is temp, drop it explicitly so it doesn't get
-    * copied into template0/postgres databases.
-    */
-   PG_CMD_PUTS("DROP TABLE tmp_pg_collation;\n\n");
-
-   pclose(locale_a_handle);
-
-   if (count == 0 && !debug)
-   {
-       printf(_("No usable system locales were found.\n"));
-       printf(_("Use the option \"--debug\" to see details.\n"));
-   }
-#endif   /* not HAVE_LOCALE_T  && not WIN32 */
+   PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, '%s'::regrole, %d, 'C', 'C');\n\n", escape_quotes(username), PG_UTF8);
 }
 
 /*
index 54211f5618d2dacaf51feaa05f26cb481e953b5f..7d33f39bea599c8ca189feaa2a918b966bf5fede 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                         yyyymmddN */
-#define CATALOG_VERSION_NO 201701172
+#define CATALOG_VERSION_NO 201701181
 
 #endif
index 1ea757f1505558668559e4a9c98ce50a09a01b92..482ba7920e5e3c8f8078802491e598df09a1539e 100644 (file)
@@ -17,7 +17,8 @@
 extern Oid CollationCreate(const char *collname, Oid collnamespace,
                Oid collowner,
                int32 collencoding,
-               const char *collcollate, const char *collctype);
+               const char *collcollate, const char *collctype,
+               bool if_not_exists);
 extern void RemoveCollationById(Oid collationOid);
 
 #endif   /* PG_COLLATION_FN_H */
index 42f36891af582507d18767ca832108d5975610da..1a0eba3ca1b807d5e5c5b216dbb2099817bcf443 100644 (file)
@@ -5349,6 +5349,9 @@ DESCR("pg_controldata recovery state information as a function");
 DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,bigint_timestamps,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ ));
 DESCR("pg_controldata init state information as a function");
 
+DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ ));
+DESCR("import collations from operating system");
+
 /*
  * Symbolic values for provolatile column: these indicate whether the result
  * of a function is dependent *only* on the values of its explicit arguments,