Make it clearer that not every Postgres character set can be used as a
server-side character set.
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 28 Jul 2006 15:33:17 +0000 (15:33 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 28 Jul 2006 15:33:17 +0000 (15:33 +0000)

doc/src/sgml/charset.sgml

index c25f72a73ebcdf4315f9db1e03692e352a21a5b4..5c0ad1b040b0bc021e14f4badd3309ddeb59a03b 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.76 2006/02/18 16:15:21 petere Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.77 2006/07/28 15:33:17 tgl Exp $ -->
 
 <chapter id="charset">
  <title>Localization</>
@@ -304,14 +304,13 @@ initdb --locale=sv_SE
    allows you to store text in a variety of character sets, including
    single-byte character sets such as the ISO 8859 series and
    multiple-byte character sets such as <acronym>EUC</> (Extended Unix
-   Code), UTF-8, and Mule internal code.  All character sets can be
-   used transparently throughout the server.  (If you use extension
-   functions from other sources, it depends on whether they wrote
-   their code correctly.)  The default character set is selected while
+   Code), UTF-8, and Mule internal code.  All supported character sets
+   can be used transparently by clients, but a few are not supported
+   for use within the server (that is, as a server-side encoding).
+   The default character set is selected while
    initializing your <productname>PostgreSQL</productname> database
    cluster using <command>initdb</>.  It can be overridden when you
-   create a database using <command>createdb</command> or by using the
-   SQL command <command>CREATE DATABASE</>. So you can have multiple
+   create a database, so you can have multiple
    databases each with a different character set.
   </para>
 
@@ -320,17 +319,18 @@ initdb --locale=sv_SE
 
     <para>
      <xref linkend="charset-table"> shows the character sets available
-     for use in the server.
+     for use in <productname>PostgreSQL</productname>.
     </para>
 
      <table id="charset-table">
       <title>Server Character Sets</title>
-      <tgroup cols="2">
+      <tgroup cols="6">
        <thead>
         <row>
          <entry>Name</entry>
          <entry>Description</entry>
          <entry>Language</entry>
+         <entry>Server?</entry>
          <!--
           The Bytes/Char field is populated by looking at the values returned
           by pg_wchar_table.mblen function for each encoding.
@@ -344,6 +344,7 @@ initdb --locale=sv_SE
          <entry><literal>BIG5</literal></entry>
          <entry>Big Five</entry>
          <entry>Traditional Chinese</entry>
+         <entry>No</entry>
          <entry>1-2</entry>
          <entry><literal>WIN950</>, <literal>Windows950</></entry>
         </row>
@@ -351,6 +352,7 @@ initdb --locale=sv_SE
          <entry><literal>EUC_CN</literal></entry>
          <entry>Extended UNIX Code-CN</entry>
          <entry>Simplified Chinese</entry>
+         <entry>Yes</entry>
          <entry>1-3</entry>
          <entry></entry>
         </row>
@@ -358,6 +360,7 @@ initdb --locale=sv_SE
          <entry><literal>EUC_JP</literal></entry>
          <entry>Extended UNIX Code-JP</entry>
          <entry>Japanese</entry>
+         <entry>Yes</entry>
          <entry>1-3</entry>
          <entry></entry>
         </row>
@@ -365,6 +368,7 @@ initdb --locale=sv_SE
          <entry><literal>EUC_KR</literal></entry>
          <entry>Extended UNIX Code-KR</entry>
          <entry>Korean</entry>
+         <entry>Yes</entry>
          <entry>1-3</entry>
          <entry></entry>
         </row>
@@ -372,6 +376,7 @@ initdb --locale=sv_SE
          <entry><literal>EUC_TW</literal></entry>
          <entry>Extended UNIX Code-TW</entry>
          <entry>Traditional Chinese, Taiwanese</entry>
+         <entry>Yes</entry>
          <entry>1-3</entry>
          <entry></entry>
         </row>
@@ -379,6 +384,7 @@ initdb --locale=sv_SE
          <entry><literal>GB18030</literal></entry>
          <entry>National Standard</entry>
          <entry>Chinese</entry>
+         <entry>No</entry>
          <entry>1-2</entry>
          <entry></entry>
         </row>
@@ -386,6 +392,7 @@ initdb --locale=sv_SE
          <entry><literal>GBK</literal></entry>
          <entry>Extended National Standard</entry>
          <entry>Simplified Chinese</entry>
+         <entry>No</entry>
          <entry>1-2</entry>
          <entry><literal>WIN936</>, <literal>Windows936</></entry>
         </row>
@@ -393,6 +400,7 @@ initdb --locale=sv_SE
          <entry><literal>ISO_8859_5</literal></entry>
          <entry>ISO 8859-5, <acronym>ECMA</> 113</entry>
          <entry>Latin/Cyrillic</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry></entry>
         </row>
@@ -400,6 +408,7 @@ initdb --locale=sv_SE
          <entry><literal>ISO_8859_6</literal></entry>
          <entry>ISO 8859-6, <acronym>ECMA</> 114</entry>
          <entry>Latin/Arabic</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry></entry>
         </row>
@@ -407,6 +416,7 @@ initdb --locale=sv_SE
          <entry><literal>ISO_8859_7</literal></entry>
          <entry>ISO 8859-7, <acronym>ECMA</> 118</entry>
          <entry>Latin/Greek</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry></entry>
         </row>
@@ -414,6 +424,7 @@ initdb --locale=sv_SE
          <entry><literal>ISO_8859_8</literal></entry>
          <entry>ISO 8859-8, <acronym>ECMA</> 121</entry>
          <entry>Latin/Hebrew</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry></entry>
         </row>
@@ -421,6 +432,7 @@ initdb --locale=sv_SE
          <entry><literal>JOHAB</literal></entry>
          <entry><acronym>JOHAB</></entry>
          <entry>Korean (Hangul)</entry>
+         <entry>Yes</entry>
          <entry>1-3</entry>
          <entry></entry>
         </row>
@@ -428,6 +440,7 @@ initdb --locale=sv_SE
          <entry><literal>KOI8</literal></entry>
          <entry><acronym>KOI</acronym>8-R(U)</entry>
          <entry>Cyrillic</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>KOI8R</></entry>
         </row>
@@ -435,6 +448,7 @@ initdb --locale=sv_SE
          <entry><literal>LATIN1</literal></entry>
          <entry>ISO 8859-1, <acronym>ECMA</> 94</entry>
          <entry>Western European</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>ISO88591</></entry>
         </row>
@@ -442,6 +456,7 @@ initdb --locale=sv_SE
          <entry><literal>LATIN2</literal></entry>
          <entry>ISO 8859-2, <acronym>ECMA</> 94</entry>
          <entry>Central European</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>ISO88592</></entry>
         </row>
@@ -449,6 +464,7 @@ initdb --locale=sv_SE
          <entry><literal>LATIN3</literal></entry>
          <entry>ISO 8859-3, <acronym>ECMA</> 94</entry>
          <entry>South European</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>ISO88593</></entry>
         </row>
@@ -456,6 +472,7 @@ initdb --locale=sv_SE
          <entry><literal>LATIN4</literal></entry>
          <entry>ISO 8859-4, <acronym>ECMA</> 94</entry>
          <entry>North European</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>ISO88594</></entry>
         </row>
@@ -463,6 +480,7 @@ initdb --locale=sv_SE
          <entry><literal>LATIN5</literal></entry>
          <entry>ISO 8859-9, <acronym>ECMA</> 128</entry>
          <entry>Turkish</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>ISO88599</></entry>
         </row>
@@ -470,6 +488,7 @@ initdb --locale=sv_SE
          <entry><literal>LATIN6</literal></entry>
          <entry>ISO 8859-10, <acronym>ECMA</> 144</entry>
          <entry>Nordic</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>ISO885910</></entry>
         </row>
@@ -477,6 +496,7 @@ initdb --locale=sv_SE
          <entry><literal>LATIN7</literal></entry>
          <entry>ISO 8859-13</entry>
          <entry>Baltic</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>ISO885913</></entry>
         </row>
@@ -484,6 +504,7 @@ initdb --locale=sv_SE
          <entry><literal>LATIN8</literal></entry>
          <entry>ISO 8859-14</entry>
          <entry>Celtic</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>ISO885914</></entry>
         </row>
@@ -491,6 +512,7 @@ initdb --locale=sv_SE
          <entry><literal>LATIN9</literal></entry>
          <entry>ISO 8859-15</entry>
          <entry>LATIN1 with Euro and accents</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry>ISO885915</entry>
         </row>
@@ -498,6 +520,7 @@ initdb --locale=sv_SE
          <entry><literal>LATIN10</literal></entry>
          <entry>ISO 8859-16, <acronym>ASRO</> SR 14111</entry>
          <entry>Romanian</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>ISO885916</></entry>
         </row>
@@ -505,6 +528,7 @@ initdb --locale=sv_SE
          <entry><literal>MULE_INTERNAL</literal></entry>
          <entry>Mule internal code</entry>
          <entry>Multilingual Emacs</entry>
+         <entry>Yes</entry>
          <entry>1-4</entry>
          <entry></entry>
         </row>
@@ -512,6 +536,7 @@ initdb --locale=sv_SE
          <entry><literal>SJIS</literal></entry>
          <entry>Shift JIS</entry>
          <entry>Japanese</entry>
+         <entry>No</entry>
          <entry>1-2</entry>
          <entry><literal>Mskanji</>, <literal>ShiftJIS</>, <literal>WIN932</>, <literal>Windows932</></entry>
         </row>
@@ -519,6 +544,7 @@ initdb --locale=sv_SE
          <entry><literal>SQL_ASCII</literal></entry>
          <entry>unspecified (see text)</entry>
          <entry><emphasis>any</></entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry></entry>
         </row>
@@ -526,6 +552,7 @@ initdb --locale=sv_SE
          <entry><literal>UHC</literal></entry>
          <entry>Unified Hangul Code</entry>
          <entry>Korean</entry>
+         <entry>No</entry>
          <entry>1-2</entry>
          <entry><literal>WIN949</>, <literal>Windows949</></entry>
         </row>
@@ -533,6 +560,7 @@ initdb --locale=sv_SE
          <entry><literal>UTF8</literal></entry>
          <entry>Unicode, 8-bit</entry>
          <entry><emphasis>all</></entry>
+         <entry>Yes</entry>
          <entry>1-4</entry>
          <entry><literal>Unicode</></entry>
         </row>
@@ -540,6 +568,7 @@ initdb --locale=sv_SE
          <entry><literal>WIN866</literal></entry>
          <entry>Windows CP866</entry>
          <entry>Cyrillic</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>ALT</></entry>
         </row>
@@ -547,6 +576,7 @@ initdb --locale=sv_SE
          <entry><literal>WIN874</literal></entry>
          <entry>Windows CP874</entry>
          <entry>Thai</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry></entry>
         </row>
@@ -554,6 +584,7 @@ initdb --locale=sv_SE
          <entry><literal>WIN1250</literal></entry>
          <entry>Windows CP1250</entry>
          <entry>Central European</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry></entry>
         </row>
@@ -561,6 +592,7 @@ initdb --locale=sv_SE
          <entry><literal>WIN1251</literal></entry>
          <entry>Windows CP1251</entry>
          <entry>Cyrillic</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>WIN</></entry>
         </row>
@@ -568,48 +600,55 @@ initdb --locale=sv_SE
          <entry><literal>WIN1252</literal></entry>
          <entry>Windows CP1252</entry>
          <entry>Western European</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry></entry>
         </row>
-   <row>
-    <entry><literal>WIN1253</literal></entry>
-    <entry>Windows CP1253</entry>
-    <entry>Greek</entry>
-    <entry>1</entry>
+        <row>
+         <entry><literal>WIN1253</literal></entry>
+         <entry>Windows CP1253</entry>
+         <entry>Greek</entry>
+         <entry>Yes</entry>
+         <entry>1</entry>
+         <entry></entry>
+        </row>
+        <row>
+         <entry><literal>WIN1254</literal></entry>
+         <entry>Windows CP1254</entry>
+         <entry>Turkish</entry>
+         <entry>Yes</entry>
+         <entry>1</entry>
+         <entry></entry>
+        </row>
+        <row>
+         <entry><literal>WIN1255</literal></entry>
+         <entry>Windows CP1255</entry>
+         <entry>Hebrew</entry>
+         <entry>Yes</entry>
+         <entry>1</entry>
          <entry></entry>
         </row>
-   <row>
-    <entry><literal>WIN1254</literal></entry>
-    <entry>Windows CP1254</entry>
-    <entry>Turkish</entry>
-    <entry>1</entry>
-    <entry></entry>
-   </row>
-   <row>
-    <entry><literal>WIN1255</literal></entry>
-    <entry>Windows CP1255</entry>
-    <entry>Hebrew</entry>
-    <entry>1</entry>
-    <entry></entry>
-   </row>
         <row>
          <entry><literal>WIN1256</literal></entry>
          <entry>Windows CP1256</entry>
          <entry>Arabic</entry>
+         <entry>Yes</entry>
+         <entry>1</entry>
+         <entry></entry>
+        </row>
+        <row>
+         <entry><literal>WIN1257</literal></entry>
+         <entry>Windows CP1257</entry>
+         <entry>Baltic</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry></entry>
         </row>
-   <row>
-    <entry><literal>WIN1257</literal></entry>
-    <entry>Windows CP1257</entry>
-    <entry>Baltic</entry>
-    <entry>1</entry>
-    <entry></entry>
-   </row>
         <row>
          <entry><literal>WIN1258</literal></entry>
          <entry>Windows CP1258</entry>
          <entry>Vietnamese</entry>
+         <entry>Yes</entry>
          <entry>1</entry>
          <entry><literal>ABC</>, <literal>TCVN</>, <literal>TCVN5712</>, <literal>VSCII</></entry>
         </row>
@@ -731,12 +770,11 @@ $ <userinput>psql -l</userinput>
     <para>
      <productname>PostgreSQL</productname> supports automatic
      character set conversion between server and client for certain
-     character sets. The conversion information is stored in the
-     <literal>pg_conversion</> system catalog. You can create a new
-     conversion by using the SQL command <command>CREATE
-     CONVERSION</command>. <productname>PostgreSQL</> comes with some
-     predefined conversions. They are listed in <xref
-     linkend="multibyte-translation-table">.
+     character set combinations. The conversion information is stored in the
+     <literal>pg_conversion</> system catalog.  <productname>PostgreSQL</>
+     comes with some predefined conversions, as shown in <xref
+     linkend="multibyte-translation-table">. You can create a new
+     conversion using the SQL command <command>CREATE CONVERSION</command>.
     </para>
 
      <table id="multibyte-translation-table">
@@ -1081,7 +1119,8 @@ char *pg_encoding_to_char(int <replaceable>encoding_id</replaceable>);
 SET CLIENT_ENCODING TO '<replaceable>value</>';
 </programlisting>
 
-        Also you can use the more standard SQL syntax <literal>SET NAMES</literal> for this purpose:
+        You can also use the standard SQL syntax <literal>SET NAMES</literal>
+        for this purpose:
 
 <programlisting>
 SET NAMES '<replaceable>value</>';
@@ -1130,10 +1169,8 @@ RESET client_encoding;
      If the conversion of a particular character is not possible
      &mdash; suppose you chose <literal>EUC_JP</literal> for the
      server and <literal>LATIN1</literal> for the client, then some
-     Japanese characters cannot be converted to
-     <literal>LATIN1</literal> &mdash; it is transformed to its
-     hexadecimal byte values in parentheses, e.g.,
-     <literal>(826C)</literal>.
+     Japanese characters do not have a representation in
+     <literal>LATIN1</literal> &mdash; an error is reported.
     </para>
 
     <para>