Support non-ASCII letters in psql variable names.
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 26 Aug 2011 14:41:31 +0000 (10:41 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 26 Aug 2011 17:53:09 +0000 (13:53 -0400)
As in the backend, the implementation actually accepts any non-ASCII
character, but we only document that you can use letters.

doc/src/sgml/ref/psql-ref.sgml
src/bin/psql/command.c
src/bin/psql/psqlscan.l
src/bin/psql/variables.c
src/bin/psql/variables.h

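diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 2db4adff4b99aeadc808e67ba17cf9a64cb6c8b8..7e30c57c35bd0f3af34934c7d9da77b58fefb64c 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml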
@@ -2206,7 +2206,7 @@ lo_import 152801
         </para>
 
         <para>
-        Valid variable names can contain characters, digits, and
+        Valid variable names can contain letters, digits, and
         underscores. See the section <xref
         linkend="APP-PSQL-variables"
         endterm="APP-PSQL-variables-title"> below for details.
@@ -2461,8 +2461,12 @@ lo_import 152801
     <application>psql</application> provides variable substitution
     features similar to common Unix command shells.
     Variables are simply name/value pairs, where the value
-    can be any string of any length. To set variables, use the
-    <application>psql</application> meta-command
+    can be any string of any length.  The name must consist of letters
+    (including non-Latin letters), digits, and underscores.
+    </para>
+
+    <para>
+    To set a variable, use the <application>psql</application> meta-command
     <command>\set</command>:
 <programlisting>
 testdb=&gt; <userinput>\set foo bar</userinput>
@@ -2498,16 +2502,15 @@ bar
     </para>
 
     <para>
-    <application>psql</application>'s internal variable names can
-    consist of letters, numbers, and underscores in any order and any
-    number of them. A number of these variables are treated specially
-    by <application>psql</application>. They indicate certain option
+    A number of these variables are treated specially
+    by <application>psql</application>. They represent certain option
     settings that can be changed at run time by altering the value of
-    the variable or that represent some state of the application. Although
-    you can use these variables for any other purpose, this is not
+    the variable, or in some cases represent changeable state of
+    <application>psql</application>. Although
+    you can use these variables for other purposes, this is not
     recommended, as the program behavior might grow really strange
-    really quickly. By convention, all specially treated variables
-    consist of all upper-case letters (and possibly numbers and
+    really quickly. By convention, all specially treated variables' names
+    consist of all upper-case ASCII letters (and possibly digits and
     underscores). To ensure maximum compatibility in the future, avoid
     using such variable names for your own purposes. A list of all specially
     treated variables follows.
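diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
index d6a925e435bf697019e508c25de5a46d655776be..6d9cd6492f68002a11c415e873e79f458f8b5b7b 100644
--- a/src/bin/psql/command.c
+++ b/src/bin/psql/command.c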
@@ -995,7 +995,7 @@ exec_command(const char *cmd,
 
                        if (!SetVariable(pset.vars, opt, result))
                        {
-                               psql_error("\\%s: error\n", cmd);
+                               psql_error("\\%s: error while setting variable\n", cmd);
                                success = false;
                        }
 
@@ -1096,7 +1096,7 @@ exec_command(const char *cmd,
 
                        if (!SetVariable(pset.vars, opt0, newval))
                        {
-                               psql_error("\\%s: error\n", cmd);
+                               psql_error("\\%s: error while setting variable\n", cmd);
                                success = false;
                        }
                        free(newval);
@@ -1272,7 +1272,7 @@ exec_command(const char *cmd,
                }
                else if (!SetVariable(pset.vars, opt, NULL))
                {
-                       psql_error("\\%s: error\n", cmd);
+                       psql_error("\\%s: error while setting variable\n", cmd);
                        success = false;
                }
                free(opt);
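diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l
index 8439c865bfe401a82fb7fa93db7e4708d51a0cd0..1df8f3aa4f695a1d363d119bde9dc5a972f1fd7b 100644
--- a/src/bin/psql/psqlscan.l
+++ b/src/bin/psql/psqlscan.l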
@@ -120,6 +120,7 @@ static bool var_is_current_source(PsqlScanState state, const char *varname);
 static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
                                                                          char **txtcopy);
 static void emit(const char *txt, int len);
+static char *extract_substring(const char *txt, int len);
 static void escape_variable(bool as_ident);
 
 #define ECHO emit(yytext, yyleng)
@@ -384,6 +385,9 @@ realfail2           ({integer}|{decimal})[Ee][-+]
 
 param                  \${integer}
 
+/* psql-specific: characters allowed in variable names */
+variable_char  [A-Za-z\200-\377_0-9]
+
 other                  .
 
 /*
@@ -680,11 +684,12 @@ other                     .
                                        return LEXRES_BACKSLASH;
                                }
 
-:[A-Za-z0-9_]+ {
+:{variable_char}+      {
                                        /* Possible psql variable substitution */
-                                       const char *varname = yytext + 1;
+                                       char   *varname;
                                        const char *value;
 
+                                       varname = extract_substring(yytext + 1, yyleng - 1);
                                        value = GetVariable(pset.vars, varname);
 
                                        if (value)
@@ -713,13 +718,15 @@ other                     .
                                                 */
                                                ECHO;
                                        }
+
+                                       free(varname);
                                }
 
-:'[A-Za-z0-9_]+'       {
+:'{variable_char}+'    {
                                        escape_variable(false);
                                }
 
-:\"[A-Za-z0-9_]+\"     {
+:\"{variable_char}+\"  {
                                        escape_variable(true);
                                }
 
@@ -728,13 +735,13 @@ other                     .
         * two rules above fails to match completely.
         */
 
-:'[A-Za-z0-9_]*        {
+:'{variable_char}*     {
                                        /* Throw back everything but the colon */
                                        yyless(1);
                                        ECHO;
                                }
 
-:\"[A-Za-z0-9_]*       {
+:\"{variable_char}*    {
                                        /* Throw back everything but the colon */
                                        yyless(1);
                                        ECHO;
@@ -930,15 +937,18 @@ other                     .
                                        }
                                }
 
-:[A-Za-z0-9_]+ {
+:{variable_char}+      {
                                        /* Possible psql variable substitution */
                                        if (option_type == OT_VERBATIM)
                                                ECHO;
                                        else
                                        {
+                                               char   *varname;
                                                const char *value;
 
-                                               value = GetVariable(pset.vars, yytext + 1);
+                                               varname = extract_substring(yytext + 1, yyleng - 1);
+                                               value = GetVariable(pset.vars, varname);
+                                               free(varname);
 
                                                /*
                                                 * The variable value is just emitted without any
@@ -956,7 +966,7 @@ other                       .
                                        return LEXRES_OK;
                                }
 
-:'[A-Za-z0-9_]+'       {
+:'{variable_char}+'    {
                                        if (option_type == OT_VERBATIM)
                                                ECHO;
                                        else
@@ -967,7 +977,7 @@ other                       .
                                }
 
 
-:\"[A-Za-z0-9_]+\"     {
+:\"{variable_char}+\"  {
                                        if (option_type == OT_VERBATIM)
                                                ECHO;
                                        else
@@ -977,14 +987,14 @@ other                     .
                                        }
                                }
 
-:'[A-Za-z0-9_]*        {
+:'{variable_char}*     {
                                        /* Throw back everything but the colon */
                                        yyless(1);
                                        ECHO;
                                        BEGIN(xslashdefaultarg);
                                }
 
-:\"[A-Za-z0-9_]*       {
+:\"{variable_char}*    {
                                        /* Throw back everything but the colon */
                                        yyless(1);
                                        ECHO;
@@ -1844,16 +1854,58 @@ emit(const char *txt, int len)
        }
 }
 
+/*
+ * extract_substring --- fetch the true value of (part of) the current token
+ *
+ * This is like emit(), except that the data is returned as a malloc'd string
+ * rather than being pushed directly to output_buf.
+ */
+static char *
+extract_substring(const char *txt, int len)
+{
+       char       *result = (char *) pg_malloc(len + 1);
+
+       if (cur_state->safe_encoding)
+               memcpy(result, txt, len);
+       else
+       {
+               /* Gotta do it the hard way */
+               const char *reference = cur_state->refline;
+               int             i;
+
+               reference += (txt - cur_state->curline);
+
+               for (i = 0; i < len; i++)
+               {
+                       char    ch = txt[i];
+
+                       if (ch == (char) 0xFF)
+                               ch = reference[i];
+                       result[i] = ch;
+               }
+       }
+       result[len] = '\0';
+       return result;
+}
+
+/*
+ * escape_variable --- process :'VARIABLE' or :"VARIABLE"
+ *
+ * If the variable name is found, escape its value using the appropriate
+ * quoting method and emit the value to output_buf.  (Since the result is
+ * surely quoted, there is never any reason to rescan it.)  If we don't
+ * find the variable or the escaping function fails, emit the token as-is.
+ */
 static void
 escape_variable(bool as_ident)
 {
-       char            saved_char;
+       char       *varname;
        const char *value;
 
        /* Variable lookup. */
-       saved_char = yytext[yyleng - 1];
-       yytext[yyleng - 1] = '\0';
-       value = GetVariable(pset.vars, yytext + 2);
+       varname = extract_substring(yytext + 2, yyleng - 3);
+       value = GetVariable(pset.vars, varname);
+       free(varname);
 
        /* Escaping. */
        if (value)
@@ -1870,9 +1922,11 @@ escape_variable(bool as_ident)
                        else
                                escaped_value =
                                        PQescapeLiteral(pset.db, value, strlen(value));
+
                        if (escaped_value == NULL)
                        {
                                const char *error = PQerrorMessage(pset.db);
+
                                psql_error("%s", error);
                        }
                        else
@@ -1888,6 +1942,5 @@ escape_variable(bool as_ident)
         * If we reach this point, some kind of error has occurred.  Emit the
         * original text into the output buffer.
         */
-       yytext[yyleng - 1] = saved_char;
        emit(yytext, yyleng);
 }
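diff --git a/src/bin/psql/variables.c b/src/bin/psql/variables.c
index 382528933774416d5386df6b2c2553ee38172c1f..a43c786bf02dd7f0593b80c3999f63b1f1123f33 100644
--- a/src/bin/psql/variables.c
+++ b/src/bin/psql/variables.c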
@@ -6,10 +6,40 @@
  * src/bin/psql/variables.c
  */
 #include "postgres_fe.h"
+
 #include "common.h"
 #include "variables.h"
 
 
+/*
+ * Check whether a variable's name is allowed.
+ *
+ * We allow any non-ASCII character, as well as ASCII letters, digits, and
+ * underscore.  Keep this in sync with the definition of variable_char in
+ * psqlscan.l.
+ */
+static bool
+valid_variable_name(const char *name)
+{
+       const unsigned char *ptr = (const unsigned char *) name;
+
+       /* Mustn't be zero-length */
+       if (*ptr == '\0')
+               return false;
+
+       while (*ptr)
+       {
+               if (IS_HIGHBIT_SET(*ptr) ||
+                       strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"
+                                  "_0123456789", *ptr) != NULL)
+                       ptr++;
+               else
+                       return false;
+       }
+
+       return true;
+}
+
 /*
  * A "variable space" is represented by an otherwise-unused struct _variable
  * that serves as list header.
@@ -158,7 +188,7 @@ SetVariable(VariableSpace space, const char *name, const char *value)
        if (!space)
                return false;
 
-       if (strspn(name, VALID_VARIABLE_CHARS) != strlen(name))
+       if (!valid_variable_name(name))
                return false;
 
        if (!value)
@@ -202,7 +232,7 @@ SetVariableAssignHook(VariableSpace space, const char *name, VariableAssignHook
        if (!space)
                return false;
 
-       if (strspn(name, VALID_VARIABLE_CHARS) != strlen(name))
+       if (!valid_variable_name(name))
                return false;
 
        for (previous = space, current = space->next;
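diff --git a/src/bin/psql/variables.h b/src/bin/psql/variables.h
index 4197069b4b76161f2f2d4b41e1630ee30feef8c6..865391dba7b60ba31dfdb7339639ca01a0b4ebcd 100644
--- a/src/bin/psql/variables.h
+++ b/src/bin/psql/variables.h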
@@ -32,10 +32,6 @@ struct _variable
 
 typedef struct _variable *VariableSpace;
 
-/* Allowed chars in a variable's name */
-#define VALID_VARIABLE_CHARS "abcdefghijklmnopqrstuvwxyz"\
-                                                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789_"
-
 VariableSpace CreateVariableSpace(void);
 const char *GetVariable(VariableSpace space, const char *name);