Handle \v as a whitespace character in parsers

author Michael Paquier <michael@paquier.xyz>
Wed, 5 Jul 2023 23:16:24 +0000 (08:16 +0900)
committer Michael Paquier <michael@paquier.xyz>
Wed, 5 Jul 2023 23:16:24 +0000 (08:16 +0900)
diff --git a/contrib/cube/cubescan.l b/contrib/cube/cubescan.l

index 49cb69921653f968c90fcd20e9918b5192b4c62c..a30fbfc3111a25c2493931bc81d9dc7fbf2e7b17 100644 (file)
--- a/contrib/cube/cubescan.l
+++ b/contrib/cube/cubescan.l
@@ -63,7 +63,7 @@ NaN          [nN][aA][nN]
  \(           cube_yylval = "("; return O_PAREN;
  \)           cube_yylval = ")"; return C_PAREN;
  \,           cube_yylval = ","; return COMMA;
-[ \t\n\r\f]+ /* discard spaces */
+[ \t\n\r\f\v]+ /* discard spaces */
  .            return yytext[0]; /* alert parser of the garbage */
  
  %%
diff --git a/contrib/hstore/expected/hstore_utf8.out b/contrib/hstore/expected/hstore_utf8.out

index 4405824413218bda562a22ad046f6b7e8c785b2e..bbc885a181a373f92912088fb6044977f0b42fe4 100644 (file)
--- a/contrib/hstore/expected/hstore_utf8.out
+++ b/contrib/hstore/expected/hstore_utf8.out
@@ -34,3 +34,34 @@ SELECT 'keyąfoo=>valueą'::hstore;
   "keyąfoo"=>"valueą"
  (1 row)
  
+-- More patterns that may depend on isspace() and locales, all discarded.
+SELECT E'key\u000A=>value\u000A'::hstore; -- \n
+     hstore     
+----------------
+ "key"=>"value"
+(1 row)
+
+SELECT E'key\u0009=>value\u0009'::hstore; -- \t
+     hstore     
+----------------
+ "key"=>"value"
+(1 row)
+
+SELECT E'key\u000D=>value\u000D'::hstore; -- \r
+     hstore     
+----------------
+ "key"=>"value"
+(1 row)
+
+SELECT E'key\u000B=>value\u000B'::hstore; -- \v
+     hstore     
+----------------
+ "key"=>"value"
+(1 row)
+
+SELECT E'key\u000C=>value\u000C'::hstore; -- \f
+     hstore     
+----------------
+ "key"=>"value"
+(1 row)
+
diff --git a/contrib/hstore/sql/hstore_utf8.sql b/contrib/hstore/sql/hstore_utf8.sql

index face878324c37deb26ac006514c684e2684e9baf..38c9481ee649307eb13649896188f0b667859016 100644 (file)
--- a/contrib/hstore/sql/hstore_utf8.sql
+++ b/contrib/hstore/sql/hstore_utf8.sql
@@ -17,3 +17,10 @@ SELECT E'key\u0105=>value\u0105'::hstore;
  SELECT 'keyą=>valueą'::hstore;
  SELECT 'ą=>ą'::hstore;
  SELECT 'keyąfoo=>valueą'::hstore;
+
+-- More patterns that may depend on isspace() and locales, all discarded.
+SELECT E'key\u000A=>value\u000A'::hstore; -- \n
+SELECT E'key\u0009=>value\u0009'::hstore; -- \t
+SELECT E'key\u000D=>value\u000D'::hstore; -- \r
+SELECT E'key\u000B=>value\u000B'::hstore; -- \v
+SELECT E'key\u000C=>value\u000C'::hstore; -- \f
diff --git a/contrib/seg/segscan.l b/contrib/seg/segscan.l

index a1e9e9937ef3232777a084242ccda987a6f32e64..4ad529eccc468a016bb9566d3b10bfafa6c3f892 100644 (file)
--- a/contrib/seg/segscan.l
+++ b/contrib/seg/segscan.l
@@ -59,7 +59,7 @@ float        ({integer}|{real})([eE]{integer})?
  \<           seg_yylval.text = "<"; return EXTENSION;
  \>           seg_yylval.text = ">"; return EXTENSION;
  \~           seg_yylval.text = "~"; return EXTENSION;
-[ \t\n\r\f]+ /* discard spaces */
+[ \t\n\r\f\v]+ /* discard spaces */
  .            return yytext[0]; /* alert parser of the garbage */
  
  %%
diff --git a/src/backend/parser/parse_type.c b/src/backend/parser/parse_type.c

index be75dc6ab07e2d353f10b9cd9089cd13325a1f4d..63b4e969624b755426fc69155318e6a76ba8d4d4 100644 (file)
--- a/src/backend/parser/parse_type.c
+++ b/src/backend/parser/parse_type.c
@@ -742,7 +742,7 @@ typeStringToTypeName(const char *str, Node *escontext)
         ErrorContextCallback ptserrcontext;
  
         /* make sure we give useful error for empty input */
-       if (strspn(str, " \t\n\r\f") == strlen(str))
+       if (strspn(str, " \t\n\r\f\v") == strlen(str))
                 goto fail;
  
         /*
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l

index b2216a9eacd101e067f48ad5df679c55edf4c7b5..0708ba65405309250613b53e38e7712fa97b286f 100644 (file)
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -213,16 +213,16 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
   * versions of Postgres failed to recognize -- as a comment if the input
   * did not end with a newline.
   *
- * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ * non_newline_space tracks all space characters except newlines.
   *
   * XXX if you change the set of whitespace characters, fix scanner_isspace()
   * to agree.
   */
  
-space                  [ \t\n\r\f]
-horiz_space            [ \t\f]
-newline                        [\n\r]
-non_newline            [^\n\r]
+space                          [ \t\n\r\f\v]
+non_newline_space      [ \t\f\v]
+newline                                [\n\r]
+non_newline                    [^\n\r]
  
  comment                        ("--"{non_newline}*)
  
@@ -236,8 +236,8 @@ whitespace          ({space}+|{comment})
   */
  
  special_whitespace             ({space}+|{comment}{newline})
-horiz_whitespace               ({horiz_space}|{comment})
-whitespace_with_newline        ({horiz_whitespace}*{newline}{special_whitespace}*)
+non_newline_whitespace ({non_newline_space}|{comment})
+whitespace_with_newline        ({non_newline_whitespace}*{newline}{special_whitespace}*)
  
  quote                  '
  /* If we see {quote} then {quotecontinue}, the quoted string continues */
@@ -1414,6 +1414,8 @@ unescape_single_char(unsigned char c, core_yyscan_t yyscanner)
                         return '\r';
                 case 't':
                         return '\t';
+               case 'v':
+                       return '\v';
                 default:
                         /* check for backslash followed by non-7-bit-ASCII */
                         if (c == '\0' || IS_HIGHBIT_SET(c))
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c

index ed67f5f5fe25de8da081733805fa9794a222e589..4f0005a114e434f8e6ad15925da499eda486be69 100644 (file)
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -121,6 +121,7 @@ scanner_isspace(char ch)
                 ch == '\t' ||
                 ch == '\n' ||
                 ch == '\r' ||
+               ch == '\v' ||
                 ch == '\f')
                 return true;
         return false;
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l

index cb467ca46f7f0cf08b8430bb7bd1630de801b1f8..1cc7fb858cd581acf018214fbdfacf568faee285 100644 (file)
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -73,7 +73,7 @@ static void addlitchar(unsigned char ychar);
  %x xd
  %x xq
  
-space                  [ \t\n\r\f]
+space                  [ \t\n\r\f\v]
  
  quote                  '
  quotestop              {quote}
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c

index 9000f83a8366d5c8943e71e10bad540fefdcfcef..4359dbd83df34f7c9f49f7ad7b37a2df5892cea0 100644 (file)
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -24,6 +24,7 @@
  #include "nodes/nodeFuncs.h"
  #include "nodes/supportnodes.h"
  #include "optimizer/optimizer.h"
+#include "parser/scansup.h"
  #include "port/pg_bitutils.h"
  #include "utils/array.h"
  #include "utils/arrayaccess.h"
@@ -89,7 +90,6 @@ typedef struct ArrayIteratorData
         int                     current_item;   /* the item # we're at in the array */
  }                      ArrayIteratorData;
  
-static bool array_isspace(char ch);
  static int     ArrayCount(const char *str, int *dim, char typdelim,
                                            Node *escontext);
  static bool ReadArrayStr(char *arrayStr, const char *origStr,
@@ -254,7 +254,7 @@ array_in(PG_FUNCTION_ARGS)
                  * Note: we currently allow whitespace between, but not within,
                  * dimension items.
                  */
-               while (array_isspace(*p))
+               while (scanner_isspace(*p))
                         p++;
                 if (*p != '[')
                         break;                          /* no more dimension items */
@@ -338,7 +338,7 @@ array_in(PG_FUNCTION_ARGS)
                                          errdetail("Missing \"%s\" after array dimensions.",
                                                            ASSGN)));
                 p += strlen(ASSGN);
-               while (array_isspace(*p))
+               while (scanner_isspace(*p))
                         p++;
  
                 /*
@@ -434,27 +434,6 @@ array_in(PG_FUNCTION_ARGS)
         PG_RETURN_ARRAYTYPE_P(retval);
  }
  
-/*
- * array_isspace() --- a non-locale-dependent isspace()
- *
- * We used to use isspace() for parsing array values, but that has
- * undesirable results: an array value might be silently interpreted
- * differently depending on the locale setting.  Now we just hard-wire
- * the traditional ASCII definition of isspace().
- */
-static bool
-array_isspace(char ch)
-{
-       if (ch == ' ' ||
-               ch == '\t' ||
-               ch == '\n' ||
-               ch == '\r' ||
-               ch == '\v' ||
-               ch == '\f')
-               return true;
-       return false;
-}
-
  /*
   * ArrayCount
   *      Determines the dimensions for an array string.
@@ -654,7 +633,7 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
                                                         itemdone = true;
                                                         nelems[nest_level - 1]++;
                                                 }
-                                               else if (!array_isspace(*ptr))
+                                               else if (!scanner_isspace(*ptr))
                                                 {
                                                         /*
                                                          * Other non-space characters must be after a
@@ -684,7 +663,7 @@ ArrayCount(const char *str, int *dim, char typdelim, Node *escontext)
         /* only whitespace is allowed after the closing brace */
         while (*ptr)
         {
-               if (!array_isspace(*ptr++))
+               if (!scanner_isspace(*ptr++))
                         ereturn(escontext, -1,
                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                          errmsg("malformed array literal: \"%s\"", str),
@@ -884,7 +863,7 @@ ReadArrayStr(char *arrayStr,
                                                 indx[ndim - 1]++;
                                                 srcptr++;
                                         }
-                                       else if (array_isspace(*srcptr))
+                                       else if (scanner_isspace(*srcptr))
                                         {
                                                 /*
                                                  * If leading space, drop it immediately.  Else, copy
@@ -1176,7 +1155,7 @@ array_out(PG_FUNCTION_ARGS)
                                         overall_length += 1;
                                 }
                                 else if (ch == '{' || ch == '}' || ch == typdelim ||
-                                                array_isspace(ch))
+                                                scanner_isspace(ch))
                                         needquote = true;
                         }
                 }
diff --git a/src/bin/psql/psqlscanslash.l b/src/bin/psql/psqlscanslash.l

index 5c020f30b905436f2749245fbfcfb273f0d09b42..1461fa3d3e6537cf807ea8b5593bfbf8f96b74e9 100644 (file)
--- a/src/bin/psql/psqlscanslash.l
+++ b/src/bin/psql/psqlscanslash.l
@@ -108,7 +108,7 @@ extern void slash_yyset_column(int column_no, yyscan_t yyscanner);
  /*
   * Assorted character class definitions that should match psqlscan.l.
   */
-space                  [ \t\n\r\f]
+space                  [ \t\n\r\f\v]
  quote                  '
  xeoctesc               [\\][0-7]{1,3}
  xehexesc               [\\]x[0-9A-Fa-f]{1,2}
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l

index 84754aca4a99b012b916ac3613a9b51e2fd1eba5..5dc6fc2fb9e52f88868ee203e31992816a7e792c 100644 (file)
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -149,16 +149,16 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
   * versions of Postgres failed to recognize -- as a comment if the input
   * did not end with a newline.
   *
- * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ * non_newline_space tracks all space characters except newlines.
   *
   * XXX if you change the set of whitespace characters, fix scanner_isspace()
   * to agree.
   */
  
-space                  [ \t\n\r\f]
-horiz_space            [ \t\f]
-newline                        [\n\r]
-non_newline            [^\n\r]
+space                          [ \t\n\r\f\v]
+non_newline_space      [ \t\f\v]
+newline                                [\n\r]
+non_newline                    [^\n\r]
  
  comment                        ("--"{non_newline}*)
  
@@ -172,8 +172,8 @@ whitespace          ({space}+|{comment})
   */
  
  special_whitespace             ({space}+|{comment}{newline})
-horiz_whitespace               ({horiz_space}|{comment})
-whitespace_with_newline        ({horiz_whitespace}*{newline}{special_whitespace}*)
+non_newline_whitespace ({non_newline_space}|{comment})
+whitespace_with_newline        ({non_newline_whitespace}*{newline}{special_whitespace}*)
  
  quote                  '
  /* If we see {quote} then {quotecontinue}, the quoted string continues */
diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c

index 0429a72bfe245054cca6f7e40bc42bce93c30aa9..58b21c4d6a8b0b2787d59da2f4ef85aeddd33597 100644 (file)
--- a/src/fe_utils/string_utils.c
+++ b/src/fe_utils/string_utils.c
@@ -761,7 +761,7 @@ appendPGArray(PQExpBuffer buffer, const char *value)
  
                         if (ch == '"' || ch == '\\' ||
                                 ch == '{' || ch == '}' || ch == ',' ||
-                       /* these match array_isspace(): */
+                       /* these match scanner_isspace(): */
                                 ch == ' ' || ch == '\t' || ch == '\n' ||
                                 ch == '\r' || ch == '\v' || ch == '\f')
                         {
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l

index dcd567e8c3a1d6b5acbc0f3e98c31860de0a99d2..77bdf4f82ff594f06f9cd5c97fe2bcac197d9859 100644 (file)
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -180,16 +180,16 @@ static struct _if_value
   * versions of Postgres failed to recognize -- as a comment if the input
   * did not end with a newline.
   *
- * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ * non_newline_space tracks all space characters except newlines.
   *
   * XXX if you change the set of whitespace characters, fix ecpg_isspace()
   * to agree.
   */
  
-space                  [ \t\n\r\f]
-horiz_space            [ \t\f]
-newline                        [\n\r]
-non_newline            [^\n\r]
+space                          [ \t\n\r\f\v]
+non_newline_space      [ \t\f\v]
+newline                                [\n\r]
+non_newline                    [^\n\r]
  
  comment                        ("--"{non_newline}*)
  
@@ -202,8 +202,8 @@ whitespace          ({space}+|{comment})
   * it, whereas {whitespace} should generally have a * after it...
   */
  
-horiz_whitespace               ({horiz_space}|{comment})
-whitespace_with_newline        ({horiz_whitespace}*{newline}{whitespace}*)
+non_newline_whitespace ({non_newline_space}|{comment})
+whitespace_with_newline        ({non_newline_whitespace}*{newline}{whitespace}*)
  
  quote                  '
  /* If we see {quote} then {quotecontinue}, the quoted string continues */
@@ -1721,7 +1721,8 @@ ecpg_isspace(char ch)
                 ch == '\t' ||
                 ch == '\n' ||
                 ch == '\r' ||
-               ch == '\f')
+               ch == '\f' ||
+               ch == '\v')
                 return true;
         return false;
  }
author	Michael Paquier <michael@paquier.xyz>
	Wed, 5 Jul 2023 23:16:24 +0000 (08:16 +0900)
committer	Michael Paquier <michael@paquier.xyz>
	Wed, 5 Jul 2023 23:16:24 +0000 (08:16 +0900)
contrib/cube/cubescan.l		patch \| blob \| blame \| history
contrib/hstore/expected/hstore_utf8.out		patch \| blob \| blame \| history
contrib/hstore/sql/hstore_utf8.sql		patch \| blob \| blame \| history
contrib/seg/segscan.l		patch \| blob \| blame \| history
src/backend/parser/parse_type.c		patch \| blob \| blame \| history
src/backend/parser/scan.l		patch \| blob \| blame \| history
src/backend/parser/scansup.c		patch \| blob \| blame \| history
src/backend/replication/repl_scanner.l		patch \| blob \| blame \| history
src/backend/utils/adt/arrayfuncs.c		patch \| blob \| blame \| history
src/bin/psql/psqlscanslash.l		patch \| blob \| blame \| history
src/fe_utils/psqlscan.l		patch \| blob \| blame \| history
src/fe_utils/string_utils.c		patch \| blob \| blame \| history
src/interfaces/ecpg/preproc/pgc.l		patch \| blob \| blame \| history