Fix plpgsql's handling of -- comments following expressions.
authorTom Lane <tgl@sss.pgh.pa.us>
Wed, 10 Apr 2024 19:45:58 +0000 (15:45 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Wed, 10 Apr 2024 19:45:58 +0000 (15:45 -0400)
Up to now, read_sql_construct() has collected all the source text from
the statement or expression's initial token up to the character just
before the "until" token.  It normally tries to strip trailing
whitespace from that, largely for neatness.  If there was a "-- text"
comment after the expression, this resulted in removing the newline
that terminates the comment, which creates a hazard if we try to paste
the collected text into a larger SQL construct without inserting a
newline after it.  In particular this caused our handling of CASE
constructs to fail if there's a comment after a WHEN expression.

Commit 4adead1d2 noticed a similar problem with cursor arguments,
and worked around it through the rather crude hack of suppressing
the whitespace-trimming behavior for those.  Rather than do that
and leave the hazard open for future hackers to trip over, let's
fix it properly.  pl_scanner.c already has enough infrastructure
to report the end location of the expression's last token, so
we can copy up to that location and never collect any trailing
whitespace or comment to begin with.

Erik Wienhold and Tom Lane, per report from Michal Bartak.
Back-patch to all supported branches.

Discussion: https://postgr.es/m/CAAVzF_FjRoi8fOVuLCZhQJx6HATQ7MKm=aFOHWZODFnLmjX-xA@mail.gmail.com

src/pl/plpgsql/src/expected/plpgsql_control.out
src/pl/plpgsql/src/pl_gram.y
src/pl/plpgsql/src/pl_scanner.c
src/pl/plpgsql/src/plpgsql.h
src/pl/plpgsql/src/sql/plpgsql_control.sql
src/test/regress/expected/plpgsql.out
src/test/regress/sql/plpgsql.sql

index 328bd4858617d5321d359790fc53bf674bd5c106..ccd4f5470425c90478d9c6d580a08cc2773853c3 100644 (file)
@@ -681,3 +681,20 @@ select case_test(13);
  other
 (1 row)
 
+-- test line comment between WHEN and THEN
+create or replace function case_comment(int) returns text as $$
+begin
+  case $1
+    when 1 -- comment before THEN
+      then return 'one';
+    else
+      return 'other';
+  end case;
+end;
+$$ language plpgsql immutable;
+select case_comment(1);
+ case_comment 
+--------------
+ one
+(1 row)
+
index bef33d58a2f7e9ef53cb81140354a0f8d65b9ec5..a29d2dfacdd2029fbfd7416948433f18cba88c69 100644 (file)
@@ -66,7 +66,6 @@ static    PLpgSQL_expr    *read_sql_construct(int until,
                                            RawParseMode parsemode,
                                            bool isexpression,
                                            bool valid_sql,
-                                           bool trim,
                                            int *startloc,
                                            int *endtoken);
 static PLpgSQL_expr    *read_sql_expression(int until,
@@ -895,7 +894,7 @@ stmt_perform    : K_PERFORM
                         */
                        new->expr = read_sql_construct(';', 0, 0, ";",
                                                       RAW_PARSE_DEFAULT,
-                                                      false, false, true,
+                                                      false, false,
                                                       &startloc, NULL);
                        /* overwrite "perform" ... */
                        memcpy(new->expr->query, " SELECT", 7);
@@ -981,7 +980,7 @@ stmt_assign     : T_DATUM
                        plpgsql_push_back_token(T_DATUM);
                        new->expr = read_sql_construct(';', 0, 0, ";",
                                                       pmode,
-                                                      false, true, true,
+                                                      false, true,
                                                       NULL, NULL);
 
                        $$ = (PLpgSQL_stmt *) new;
@@ -1474,7 +1473,6 @@ for_control       : for_variable K_IN
                                                       RAW_PARSE_DEFAULT,
                                                       true,
                                                       false,
-                                                      true,
                                                       &expr1loc,
                                                       &tok);
 
@@ -1879,7 +1877,7 @@ stmt_raise        : K_RAISE
                                    expr = read_sql_construct(',', ';', K_USING,
                                                              ", or ; or USING",
                                                              RAW_PARSE_PLPGSQL_EXPR,
-                                                             true, true, true,
+                                                             true, true,
                                                              NULL, &tok);
                                    new->params = lappend(new->params, expr);
                                }
@@ -2016,7 +2014,7 @@ stmt_dynexecute : K_EXECUTE
                        expr = read_sql_construct(K_INTO, K_USING, ';',
                                                  "INTO or USING or ;",
                                                  RAW_PARSE_PLPGSQL_EXPR,
-                                                 true, true, true,
+                                                 true, true,
                                                  NULL, &endtoken);
 
                        new = palloc(sizeof(PLpgSQL_stmt_dynexecute));
@@ -2055,7 +2053,7 @@ stmt_dynexecute : K_EXECUTE
                                    expr = read_sql_construct(',', ';', K_INTO,
                                                              ", or ; or INTO",
                                                              RAW_PARSE_PLPGSQL_EXPR,
-                                                             true, true, true,
+                                                             true, true,
                                                              NULL, &endtoken);
                                    new->params = lappend(new->params, expr);
                                } while (endtoken == ',');
@@ -2640,7 +2638,7 @@ read_sql_expression(int until, const char *expected)
 {
    return read_sql_construct(until, 0, 0, expected,
                              RAW_PARSE_PLPGSQL_EXPR,
-                             true, true, true, NULL, NULL);
+                             true, true, NULL, NULL);
 }
 
 /* Convenience routine to read an expression with two possible terminators */
@@ -2650,7 +2648,7 @@ read_sql_expression2(int until, int until2, const char *expected,
 {
    return read_sql_construct(until, until2, 0, expected,
                              RAW_PARSE_PLPGSQL_EXPR,
-                             true, true, true, NULL, endtoken);
+                             true, true, NULL, endtoken);
 }
 
 /* Convenience routine to read a SQL statement that must end with ';' */
@@ -2659,7 +2657,7 @@ read_sql_stmt(void)
 {
    return read_sql_construct(';', 0, 0, ";",
                              RAW_PARSE_DEFAULT,
-                             false, true, true, NULL, NULL);
+                             false, true, NULL, NULL);
 }
 
 /*
@@ -2672,7 +2670,6 @@ read_sql_stmt(void)
  * parsemode:  raw_parser() mode to use
  * isexpression: whether to say we're reading an "expression" or a "statement"
  * valid_sql:   whether to check the syntax of the expr
- * trim:       trim trailing whitespace
  * startloc:   if not NULL, location of first token is stored at *startloc
  * endtoken:   if not NULL, ending token is stored at *endtoken
  *             (this is only interesting if until2 or until3 isn't zero)
@@ -2685,7 +2682,6 @@ read_sql_construct(int until,
                   RawParseMode parsemode,
                   bool isexpression,
                   bool valid_sql,
-                  bool trim,
                   int *startloc,
                   int *endtoken)
 {
@@ -2693,6 +2689,7 @@ read_sql_construct(int until,
    StringInfoData ds;
    IdentifierLookup save_IdentifierLookup;
    int         startlocation = -1;
+   int         endlocation = -1;
    int         parenlevel = 0;
    PLpgSQL_expr *expr;
 
@@ -2743,6 +2740,8 @@ read_sql_construct(int until,
                                expected),
                         parser_errposition(yylloc)));
        }
+       /* Remember end+1 location of last accepted token */
+       endlocation = yylloc + plpgsql_token_length();
    }
 
    plpgsql_IdentifierLookup = save_IdentifierLookup;
@@ -2753,7 +2752,7 @@ read_sql_construct(int until,
        *endtoken = tok;
 
    /* give helpful complaint about empty input */
-   if (startlocation >= yylloc)
+   if (startlocation >= endlocation)
    {
        if (isexpression)
            yyerror("missing expression");
@@ -2761,14 +2760,14 @@ read_sql_construct(int until,
            yyerror("missing SQL statement");
    }
 
-   plpgsql_append_source_text(&ds, startlocation, yylloc);
-
-   /* trim any trailing whitespace, for neatness */
-   if (trim)
-   {
-       while (ds.len > 0 && scanner_isspace(ds.data[ds.len - 1]))
-           ds.data[--ds.len] = '\0';
-   }
+   /*
+    * We save only the text from startlocation to endlocation-1.  This
+    * suppresses the "until" token as well as any whitespace or comments
+    * following the last accepted token.  (We used to strip such trailing
+    * whitespace by hand, but that causes problems if there's a "-- comment"
+    * in front of said whitespace.)
+    */
+   plpgsql_append_source_text(&ds, startlocation, endlocation);
 
    expr = palloc0(sizeof(PLpgSQL_expr));
    expr->query = pstrdup(ds.data);
@@ -3933,16 +3932,12 @@ read_cursor_args(PLpgSQL_var *cursor, int until)
         * Read the value expression. To provide the user with meaningful
         * parse error positions, we check the syntax immediately, instead of
         * checking the final expression that may have the arguments
-        * reordered. Trailing whitespace must not be trimmed, because
-        * otherwise input of the form (param -- comment\n, param) would be
-        * translated into a form where the second parameter is commented
-        * out.
+        * reordered.
         */
        item = read_sql_construct(',', ')', 0,
                                  ",\" or \")",
                                  RAW_PARSE_PLPGSQL_EXPR,
                                  true, true,
-                                 false, /* do not trim */
                                  NULL, &endtoken);
 
        argv[argpos] = item->query;
index 101804d5062d8f7cd734348b80df94be5ae9506e..9407da51efa6b7e26ee124b2719ae4d8902738cc 100644 (file)
@@ -184,6 +184,8 @@ plpgsql_yylex(void)
                            tok1 = T_DATUM;
                        else
                            tok1 = T_CWORD;
+                       /* Adjust token length to include A.B.C */
+                       aux1.leng = aux5.lloc - aux1.lloc + aux5.leng;
                    }
                    else
                    {
@@ -197,6 +199,8 @@ plpgsql_yylex(void)
                            tok1 = T_DATUM;
                        else
                            tok1 = T_CWORD;
+                       /* Adjust token length to include A.B */
+                       aux1.leng = aux3.lloc - aux1.lloc + aux3.leng;
                    }
                }
                else
@@ -210,6 +214,8 @@ plpgsql_yylex(void)
                        tok1 = T_DATUM;
                    else
                        tok1 = T_CWORD;
+                   /* Adjust token length to include A.B */
+                   aux1.leng = aux3.lloc - aux1.lloc + aux3.leng;
                }
            }
            else
@@ -298,6 +304,17 @@ plpgsql_yylex(void)
    return tok1;
 }
 
+/*
+ * Return the length of the token last returned by plpgsql_yylex().
+ *
+ * In the case of compound tokens, the length includes all the parts.
+ */
+int
+plpgsql_token_length(void)
+{
+   return plpgsql_yyleng;
+}
+
 /*
  * Internal yylex function.  This wraps the core lexer and adds one feature:
  * a token pushback stack.  We also make a couple of trivial single-token
index 40056bb851039914822a9b236a431379db9bd1c0..50c3b28472b54a6efa20f15b2b65391844b11d00 100644 (file)
@@ -1317,6 +1317,7 @@ extern void plpgsql_dumptree(PLpgSQL_function *func);
  */
 extern int plpgsql_base_yylex(void);
 extern int plpgsql_yylex(void);
+extern int plpgsql_token_length(void);
 extern void plpgsql_push_back_token(int token);
 extern bool plpgsql_token_is_unreserved_keyword(int token);
 extern void plpgsql_append_source_text(StringInfo buf,
index ed7231134f4df0c7df0c8c04652e8b45e7223d1a..8e007c51dcaeb242b196dc1eb07f97495040b79c 100644 (file)
@@ -486,3 +486,17 @@ select case_test(1);
 select case_test(2);
 select case_test(12);
 select case_test(13);
+
+-- test line comment between WHEN and THEN
+create or replace function case_comment(int) returns text as $$
+begin
+  case $1
+    when 1 -- comment before THEN
+      then return 'one';
+    else
+      return 'other';
+  end case;
+end;
+$$ language plpgsql immutable;
+
+select case_comment(1);
index 06372566762d874e57e67902d21ec7e75072fd8c..074af8f33a8e45207216a262dbff2f339a0a6fed 100644 (file)
@@ -2390,11 +2390,9 @@ select namedparmcursor_test7();
 ERROR:  division by zero
 CONTEXT:  SQL expression "42/0 AS p1, 77 AS p2"
 PL/pgSQL function namedparmcursor_test7() line 6 at OPEN
--- check that line comments work correctly within the argument list (there
--- is some special handling of this case in the code: the newline after the
--- comment must be preserved when the argument-evaluating query is
--- constructed, otherwise the comment effectively comments out the next
--- argument, too)
+-- check that line comments work correctly within the argument list
+-- (this used to require a special hack in the code; it no longer does,
+-- but let's keep the test anyway)
 create function namedparmcursor_test8() returns int4 as $$
 declare
   c1 cursor (p1 int, p2 int) for
index 9ca9449a5019847397c195635c76700a27dae9b4..18c91572ae14045b5425b3aa4e9f054bca104c21 100644 (file)
@@ -2047,11 +2047,9 @@ begin
 end $$ language plpgsql;
 select namedparmcursor_test7();
 
--- check that line comments work correctly within the argument list (there
--- is some special handling of this case in the code: the newline after the
--- comment must be preserved when the argument-evaluating query is
--- constructed, otherwise the comment effectively comments out the next
--- argument, too)
+-- check that line comments work correctly within the argument list
+-- (this used to require a special hack in the code; it no longer does,
+-- but let's keep the test anyway)
 create function namedparmcursor_test8() returns int4 as $$
 declare
   c1 cursor (p1 int, p2 int) for