Fix list-munging bug that broke SQL function result coercions.
authorTom Lane <tgl@sss.pgh.pa.us>
Mon, 19 Oct 2020 18:33:01 +0000 (14:33 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Mon, 19 Oct 2020 18:33:09 +0000 (14:33 -0400)
Since commit 913bbd88d, check_sql_fn_retval() can either insert type
coercion steps in-line in the Query that produces the SQL function's
results, or generate a new top-level Query to perform the coercions,
if modifying the Query's output in-place wouldn't be safe.  However,
it appears that the latter case has never actually worked, because
the code tried to inject the new Query back into the query list it was
passed ... which is not the list that will be used for later processing
when we execute the SQL function "normally" (without inlining it).
So we ended up with no coercion happening at run-time, leading to
wrong results or crashes depending on the datatypes involved.

While the regression tests look like they cover this area well enough,
through a huge bit of bad luck all the test cases that exercise the
separate-Query path were checking either inline-able cases (which
accidentally didn't have the bug) or cases that are no-ops at runtime
(e.g., varchar to text), so that the failure to perform the coercion
wasn't obvious.  The fact that the cases that don't work weren't
allowed at all before v13 probably contributed to not noticing the
problem sooner, too.

To fix, get rid of the separate "flat" list of Query nodes and instead
pass the real two-level list that is going to be used later.  I chose
to make the same change in check_sql_fn_statements(), although that has
no actual bug, just so that we don't need that data structure at all.

This is an API change, as evidenced by the adjustments needed to
callers outside functions.c.  That's a bit scary to be doing in a
released branch, but so far as I can tell from a quick search,
there are no outside callers of these functions (and they are
sufficiently specific to our semantics for SQL-language functions that
it's not apparent why any extension would need to call them).  In any
case, v13 already changed the API of check_sql_fn_retval() compared to
prior branches.

Per report from pinker.  Back-patch to v13 where this code came in.

Discussion: https://postgr.es/m/1603050466566-0.post@n3.nabble.com

src/backend/catalog/pg_proc.c
src/backend/executor/functions.c
src/backend/optimizer/util/clauses.c
src/include/executor/functions.h
src/test/regress/expected/rangefuncs.out
src/test/regress/sql/rangefuncs.sql

index f7dab9925b9e2138f49d4c80c039f9763e010481..1dd9ecc0634aa4e46cbe431eadd29087d625c4f4 100644 (file)
@@ -913,8 +913,8 @@ fmgr_sql_validator(PG_FUNCTION_ARGS)
                                                                  (ParserSetupHook) sql_fn_parser_setup,
                                                                  pinfo,
                                                                  NULL);
-               querytree_list = list_concat(querytree_list,
-                                            querytree_sublist);
+               querytree_list = lappend(querytree_list,
+                                        querytree_sublist);
            }
 
            check_sql_fn_statements(querytree_list);
index bf00a9c1e8da2ba2c201152534b05846ac85be48..459a33375b14dcea4b829ce23af88096fd3e5834 100644 (file)
@@ -609,7 +609,6 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
    SQLFunctionCachePtr fcache;
    List       *raw_parsetree_list;
    List       *queryTree_list;
-   List       *flat_query_list;
    List       *resulttlist;
    ListCell   *lc;
    Datum       tmp;
@@ -689,13 +688,7 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
 
    /*
     * Parse and rewrite the queries in the function text.  Use sublists to
-    * keep track of the original query boundaries.  But we also build a
-    * "flat" list of the rewritten queries to pass to check_sql_fn_retval.
-    * This is because the last canSetTag query determines the result type
-    * independently of query boundaries --- and it might not be in the last
-    * sublist, for example if the last query rewrites to DO INSTEAD NOTHING.
-    * (It might not be unreasonable to throw an error in such a case, but
-    * this is the historical behavior and it doesn't seem worth changing.)
+    * keep track of the original query boundaries.
     *
     * Note: since parsing and planning is done in fcontext, we will generate
     * a lot of cruft that lives as long as the fcache does.  This is annoying
@@ -705,7 +698,6 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
    raw_parsetree_list = pg_parse_query(fcache->src);
 
    queryTree_list = NIL;
-   flat_query_list = NIL;
    foreach(lc, raw_parsetree_list)
    {
        RawStmt    *parsetree = lfirst_node(RawStmt, lc);
@@ -717,10 +709,12 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
                                                          fcache->pinfo,
                                                          NULL);
        queryTree_list = lappend(queryTree_list, queryTree_sublist);
-       flat_query_list = list_concat(flat_query_list, queryTree_sublist);
    }
 
-   check_sql_fn_statements(flat_query_list);
+   /*
+    * Check that there are no statements we don't want to allow.
+    */
+   check_sql_fn_statements(queryTree_list);
 
    /*
     * Check that the function returns the type it claims to.  Although in
@@ -740,7 +734,7 @@ init_sql_fcache(FunctionCallInfo fcinfo, Oid collation, bool lazyEvalOK)
     * the rowtype column into multiple columns, since we have no way to
     * notify the caller that it should do that.)
     */
-   fcache->returnsTuple = check_sql_fn_retval(flat_query_list,
+   fcache->returnsTuple = check_sql_fn_retval(queryTree_list,
                                               rettype,
                                               rettupdesc,
                                               false,
@@ -1510,51 +1504,63 @@ ShutdownSQLFunction(Datum arg)
  * is not acceptable.
  */
 void
-check_sql_fn_statements(List *queryTreeList)
+check_sql_fn_statements(List *queryTreeLists)
 {
    ListCell   *lc;
 
-   foreach(lc, queryTreeList)
+   /* We are given a list of sublists of Queries */
+   foreach(lc, queryTreeLists)
    {
-       Query      *query = lfirst_node(Query, lc);
+       List       *sublist = lfirst_node(List, lc);
+       ListCell   *lc2;
 
-       /*
-        * Disallow procedures with output arguments.  The current
-        * implementation would just throw the output values away, unless the
-        * statement is the last one.  Per SQL standard, we should assign the
-        * output values by name.  By disallowing this here, we preserve an
-        * opportunity for future improvement.
-        */
-       if (query->commandType == CMD_UTILITY &&
-           IsA(query->utilityStmt, CallStmt))
+       foreach(lc2, sublist)
        {
-           CallStmt   *stmt = castNode(CallStmt, query->utilityStmt);
-           HeapTuple   tuple;
-           int         numargs;
-           Oid        *argtypes;
-           char      **argnames;
-           char       *argmodes;
-           int         i;
-
-           tuple = SearchSysCache1(PROCOID, ObjectIdGetDatum(stmt->funcexpr->funcid));
-           if (!HeapTupleIsValid(tuple))
-               elog(ERROR, "cache lookup failed for function %u", stmt->funcexpr->funcid);
-           numargs = get_func_arg_info(tuple, &argtypes, &argnames, &argmodes);
-           ReleaseSysCache(tuple);
-
-           for (i = 0; i < numargs; i++)
+           Query      *query = lfirst_node(Query, lc2);
+
+           /*
+            * Disallow procedures with output arguments.  The current
+            * implementation would just throw the output values away, unless
+            * the statement is the last one.  Per SQL standard, we should
+            * assign the output values by name.  By disallowing this here, we
+            * preserve an opportunity for future improvement.
+            */
+           if (query->commandType == CMD_UTILITY &&
+               IsA(query->utilityStmt, CallStmt))
            {
-               if (argmodes && (argmodes[i] == PROARGMODE_INOUT || argmodes[i] == PROARGMODE_OUT))
-                   ereport(ERROR,
-                           (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                            errmsg("calling procedures with output arguments is not supported in SQL functions")));
+               CallStmt   *stmt = castNode(CallStmt, query->utilityStmt);
+               HeapTuple   tuple;
+               int         numargs;
+               Oid        *argtypes;
+               char      **argnames;
+               char       *argmodes;
+               int         i;
+
+               tuple = SearchSysCache1(PROCOID,
+                                       ObjectIdGetDatum(stmt->funcexpr->funcid));
+               if (!HeapTupleIsValid(tuple))
+                   elog(ERROR, "cache lookup failed for function %u",
+                        stmt->funcexpr->funcid);
+               numargs = get_func_arg_info(tuple,
+                                           &argtypes, &argnames, &argmodes);
+               ReleaseSysCache(tuple);
+
+               for (i = 0; i < numargs; i++)
+               {
+                   if (argmodes && (argmodes[i] == PROARGMODE_INOUT ||
+                                    argmodes[i] == PROARGMODE_OUT))
+                       ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("calling procedures with output arguments is not supported in SQL functions")));
+               }
            }
        }
    }
 }
 
 /*
- * check_sql_fn_retval() -- check return value of a list of sql parse trees.
+ * check_sql_fn_retval()
+ *     Check return value of a list of lists of sql parse trees.
  *
  * The return value of a sql function is the value returned by the last
  * canSetTag query in the function.  We do some ad-hoc type checking and
@@ -1592,7 +1598,7 @@ check_sql_fn_statements(List *queryTreeList)
  * function is defined to return VOID then *resultTargetList is set to NIL.
  */
 bool
-check_sql_fn_retval(List *queryTreeList,
+check_sql_fn_retval(List *queryTreeLists,
                    Oid rettype, TupleDesc rettupdesc,
                    bool insertDroppedCols,
                    List **resultTargetList)
@@ -1619,20 +1625,30 @@ check_sql_fn_retval(List *queryTreeList,
        return false;
 
    /*
-    * Find the last canSetTag query in the list.  This isn't necessarily the
-    * last parsetree, because rule rewriting can insert queries after what
-    * the user wrote.
+    * Find the last canSetTag query in the function body (which is presented
+    * to us as a list of sublists of Query nodes).  This isn't necessarily
+    * the last parsetree, because rule rewriting can insert queries after
+    * what the user wrote.  Note that it might not even be in the last
+    * sublist, for example if the last query rewrites to DO INSTEAD NOTHING.
+    * (It might not be unreasonable to throw an error in such a case, but
+    * this is the historical behavior and it doesn't seem worth changing.)
     */
    parse = NULL;
    parse_cell = NULL;
-   foreach(lc, queryTreeList)
+   foreach(lc, queryTreeLists)
    {
-       Query      *q = lfirst_node(Query, lc);
+       List       *sublist = lfirst_node(List, lc);
+       ListCell   *lc2;
 
-       if (q->canSetTag)
+       foreach(lc2, sublist)
        {
-           parse = q;
-           parse_cell = lc;
+           Query      *q = lfirst_node(Query, lc2);
+
+           if (q->canSetTag)
+           {
+               parse = q;
+               parse_cell = lc2;
+           }
        }
    }
 
index 750586fceb74628a32771f91907c42d306ad09fa..e7d814651b1843db1e29374ca827bda6aef40a64 100644 (file)
@@ -4522,7 +4522,8 @@ inline_function(Oid funcid, Oid result_type, Oid result_collid,
     * needed; that's probably not important, but let's be careful.
     */
    querytree_list = list_make1(querytree);
-   if (check_sql_fn_retval(querytree_list, result_type, rettupdesc,
+   if (check_sql_fn_retval(list_make1(querytree_list),
+                           result_type, rettupdesc,
                            false, NULL))
        goto fail;              /* reject whole-tuple-result cases */
 
@@ -5040,7 +5041,7 @@ inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte)
     * shows it's returning a whole tuple result; otherwise what it's
     * returning is a single composite column which is not what we need.
     */
-   if (!check_sql_fn_retval(querytree_list,
+   if (!check_sql_fn_retval(list_make1(querytree_list),
                             fexpr->funcresulttype, rettupdesc,
                             true, NULL) &&
        (functypclass == TYPEFUNC_COMPOSITE ||
@@ -5052,7 +5053,7 @@ inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte)
     * check_sql_fn_retval might've inserted a projection step, but that's
     * fine; just make sure we use the upper Query.
     */
-   querytree = linitial(querytree_list);
+   querytree = linitial_node(Query, querytree_list);
 
    /*
     * Looks good --- substitute parameters into the query.
index cb13428a5a8848ac52d72906848ca8f406167adf..a0db24bde699fc1e7891fe6a927b8c3ea7831f66 100644 (file)
@@ -29,9 +29,9 @@ extern SQLFunctionParseInfoPtr prepare_sql_fn_parse_info(HeapTuple procedureTupl
 extern void sql_fn_parser_setup(struct ParseState *pstate,
                                SQLFunctionParseInfoPtr pinfo);
 
-extern void check_sql_fn_statements(List *queryTreeList);
+extern void check_sql_fn_statements(List *queryTreeLists);
 
-extern bool check_sql_fn_retval(List *queryTreeList,
+extern bool check_sql_fn_retval(List *queryTreeLists,
                                Oid rettype, TupleDesc rettupdesc,
                                bool insertDroppedCols,
                                List **resultTargetList);
index e618aec2ebca82c8b399718c64ff79c55e77b842..06bd129fd22816b2b741ebfd1b480ec1f9f361aa 100644 (file)
@@ -2109,6 +2109,50 @@ select * from testrngfunc();
  7.136178 | 7.14
 (1 row)
 
+create or replace function testrngfunc() returns setof rngfunc_type as $$
+  select 1, 2 union select 3, 4 order by 1;
+$$ language sql immutable;
+explain (verbose, costs off)
+select testrngfunc();
+       QUERY PLAN        
+-------------------------
+ ProjectSet
+   Output: testrngfunc()
+   ->  Result
+(3 rows)
+
+select testrngfunc();
+   testrngfunc   
+-----------------
+ (1.000000,2.00)
+ (3.000000,4.00)
+(2 rows)
+
+explain (verbose, costs off)
+select * from testrngfunc();
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Subquery Scan on "*SELECT*"
+   Output: "*SELECT*"."?column?", "*SELECT*"."?column?_1"
+   ->  Unique
+         Output: (1), (2)
+         ->  Sort
+               Output: (1), (2)
+               Sort Key: (1), (2)
+               ->  Append
+                     ->  Result
+                           Output: 1, 2
+                     ->  Result
+                           Output: 3, 4
+(12 rows)
+
+select * from testrngfunc();
+    f1    |  f2  
+----------+------
+ 1.000000 | 2.00
+ 3.000000 | 4.00
+(2 rows)
+
 -- Check a couple of error cases while we're here
 select * from testrngfunc() as t(f1 int8,f2 int8);  -- fail, composite result
 ERROR:  a column definition list is redundant for a function returning a named composite type
index 5f41cb2d8d0b10195da2d1c6966df76b748f495f..3c436028daffd3afb9043aa8ab158aabcec3ab1b 100644 (file)
@@ -629,6 +629,17 @@ explain (verbose, costs off)
 select * from testrngfunc();
 select * from testrngfunc();
 
+create or replace function testrngfunc() returns setof rngfunc_type as $$
+  select 1, 2 union select 3, 4 order by 1;
+$$ language sql immutable;
+
+explain (verbose, costs off)
+select testrngfunc();
+select testrngfunc();
+explain (verbose, costs off)
+select * from testrngfunc();
+select * from testrngfunc();
+
 -- Check a couple of error cases while we're here
 select * from testrngfunc() as t(f1 int8,f2 int8);  -- fail, composite result
 select * from pg_get_keywords() as t(f1 int8,f2 int8);  -- fail, OUT params