When checking a subquery's output expressions to see if it's safe to
push down an upper-level qual, check_output_expressions() previously
treated grouping Vars as opaque Vars. This implicitly assumed they
were stable and scalar.
However, a grouping Var's underlying expression corresponds to the
grouping clause, which may be volatile or set-returning. If an
upper-level qual references such an output column, pushing it down
into the subquery is unsafe. This can cause strange results due to
multiple evaluation of a volatile function, or introduce SRFs into
the subquery's WHERE/HAVING quals.
This patch teaches check_output_expressions() to look through grouping
Vars to their underlying expressions. This ensures that any
volatility or set-returning properties in the grouping clause are
detected, preventing the unsafe pushdown.
We do not need to recursively examine the Vars contained in these
underlying expressions. Even if they reference outputs from
lower-level subqueries (at any depth), those references are guaranteed
not to expand to volatile or set-returning functions, because
subqueries containing such functions in their targetlists are never
pulled up.
Backpatch to v18, where this issue was introduced.
Reported-by: Eric Ridge <eebbrr@gmail.com>
Diagnosed-by: Tom Lane <tgl@sss.pgh.pa.us>
Author: Richard Guo <guofenglinux@gmail.com>
Discussion: https://postgr.es/m/
7900964C-F99E-481E-BEE5-
4338774CEB9F@gmail.com
Backpatch-through: 18
static void
check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo)
{
+ List *flattened_targetList = subquery->targetList;
ListCell *lc;
- foreach(lc, subquery->targetList)
+ /*
+ * We must be careful with grouping Vars and join alias Vars in the
+ * subquery's outputs, as they hide the underlying expressions.
+ *
+ * We need to expand grouping Vars to their underlying expressions (the
+ * grouping clauses) because the grouping expressions themselves might be
+ * volatile or set-returning. However, we do not need to expand join
+ * alias Vars, as their underlying structure does not introduce volatile
+ * or set-returning functions at the current level.
+ *
+ * In neither case do we need to recursively examine the Vars contained in
+ * these underlying expressions. Even if they reference outputs from
+ * lower-level subqueries (at any depth), those references are guaranteed
+ * not to expand to volatile or set-returning functions, because
+ * subqueries containing such functions in their targetlists are never
+ * pulled up.
+ */
+ if (subquery->hasGroupRTE)
+ {
+ flattened_targetList = (List *)
+ flatten_group_exprs(NULL, subquery, (Node *) subquery->targetList);
+ }
+
+ foreach(lc, flattened_targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(lc);
* wrapper.
*
* NOTE: this is also used by ruleutils.c, to deparse one query parsetree back
- * to source text. For that use-case, root will be NULL, which is why we have
- * to pass the Query separately. We need the root itself only for preserving
+ * to source text, and by check_output_expressions() to check for unsafe
+ * pushdowns. For these use-cases, root will be NULL, which is why we have to
+ * pass the Query separately. We need the root itself only for preserving
* varnullingrels. We can avoid preserving varnullingrels in the ruleutils.c's
* usage because it does not make any difference to the deparsed source text.
+ * We can also avoid it in check_output_expressions() because that function
+ * uses the expanded expressions solely to check for volatile or set-returning
+ * functions, which is independent of the Vars' nullingrels.
*/
Node *
flatten_group_exprs(PlannerInfo *root, Query *query, Node *node)
9 | 3
(3 rows)
+--
+-- check that an upper-level qual is not pushed down if it references a grouped
+-- Var whose underlying expression contains SRFs
+--
+explain (verbose, costs off)
+select * from
+ (select generate_series(1, ten) as g, count(*) from tenk1 group by 1) ss
+ where ss.g = 1;
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Subquery Scan on ss
+ Output: ss.g, ss.count
+ Filter: (ss.g = 1)
+ -> HashAggregate
+ Output: (generate_series(1, tenk1.ten)), count(*)
+ Group Key: generate_series(1, tenk1.ten)
+ -> ProjectSet
+ Output: generate_series(1, tenk1.ten)
+ -> Seq Scan on public.tenk1
+ Output: tenk1.unique1, tenk1.unique2, tenk1.two, tenk1.four, tenk1.ten, tenk1.twenty, tenk1.hundred, tenk1.thousand, tenk1.twothousand, tenk1.fivethous, tenk1.tenthous, tenk1.odd, tenk1.even, tenk1.stringu1, tenk1.stringu2, tenk1.string4
+(10 rows)
+
+select * from
+ (select generate_series(1, ten) as g, count(*) from tenk1 group by 1) ss
+ where ss.g = 1;
+ g | count
+---+-------
+ 1 | 9000
+(1 row)
+
+--
+-- check that an upper-level qual is not pushed down if it references a grouped
+-- Var whose underlying expression contains volatile functions
+--
+alter function tattle(x int, y int) volatile;
+explain (verbose, costs off)
+select * from
+ (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss
+ where ss.v;
+ QUERY PLAN
+------------------------------------------------------------
+ Subquery Scan on ss
+ Output: ss.v, ss.count
+ Filter: ss.v
+ -> GroupAggregate
+ Output: (tattle(3, tenk1.ten)), count(*)
+ Group Key: (tattle(3, tenk1.ten))
+ -> Sort
+ Output: (tattle(3, tenk1.ten))
+ Sort Key: (tattle(3, tenk1.ten))
+ -> Bitmap Heap Scan on public.tenk1
+ Output: tattle(3, tenk1.ten)
+ Recheck Cond: (tenk1.unique1 < 3)
+ -> Bitmap Index Scan on tenk1_unique1
+ Index Cond: (tenk1.unique1 < 3)
+(14 rows)
+
+select * from
+ (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss
+ where ss.v;
+NOTICE: x = 3, y = 2
+NOTICE: x = 3, y = 1
+NOTICE: x = 3, y = 0
+ v | count
+---+-------
+ t | 3
+(1 row)
+
+-- if we pretend it's stable, we get different results:
+alter function tattle(x int, y int) stable;
+explain (verbose, costs off)
+select * from
+ (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss
+ where ss.v;
+ QUERY PLAN
+------------------------------------------------------
+ GroupAggregate
+ Output: (tattle(3, tenk1.ten)), count(*)
+ Group Key: (tattle(3, tenk1.ten))
+ -> Sort
+ Output: (tattle(3, tenk1.ten))
+ Sort Key: (tattle(3, tenk1.ten))
+ -> Bitmap Heap Scan on public.tenk1
+ Output: tattle(3, tenk1.ten)
+ Recheck Cond: (tenk1.unique1 < 3)
+ Filter: tattle(3, tenk1.ten)
+ -> Bitmap Index Scan on tenk1_unique1
+ Index Cond: (tenk1.unique1 < 3)
+(12 rows)
+
+select * from
+ (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss
+ where ss.v;
+NOTICE: x = 3, y = 2
+NOTICE: x = 3, y = 2
+NOTICE: x = 3, y = 1
+NOTICE: x = 3, y = 1
+NOTICE: x = 3, y = 0
+NOTICE: x = 3, y = 0
+ v | count
+---+-------
+ t | 3
+(1 row)
+
drop function tattle(x int, y int);
--
-- Test that LIMIT can be pushed to SORT through a subquery that just projects
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
where tattle(x, u);
+--
+-- check that an upper-level qual is not pushed down if it references a grouped
+-- Var whose underlying expression contains SRFs
+--
+explain (verbose, costs off)
+select * from
+ (select generate_series(1, ten) as g, count(*) from tenk1 group by 1) ss
+ where ss.g = 1;
+
+select * from
+ (select generate_series(1, ten) as g, count(*) from tenk1 group by 1) ss
+ where ss.g = 1;
+
+--
+-- check that an upper-level qual is not pushed down if it references a grouped
+-- Var whose underlying expression contains volatile functions
+--
+alter function tattle(x int, y int) volatile;
+
+explain (verbose, costs off)
+select * from
+ (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss
+ where ss.v;
+
+select * from
+ (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss
+ where ss.v;
+
+-- if we pretend it's stable, we get different results:
+alter function tattle(x int, y int) stable;
+
+explain (verbose, costs off)
+select * from
+ (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss
+ where ss.v;
+
+select * from
+ (select tattle(3, ten) as v, count(*) from tenk1 where unique1 < 3 group by 1) ss
+ where ss.v;
+
drop function tattle(x int, y int);
--