Improve NestLoopParam generation for lateral subqueries

author David Rowley <drowley@postgresql.org>

Fri, 26 Jan 2024 03:18:58 +0000 (16:18 +1300)

committer David Rowley <drowley@postgresql.org>

Fri, 26 Jan 2024 03:18:58 +0000 (16:18 +1300)
author David Rowley <drowley@postgresql.org>
Fri, 26 Jan 2024 03:18:58 +0000 (16:18 +1300)
committer David Rowley <drowley@postgresql.org>
Fri, 26 Jan 2024 03:18:58 +0000 (16:18 +1300)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index ca619eab944ddd40d0930ae5cd3b078f7ba481df..610f4a56d6bfbf385087fc2923759166c322b821 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -3720,13 +3720,22 @@ create_subqueryscan_plan(PlannerInfo *root, SubqueryScanPath *best_path,
     /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
     scan_clauses = extract_actual_clauses(scan_clauses, false);
  
-   /* Replace any outer-relation variables with nestloop params */
+   /*
+    * Replace any outer-relation variables with nestloop params.
+    *
+    * We must provide nestloop params for both lateral references of the
+    * subquery and outer vars in the scan_clauses.  It's better to assign the
+    * former first, because that code path requires specific param IDs, while
+    * replace_nestloop_params can adapt to the IDs assigned by
+    * process_subquery_nestloop_params.  This avoids possibly duplicating
+    * nestloop params when the same Var is needed for both reasons.
+    */
     if (best_path->path.param_info)
     {
-       scan_clauses = (List *)
-           replace_nestloop_params(root, (Node *) scan_clauses);
         process_subquery_nestloop_params(root,
                                          rel->subplan_params);
+       scan_clauses = (List *)
+           replace_nestloop_params(root, (Node *) scan_clauses);
     }
  
     scan_plan = make_subqueryscan(tlist,
diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out

index f5202430f807bc279ce5c031769bd71cf42c14e2..ca198ec3b800c71c7101c1573cd48260f8be1a8d 100644 (file)
--- a/src/test/regress/expected/memoize.out
+++ b/src/test/regress/expected/memoize.out
@@ -92,6 +92,43 @@ WHERE t1.unique1 < 1000;
    1000 | 9.5000000000000000
  (1 row)
  
+-- Try with LATERAL joins
+SELECT explain_memoize('
+SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN
+LATERAL (
+    SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 5 OFFSET 0
+) t2
+ON t1.two = t2.two
+WHERE t1.unique1 < 10;', false);
+                                       explain_memoize                                        
+----------------------------------------------------------------------------------------------
+ Aggregate (actual rows=1 loops=N)
+   ->  Nested Loop Left Join (actual rows=25 loops=N)
+         ->  Index Scan using tenk1_unique1 on tenk1 t1 (actual rows=10 loops=N)
+               Index Cond: (unique1 < 10)
+         ->  Memoize (actual rows=2 loops=N)
+               Cache Key: t1.two, t1.two
+               Cache Mode: binary
+               Hits: 8  Misses: 2  Evictions: Zero  Overflows: 0  Memory Usage: NkB
+               ->  Subquery Scan on t2 (actual rows=2 loops=N)
+                     Filter: (t1.two = t2.two)
+                     Rows Removed by Filter: 2
+                     ->  Index Scan using tenk1_unique1 on tenk1 t2_1 (actual rows=5 loops=N)
+                           Index Cond: (unique1 < 5)
+(13 rows)
+
+-- And check we get the expected results.
+SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN
+LATERAL (
+    SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 5 OFFSET 0
+) t2
+ON t1.two = t2.two
+WHERE t1.unique1 < 10;
+ count |          avg           
+-------+------------------------
+    25 | 0.40000000000000000000
+(1 row)
+
  -- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions
  SET work_mem TO '64kB';
  SET hash_mem_multiplier TO 1.0;
diff --git a/src/test/regress/sql/memoize.sql b/src/test/regress/sql/memoize.sql

index 29ab1ea62d39e234b29ae69e27f3f3f32391fe0e..3793c28593c4897940bd3f9fdc9a00dfd6ce2407 100644 (file)
--- a/src/test/regress/sql/memoize.sql
+++ b/src/test/regress/sql/memoize.sql
@@ -57,6 +57,23 @@ LATERAL (SELECT t2.unique1 FROM tenk1 t2
           WHERE t1.twenty = t2.unique1 OFFSET 0) t2
  WHERE t1.unique1 < 1000;
  
+-- Try with LATERAL joins
+SELECT explain_memoize('
+SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN
+LATERAL (
+    SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 5 OFFSET 0
+) t2
+ON t1.two = t2.two
+WHERE t1.unique1 < 10;', false);
+
+-- And check we get the expected results.
+SELECT COUNT(*),AVG(t2.t1two) FROM tenk1 t1 LEFT JOIN
+LATERAL (
+    SELECT t1.two as t1two, * FROM tenk1 t2 WHERE t2.unique1 < 5 OFFSET 0
+) t2
+ON t1.two = t2.two
+WHERE t1.unique1 < 10;
+
  -- Reduce work_mem and hash_mem_multiplier so that we see some cache evictions
  SET work_mem TO '64kB';
  SET hash_mem_multiplier TO 1.0;
author	David Rowley <drowley@postgresql.org>
	Fri, 26 Jan 2024 03:18:58 +0000 (16:18 +1300)
committer	David Rowley <drowley@postgresql.org>
	Fri, 26 Jan 2024 03:18:58 +0000 (16:18 +1300)
src/backend/optimizer/plan/createplan.c		patch \| blob \| blame \| history
src/test/regress/expected/memoize.out		patch \| blob \| blame \| history
src/test/regress/sql/memoize.sql		patch \| blob \| blame \| history