Ignore nullingrels when looking up statistics

author Richard Guo <rguo@postgresql.org>

Thu, 2 Jan 2025 08:59:32 +0000 (17:59 +0900)

committer Richard Guo <rguo@postgresql.org>

Thu, 2 Jan 2025 08:59:32 +0000 (17:59 +0900)
author Richard Guo <rguo@postgresql.org>
Thu, 2 Jan 2025 08:59:32 +0000 (17:59 +0900)
committer Richard Guo <rguo@postgresql.org>
Thu, 2 Jan 2025 08:59:32 +0000 (17:59 +0900)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index f4b3e91baa89c8d78af9d22f241a5c759ea604a8..f420517961f013459f69156a67d4b371db253a7c 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -121,6 +121,7 @@
  #include "parser/parse_clause.h"
  #include "parser/parse_relation.h"
  #include "parser/parsetree.h"
+#include "rewrite/rewriteManip.h"
  #include "statistics/statistics.h"
  #include "storage/bufmgr.h"
  #include "utils/acl.h"
@@ -3306,6 +3307,15 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
  
     ndistinct = get_variable_numdistinct(vardata, &isdefault);
  
+   /*
+    * The nullingrels bits within the var could cause the same var to be
+    * counted multiple times if it's marked with different nullingrels.  They
+    * could also prevent us from matching the var to the expressions in
+    * extended statistics (see estimate_multivariate_ndistinct).  So strip
+    * them out first.
+    */
+   var = remove_nulling_relids(var, root->outer_join_rels, NULL);
+
     foreach(lc, varinfos)
     {
         varinfo = (GroupVarInfo *) lfirst(lc);
@@ -5017,6 +5027,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
  {
     Node       *basenode;
     Relids      varnos;
+   Relids      basevarnos;
     RelOptInfo *onerel;
  
     /* Make sure we don't return dangling pointers in vardata */
@@ -5058,10 +5069,11 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
      * relation are considered "real" vars.
      */
     varnos = pull_varnos(root, basenode);
+   basevarnos = bms_difference(varnos, root->outer_join_rels);
  
     onerel = NULL;
  
-   if (bms_is_empty(varnos))
+   if (bms_is_empty(basevarnos))
     {
         /* No Vars at all ... must be pseudo-constant clause */
     }
@@ -5069,7 +5081,8 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
     {
         int         relid;
  
-       if (bms_get_singleton_member(varnos, &relid))
+       /* Check if the expression is in vars of a single base relation */
+       if (bms_get_singleton_member(basevarnos, &relid))
         {
             if (varRelid == 0 || varRelid == relid)
             {
@@ -5099,7 +5112,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
         }
     }
  
-   bms_free(varnos);
+   bms_free(basevarnos);
  
     vardata->var = node;
     vardata->atttype = exprType(node);
@@ -5124,6 +5137,14 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
         ListCell   *slist;
         Oid         userid;
  
+       /*
+        * The nullingrels bits within the expression could prevent us from
+        * matching it to expressional index columns or to the expressions in
+        * extended statistics.  So strip them out first.
+        */
+       if (bms_overlap(varnos, root->outer_join_rels))
+           node = remove_nulling_relids(node, root->outer_join_rels, NULL);
+
         /*
          * Determine the user ID to use for privilege checks: either
          * onerel->userid if it's set (e.g., in case we're accessing the table
@@ -5394,6 +5415,8 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
             }
         }
     }
+
+   bms_free(varnos);
  }
  
  /*
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index 8d1d3ec1dcffde6d7b9c8257e3b65adfb85ed3ee..2eaadceed0db13eff496551312b4708fab7f25f2 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2517,10 +2517,11 @@ where t1.f1 = coalesce(t2.f1, 1);
                 ->  Materialize
                       ->  Seq Scan on int4_tbl t2
                             Filter: (f1 > 1)
-         ->  Seq Scan on int4_tbl t3
+         ->  Materialize
+               ->  Seq Scan on int4_tbl t3
     ->  Materialize
           ->  Seq Scan on int4_tbl t4
-(13 rows)
+(14 rows)
  
  explain (costs off)
  select * from int4_tbl t1
@@ -8014,3 +8015,24 @@ SELECT t1.a FROM skip_fetch t1 LEFT JOIN skip_fetch t2 ON t2.a = 1 WHERE t2.a IS
  
  RESET enable_indexonlyscan;
  RESET enable_seqscan;
+-- Test that we do not account for nullingrels when looking up statistics
+CREATE TABLE group_tbl (a INT, b INT);
+INSERT INTO group_tbl SELECT 1, 1;
+CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl;
+ANALYZE group_tbl;
+EXPLAIN (COSTS OFF)
+SELECT 1 FROM group_tbl t1
+    LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE
+GROUP BY s.c1, s.c2;
+                 QUERY PLAN                 
+--------------------------------------------
+ Group
+   Group Key: t2.a, (COALESCE(t2.a))
+   ->  Sort
+         Sort Key: t2.a, (COALESCE(t2.a))
+         ->  Nested Loop Left Join
+               ->  Seq Scan on group_tbl t1
+               ->  Seq Scan on group_tbl t2
+(7 rows)
+
+DROP TABLE group_tbl;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql

index 8281bbd8ef89dc44611aa345b89d732d63384422..dcc94c0715d9cd22c8da4d26708baef05f3605d7 100644 (file)
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2952,3 +2952,16 @@ SELECT t1.a FROM skip_fetch t1 LEFT JOIN skip_fetch t2 ON t2.a = 1 WHERE t2.a IS
  
  RESET enable_indexonlyscan;
  RESET enable_seqscan;
+
+-- Test that we do not account for nullingrels when looking up statistics
+CREATE TABLE group_tbl (a INT, b INT);
+INSERT INTO group_tbl SELECT 1, 1;
+CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl;
+ANALYZE group_tbl;
+
+EXPLAIN (COSTS OFF)
+SELECT 1 FROM group_tbl t1
+    LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE
+GROUP BY s.c1, s.c2;
+
+DROP TABLE group_tbl;
author	Richard Guo <rguo@postgresql.org>
	Thu, 2 Jan 2025 08:59:32 +0000 (17:59 +0900)
committer	Richard Guo <rguo@postgresql.org>
	Thu, 2 Jan 2025 08:59:32 +0000 (17:59 +0900)
src/backend/utils/adt/selfuncs.c		patch \| blob \| blame \| history
src/test/regress/expected/join.out		patch \| blob \| blame \| history
src/test/regress/sql/join.sql		patch \| blob \| blame \| history