Fix rowcount estimate for gather (merge) paths
author    Richard Guo <rguo@postgresql.org>
Tue, 23 Jul 2024 01:33:26 +0000 (10:33 +0900)
committer Richard Guo <rguo@postgresql.org>
Tue, 23 Jul 2024 01:33:26 +0000 (10:33 +0900)
In the case of a parallel plan, when computing the number of tuples
processed per worker, we divide the total number of tuples by the
parallel_divisor obtained from get_parallel_divisor(), which accounts
for the leader's contribution in addition to the number of workers.
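
For reference, get_parallel_divisor() computes this divisor roughly as
follows (condensed from costsize.c; the leader's share is counted only
when parallel_leader_participation is on):

    static double
    get_parallel_divisor(Path *path)
    {
        double      parallel_divisor = path->parallel_workers;

        if (parallel_leader_participation)
        {
            /* The leader helps less as more workers are added. */
            double      leader_contribution;

            leader_contribution = 1.0 - (0.3 * path->parallel_workers);
            if (leader_contribution > 0)
                parallel_divisor += leader_contribution;
        }

        return parallel_divisor;
    }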

Accordingly, when estimating the number of tuples for gather (merge)
nodes, we should multiply the number of tuples per worker by the same
parallel_divisor to reverse the division.  However, currently we use
parallel_workers rather than parallel_divisor for the multiplication.
This could result in an underestimation of the number of tuples for
gather (merge) nodes, especially when there are fewer than four
workers, since that is when the leader is expected to contribute and
the parallel_divisor therefore exceeds parallel_workers.
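
As a worked example, assume parallel_leader_participation is on and a
two-worker path is estimated to produce 1000 rows in total.
get_parallel_divisor() returns 2 + (1.0 - 0.3 * 2) = 2.4, so the
per-worker estimate is 1000 / 2.4 ≈ 417 rows.  Multiplying back by
parallel_workers yields only about 417 * 2 = 833 rows, whereas
multiplying by the parallel_divisor recovers the original 1000.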

This patch fixes this issue by using the same parallel_divisor for the
multiplication.  There is one ensuing plan change in the regression
tests, but the new plan looks reasonable and does not compromise the
test's original purpose of exercising parallel-aware hash join.

In passing, this patch removes an unnecessary assignment to path.rows
in create_gather_merge_path, and fixes an uninitialized-variable issue
in generate_useful_gather_paths.

No backpatch as this could result in plan changes.

Author: Anthonin Bonnefoy
Reviewed-by: Rafia Sabih, Richard Guo
Discussion: https://postgr.es/m/CAO6_Xqr9+51NxgO=XospEkUeAg-p=EjAWmtpdcZwjRgGKJ53iA@mail.gmail.com

src/backend/optimizer/path/allpaths.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/util/pathnode.c
src/include/optimizer/cost.h
src/test/regress/expected/join_hash.out

src/backend/optimizer/path/allpaths.c
index aa78c0af0cda70275f2bc5ce6f113e7ac0d4d0b2..057b4b79ebb8dd702035cef7eb6de4605bf9fc87 100644
@@ -3079,8 +3079,7 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
     * of partial_pathlist because of the way add_partial_path works.
     */
    cheapest_partial_path = linitial(rel->partial_pathlist);
-   rows =
-       cheapest_partial_path->rows * cheapest_partial_path->parallel_workers;
+   rows = compute_gather_rows(cheapest_partial_path);
    simple_gather_path = (Path *)
        create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
                           NULL, rowsp);
@@ -3098,7 +3097,7 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
        if (subpath->pathkeys == NIL)
            continue;
 
-       rows = subpath->rows * subpath->parallel_workers;
+       rows = compute_gather_rows(subpath);
        path = create_gather_merge_path(root, rel, subpath, rel->reltarget,
                                        subpath->pathkeys, NULL, rowsp);
        add_path(rel, &path->path);
@@ -3282,7 +3281,6 @@ generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_r
                                                    subpath,
                                                    useful_pathkeys,
                                                    -1.0);
-               rows = subpath->rows * subpath->parallel_workers;
            }
            else
                subpath = (Path *) create_incremental_sort_path(root,
@@ -3291,6 +3289,7 @@ generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_r
                                                                useful_pathkeys,
                                                                presorted_keys,
                                                                -1);
+           rows = compute_gather_rows(subpath);
            path = create_gather_merge_path(root, rel,
                                            subpath,
                                            rel->reltarget,
src/backend/optimizer/path/costsize.c
index 2021c481b46c8138cd3b6465f2ce95925b920b38..79991b19807b9def79c3c8d2b936b86cfb01fecc 100644
@@ -6473,3 +6473,21 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
 
    return pages_fetched;
 }
+
+/*
+ * compute_gather_rows
+ *   Estimate number of rows for gather (merge) nodes.
+ *
+ * In a parallel plan, each worker's row estimate is determined by dividing the
+ * total number of rows by parallel_divisor, which accounts for the leader's
+ * contribution in addition to the number of workers.  Accordingly, when
+ * estimating the number of rows for gather (merge) nodes, we multiply the rows
+ * per worker by the same parallel_divisor to undo the division.
+ */
+double
+compute_gather_rows(Path *path)
+{
+   Assert(path->parallel_workers > 0);
+
+   return clamp_row_est(path->rows * get_parallel_divisor(path));
+}
src/backend/optimizer/plan/planner.c
index 4711f912390f6bb40b6ba6159e1f06502e301020..948afd90948b574d34365a1a02b14da678a7c839 100644
@@ -5370,8 +5370,7 @@ create_ordered_paths(PlannerInfo *root,
                                                                    root->sort_pathkeys,
                                                                    presorted_keys,
                                                                    limit_tuples);
-           total_groups = input_path->rows *
-               input_path->parallel_workers;
+           total_groups = compute_gather_rows(sorted_path);
            sorted_path = (Path *)
                create_gather_merge_path(root, ordered_rel,
                                         sorted_path,
@@ -7543,8 +7542,6 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
            (presorted_keys == 0 || !enable_incremental_sort))
            continue;
 
-       total_groups = path->rows * path->parallel_workers;
-
        /*
         * We've no need to consider both a sort and incremental sort. We'll
         * just do a sort if there are no presorted keys and an incremental
@@ -7561,7 +7558,7 @@ gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel)
                                                         groupby_pathkeys,
                                                         presorted_keys,
                                                         -1.0);
-
+       total_groups = compute_gather_rows(path);
        path = (Path *)
            create_gather_merge_path(root,
                                     rel,
src/backend/optimizer/util/pathnode.c
index c42742d2c7bfc086e97bbc4417b78e1e45449f26..d1c4e1a6aa7c088bf41af8753d04743390635adb 100644
@@ -1899,7 +1899,6 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
    pathnode->num_workers = subpath->parallel_workers;
    pathnode->path.pathkeys = pathkeys;
    pathnode->path.pathtarget = target ? target : rel->reltarget;
-   pathnode->path.rows += subpath->rows;
 
    if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
    {
src/include/optimizer/cost.h
index b1c51a4e70ffc0a444100d6c258cbb16fa7fead7..57861bfb44608275b339bf487789300cde5e6549 100644
@@ -212,5 +212,6 @@ extern PathTarget *set_pathtarget_cost_width(PlannerInfo *root, PathTarget *targ
 extern double compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel,
                                   Path *bitmapqual, double loop_count,
                                   Cost *cost_p, double *tuples_p);
+extern double compute_gather_rows(Path *path);
 
 #endif                         /* COST_H */
src/test/regress/expected/join_hash.out
index 262fa71ed8d663520e273025463259b75735f335..4fc34a0e72aba2f4bbfaff0e727949ffa420cc90 100644
@@ -508,18 +508,17 @@ set local hash_mem_multiplier = 1.0;
 set local enable_parallel_hash = on;
 explain (costs off)
   select count(*) from simple r join extremely_skewed s using (id);
-                              QUERY PLAN                               
------------------------------------------------------------------------
- Finalize Aggregate
+                           QUERY PLAN                            
+-----------------------------------------------------------------
+ Aggregate
    ->  Gather
          Workers Planned: 1
-         ->  Partial Aggregate
-               ->  Parallel Hash Join
-                     Hash Cond: (r.id = s.id)
-                     ->  Parallel Seq Scan on simple r
-                     ->  Parallel Hash
-                           ->  Parallel Seq Scan on extremely_skewed s
-(9 rows)
+         ->  Parallel Hash Join
+               Hash Cond: (r.id = s.id)
+               ->  Parallel Seq Scan on simple r
+               ->  Parallel Hash
+                     ->  Parallel Seq Scan on extremely_skewed s
+(8 rows)
 
 select count(*) from simple r join extremely_skewed s using (id);
  count