From a71f10189dc10a2fe422158a2c9409e0f77c6b9e Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 7 Mar 2017 10:33:29 -0500 Subject: [PATCH] Preparatory refactoring for parallel merge join support. Extract the logic used by hash_inner_and_outer into a separate function, get_cheapest_parallel_safe_total_inner, so that it can also be used to plan parallel merge joins. Also, add a require_parallel_safe argument to the existing function get_cheapest_path_for_pathkeys, because parallel merge join needs to find the cheapest path for a given set of pathkeys that is parallel-safe, not just the cheapest one overall. Patch by me, reviewed by Dilip Kumar. Discussion: http://postgr.es/m/CA+TgmoYOv+dFK0MWW6366dFj_xTnohQfoBDrHyB7d1oZhrgPjA@mail.gmail.com --- src/backend/optimizer/path/allpaths.c | 9 ++++++--- src/backend/optimizer/path/joinpath.c | 23 ++++++--------------- src/backend/optimizer/path/pathkeys.c | 29 ++++++++++++++++++++++++++- src/include/optimizer/paths.h | 4 +++- 4 files changed, 43 insertions(+), 22 deletions(-) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 633b5c1608..87a3faff09 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1447,12 +1447,14 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, get_cheapest_path_for_pathkeys(childrel->pathlist, pathkeys, NULL, - STARTUP_COST); + STARTUP_COST, + false); cheapest_total = get_cheapest_path_for_pathkeys(childrel->pathlist, pathkeys, NULL, - TOTAL_COST); + TOTAL_COST, + false); /* * If we can't find any paths with the right order just use the @@ -1517,7 +1519,8 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, cheapest = get_cheapest_path_for_pathkeys(rel->pathlist, NIL, required_outer, - TOTAL_COST); + TOTAL_COST, + false); Assert(cheapest != NULL); if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer)) return cheapest; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 99ec5834bf..8bb5473330 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -936,7 +936,8 @@ generate_mergejoin_paths(PlannerInfo *root, innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, NULL, - TOTAL_COST); + TOTAL_COST, + false); if (innerpath != NULL && (cheapest_total_inner == NULL || compare_path_costs(innerpath, cheapest_total_inner, @@ -971,7 +972,8 @@ generate_mergejoin_paths(PlannerInfo *root, innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, trialsortkeys, NULL, - STARTUP_COST); + STARTUP_COST, + false); if (innerpath != NULL && (cheapest_startup_inner == NULL || compare_path_costs(innerpath, cheapest_startup_inner, @@ -1517,21 +1519,8 @@ hash_inner_and_outer(PlannerInfo *root, if (cheapest_total_inner->parallel_safe) cheapest_safe_inner = cheapest_total_inner; else if (save_jointype != JOIN_UNIQUE_INNER) - { - ListCell *lc; - - foreach(lc, innerrel->pathlist) - { - Path *innerpath = (Path *) lfirst(lc); - - if (innerpath->parallel_safe && - bms_is_empty(PATH_REQ_OUTER(innerpath))) - { - cheapest_safe_inner = innerpath; - break; - } - } - } + cheapest_safe_inner = + get_cheapest_parallel_safe_total_inner(innerrel->pathlist); if (cheapest_safe_inner != NULL) try_partial_hashjoin_path(root, joinrel, diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 1065b31ad1..2c269062ec 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -337,11 +337,13 @@ pathkeys_contained_in(List *keys1, List *keys2) * 'pathkeys' represents a required ordering (in canonical form!) * 'required_outer' denotes allowable outer relations for parameterized paths * 'cost_criterion' is STARTUP_COST or TOTAL_COST + * 'require_parallel_safe' causes us to consider only parallel-safe paths */ Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, - CostSelector cost_criterion) + CostSelector cost_criterion, + bool require_parallel_safe) { Path *matched_path = NULL; ListCell *l; @@ -358,6 +360,9 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, compare_path_costs(matched_path, path, cost_criterion) <= 0) continue; + if (require_parallel_safe && !path->parallel_safe) + continue; + if (pathkeys_contained_in(pathkeys, path->pathkeys) && bms_is_subset(PATH_REQ_OUTER(path), required_outer)) matched_path = path; @@ -407,6 +412,28 @@ get_cheapest_fractional_path_for_pathkeys(List *paths, return matched_path; } + +/* + * get_cheapest_parallel_safe_total_inner + * Find the unparameterized parallel-safe path with the least total cost. + */ +Path * +get_cheapest_parallel_safe_total_inner(List *paths) +{ + ListCell *l; + + foreach(l, paths) + { + Path *innerpath = (Path *) lfirst(l); + + if (innerpath->parallel_safe && + bms_is_empty(PATH_REQ_OUTER(innerpath))) + return innerpath; + } + + return NULL; +} + /**************************************************************************** * NEW PATHKEY FORMATION ****************************************************************************/ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index ebda308c41..bc0dcf4468 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -182,11 +182,13 @@ extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2); extern bool pathkeys_contained_in(List *keys1, List *keys2); extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, - CostSelector cost_criterion); + CostSelector cost_criterion, + bool require_parallel_safe); extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, double fraction); +extern Path *get_cheapest_parallel_safe_total_inner(List *paths); extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, ScanDirection scandir); extern List *build_expression_pathkey(PlannerInfo *root, Expr *expr, -- 2.39.5