diff options
| author | Tom Lane | 2012-04-19 19:52:46 +0000 |
|---|---|---|
| committer | Tom Lane | 2012-04-19 19:53:47 +0000 |
| commit | 5b7b5518d0ea56c422a197875f7efa5deddbb388 (patch) | |
| tree | 1e647989f2f6399fff7fe68a493200ccf9d2b01f /src/include | |
| parent | cd1f4db4aec0c4b71d2ed0d29bbe388dfcd11527 (diff) | |
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/nodes/nodes.h | 1 | ||||
| -rw-r--r-- | src/include/nodes/relation.h | 87 | ||||
| -rw-r--r-- | src/include/optimizer/cost.h | 16 | ||||
| -rw-r--r-- | src/include/optimizer/pathnode.h | 33 | ||||
| -rw-r--r-- | src/include/optimizer/paths.h | 5 | ||||
| -rw-r--r-- | src/include/optimizer/restrictinfo.h | 9 |
6 files changed, 114 insertions, 37 deletions
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index fdcc80edbb..1e16088b7e 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -212,6 +212,7 @@ typedef enum NodeTag T_PlannerGlobal, T_RelOptInfo, T_IndexOptInfo, + T_ParamPathInfo, T_Path, T_IndexPath, T_BitmapHeapPath, diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 6606e67055..e1d5fc0319 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -304,6 +304,7 @@ typedef struct PlannerInfo * ConvertRowtypeExpr representing a whole-row Var. * pathlist - List of Path nodes, one for each potentially useful * method of generating the relation + * ppilist - ParamPathInfo nodes for parameterized Paths, if any * cheapest_startup_path - the pathlist member with lowest startup cost * (regardless of its ordering; but must be * unparameterized) @@ -400,6 +401,7 @@ typedef struct RelOptInfo /* materialization information */ List *reltargetlist; /* Vars to be output by scan of relation */ List *pathlist; /* Path structures */ + List *ppilist; /* ParamPathInfos used in pathlist */ struct Path *cheapest_startup_path; struct Path *cheapest_total_path; struct Path *cheapest_unique_path; @@ -628,6 +630,31 @@ typedef struct PathKey bool pk_nulls_first; /* do NULLs come before normal values? */ } PathKey; + +/* + * ParamPathInfo + * + * All parameterized paths for a given relation with given required outer rels + * link to a single ParamPathInfo, which stores common information such as + * the estimated rowcount for this parameterization. We do this partly to + * avoid recalculations, but mostly to ensure that the estimated rowcount + * is in fact the same for every such path. + * + * Note: ppi_clauses is only used in ParamPathInfos for base relation paths; + * in join cases it's NIL because the set of relevant clauses varies depending + * on how the join is formed. The relevant clauses will appear in each + * parameterized join path's joinrestrictinfo list, instead. + */ +typedef struct ParamPathInfo +{ + NodeTag type; + + Relids ppi_req_outer; /* rels supplying parameters used by path */ + double ppi_rows; /* estimated number of result tuples */ + List *ppi_clauses; /* join clauses available from outer rels */ +} ParamPathInfo; + + /* * Type "Path" is used as-is for sequential-scan paths, as well as some other * simple plan types that we don't need any extra information in the path for. @@ -638,25 +665,19 @@ typedef struct PathKey * the same Path type for multiple Plan types when there is no need to * distinguish the Plan type during path processing. * + * "param_info", if not NULL, links to a ParamPathInfo that identifies outer + * relation(s) that provide parameter values to each scan of this path. + * That means this path can only be joined to those rels by means of nestloop + * joins with this path on the inside. Also note that a parameterized path + * is responsible for testing all "movable" joinclauses involving this rel + * and the specified outer rel(s). + * * "rows" is the same as parent->rows in simple paths, but in parameterized * paths and UniquePaths it can be less than parent->rows, reflecting the * fact that we've filtered by extra join conditions or removed duplicates. * * "pathkeys" is a List of PathKey nodes (see above), describing the sort * ordering of the path's output rows. - * - * "required_outer", if not NULL, contains the relids of one or more relations - * that must provide parameter values to each scan of this path, because the - * path relies on join clauses using those rels. That means this path can only - * be joined to those rels by means of nestloop joins with this path on the - * inside. Note: for a normal unparameterized path, required_outer must be - * NULL, not an empty-but-not-null Bitmapset. - * - * "param_clauses" is a List of RestrictInfo nodes, containing the join - * clauses used by a parameterized path. Ideally param_clauses should be NIL - * if and only if required_outer is NULL. XXX for the moment, however, we do - * not compute param_clauses for Append and MergeAppend paths, so the list - * is inaccurate in those paths and possibly paths above them. */ typedef struct Path { @@ -665,6 +686,7 @@ typedef struct Path NodeTag pathtype; /* tag identifying scan/join method */ RelOptInfo *parent; /* the relation this path can build */ + ParamPathInfo *param_info; /* parameterization info, or NULL if none */ /* estimated size/costs for path (see costsize.c for more info) */ double rows; /* estimated number of result tuples */ @@ -672,11 +694,13 @@ typedef struct Path Cost total_cost; /* total cost (assuming all tuples fetched) */ List *pathkeys; /* sort ordering of path's output */ - - Relids required_outer; /* rels supplying parameters used by path */ - List *param_clauses; /* join clauses that use such parameters */ + /* pathkeys is a List of PathKey nodes; see above */ } Path; +/* Macro for extracting a path's parameterization relids; beware double eval */ +#define PATH_REQ_OUTER(path) \ + ((path)->param_info ? (path)->param_info->ppi_req_outer : (Relids) NULL) + /*---------- * IndexPath represents an index scan over a single index. * @@ -924,8 +948,9 @@ typedef struct JoinPath List *joinrestrictinfo; /* RestrictInfos to apply to join */ /* - * See the notes for RelOptInfo to understand why joinrestrictinfo is - * needed in JoinPath, and can't be merged into the parent RelOptInfo. + * See the notes for RelOptInfo and ParamPathInfo to understand why + * joinrestrictinfo is needed in JoinPath, and can't be merged into the + * parent RelOptInfo. */ } JoinPath; @@ -1061,13 +1086,22 @@ typedef struct HashPath * RestrictInfo nodes also contain an outerjoin_delayed flag, which is true * if the clause's applicability must be delayed due to any outer joins * appearing below it (ie, it has to be postponed to some join level higher - * than the set of relations it actually references). There is also a - * nullable_relids field, which is the set of rels it references that can be - * forced null by some outer join below the clause. outerjoin_delayed = true - * is subtly different from nullable_relids != NULL: a clause might reference - * some nullable rels and yet not be outerjoin_delayed because it also - * references all the other rels of the outer join(s). A clause that is not - * outerjoin_delayed can be enforced anywhere it is computable. + * than the set of relations it actually references). + * + * There is also an outer_relids field, which is NULL except for outer join + * clauses; for those, it is the set of relids on the outer side of the + * clause's outer join. (These are rels that the clause cannot be applied to + * in parameterized scans, since pushing it into the join's outer side would + * lead to wrong answers.) + * + * There is also a nullable_relids field, which is the set of rels the clause + * references that can be forced null by some outer join below the clause. + * + * outerjoin_delayed = true is subtly different from nullable_relids != NULL: + * a clause might reference some nullable rels and yet not be + * outerjoin_delayed because it also references all the other rels of the + * outer join(s). A clause that is not outerjoin_delayed can be enforced + * anywhere it is computable. * * In general, the referenced clause might be arbitrarily complex. The * kinds of clauses we can handle as indexscan quals, mergejoin clauses, @@ -1129,6 +1163,9 @@ typedef struct RestrictInfo /* The set of relids required to evaluate the clause: */ Relids required_relids; + /* If an outer-join clause, the outer-side relations, else NULL: */ + Relids outer_relids; + /* The relids used in the clause that are nullable by lower outer joins: */ Relids nullable_relids; diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index b1e664265b..c197e7c0c1 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -66,17 +66,20 @@ extern int constraint_exclusion; extern double clamp_row_est(double nrows); extern double index_pages_fetched(double tuples_fetched, BlockNumber pages, double index_pages, PlannerInfo *root); -extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel); +extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel, + ParamPathInfo *param_info); extern void cost_index(IndexPath *path, PlannerInfo *root, double loop_count); extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, + ParamPathInfo *param_info, Path *bitmapqual, double loop_count); extern void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root); extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root); extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec); extern void cost_tidscan(Path *path, PlannerInfo *root, RelOptInfo *baserel, List *tidquals); -extern void cost_subqueryscan(Path *path, RelOptInfo *baserel); +extern void cost_subqueryscan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info); extern void cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel); extern void cost_valuesscan(Path *path, PlannerInfo *root, @@ -149,6 +152,15 @@ extern void compute_semi_anti_join_factors(PlannerInfo *root, List *restrictlist, SemiAntiJoinFactors *semifactors); extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); +extern double get_parameterized_baserel_size(PlannerInfo *root, + RelOptInfo *rel, + List *param_clauses); +extern double get_parameterized_joinrel_size(PlannerInfo *root, + RelOptInfo *rel, + double outer_rows, + double inner_rows, + SpecialJoinInfo *sjinfo, + List *restrict_clauses); extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 3f80ca3fe9..4b2483be60 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -30,7 +30,8 @@ extern bool add_path_precheck(RelOptInfo *parent_rel, Cost startup_cost, Cost total_cost, List *pathkeys, Relids required_outer); -extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel); +extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer); extern IndexPath *create_index_path(PlannerInfo *root, IndexOptInfo *index, List *indexclauses, @@ -45,6 +46,7 @@ extern IndexPath *create_index_path(PlannerInfo *root, extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root, RelOptInfo *rel, Path *bitmapqual, + Relids required_outer, double loop_count); extern BitmapAndPath *create_bitmap_and_path(PlannerInfo *root, RelOptInfo *rel, @@ -54,16 +56,19 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root, List *bitmapquals); extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals); -extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths); +extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths, + Relids required_outer); extern MergeAppendPath *create_merge_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, - List *pathkeys); + List *pathkeys, + Relids required_outer); extern ResultPath *create_result_path(List *quals); extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath); extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SpecialJoinInfo *sjinfo); -extern Path *create_subqueryscan_path(RelOptInfo *rel, List *pathkeys); +extern Path *create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, + List *pathkeys, Relids required_outer); extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel); extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel); extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel); @@ -71,7 +76,7 @@ extern Path *create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel); extern ForeignPath *create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel, double rows, Cost startup_cost, Cost total_cost, List *pathkeys, - Relids required_outer, List *param_clauses, + Relids required_outer, List *fdw_private); extern Relids calc_nestloop_required_outer(Path *outer_path, Path *inner_path); @@ -115,6 +120,10 @@ extern HashPath *create_hashjoin_path(PlannerInfo *root, Relids required_outer, List *hashclauses); +extern Path *reparameterize_path(PlannerInfo *root, Path *path, + Relids required_outer, + double loop_count); + /* * prototypes for relnode.c */ @@ -129,5 +138,19 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, List **restrictlist_ptr); +extern AppendRelInfo *find_childrel_appendrelinfo(PlannerInfo *root, + RelOptInfo *rel); +extern ParamPathInfo *get_baserel_parampathinfo(PlannerInfo *root, + RelOptInfo *baserel, + Relids required_outer); +extern ParamPathInfo *get_joinrel_parampathinfo(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + SpecialJoinInfo *sjinfo, + Relids required_outer, + List **restrict_clauses); +extern ParamPathInfo *get_appendrel_parampathinfo(RelOptInfo *appendrel, + Relids required_outer); #endif /* PATHNODE_H */ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index caaa7c24a1..b3a2dc1d2d 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -114,8 +114,8 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root, bool create_it); extern void generate_base_implied_equalities(PlannerInfo *root); extern List *generate_join_implied_equalities(PlannerInfo *root, - RelOptInfo *joinrel, - RelOptInfo *outer_rel, + Relids join_relids, + Relids outer_relids, RelOptInfo *inner_rel); extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2); extern void add_child_rel_equivalences(PlannerInfo *root, @@ -134,6 +134,7 @@ extern bool has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1); extern bool eclass_useful_for_merging(EquivalenceClass *eclass, RelOptInfo *rel); +extern bool is_redundant_derived_clause(RestrictInfo *rinfo, List *clauselist); /* * pathkeys.c diff --git a/src/include/optimizer/restrictinfo.h b/src/include/optimizer/restrictinfo.h index 34977f9b95..6c88aa679f 100644 --- a/src/include/optimizer/restrictinfo.h +++ b/src/include/optimizer/restrictinfo.h @@ -19,13 +19,14 @@ /* Convenience macro for the common case of a valid-everywhere qual */ #define make_simple_restrictinfo(clause) \ - make_restrictinfo(clause, true, false, false, NULL, NULL) + make_restrictinfo(clause, true, false, false, NULL, NULL, NULL) extern RestrictInfo *make_restrictinfo(Expr *clause, bool is_pushed_down, bool outerjoin_delayed, bool pseudoconstant, Relids required_relids, + Relids outer_relids, Relids nullable_relids); extern List *make_restrictinfo_from_bitmapqual(Path *bitmapqual, bool is_pushed_down, @@ -40,7 +41,9 @@ extern List *extract_actual_clauses(List *restrictinfo_list, extern void extract_actual_join_clauses(List *restrictinfo_list, List **joinquals, List **otherquals); -extern List *select_nonredundant_join_clauses(List *restrictinfo_list, - List *reference_list); +extern bool join_clause_is_movable_to(RestrictInfo *rinfo, Index baserelid); +extern bool join_clause_is_movable_into(RestrictInfo *rinfo, + Relids currentrelids, + Relids current_and_outer); #endif /* RESTRICTINFO_H */ |
