diff options
| author | Tom Lane | 2017-04-08 02:20:03 +0000 |
|---|---|---|
| committer | Tom Lane | 2017-04-08 02:20:13 +0000 |
| commit | 9c7f5229ad68d7e0e4dd149e3f80257893e404d4 (patch) | |
| tree | 0a167d403952550f43941b01b24ed5e7526c5351 /src/include | |
| parent | f13a9121f9822eafe05cc3178bf046155a248173 (diff) | |
Optimize joins when the inner relation can be proven unique.
If there can certainly be no more than one matching inner row for a given
outer row, then the executor can move on to the next outer row as soon as
it's found one match; there's no need to continue scanning the inner
relation for this outer row. This saves useless scanning in nestloop
and hash joins. In merge joins, it offers the opportunity to skip
mark/restore processing, because we know we have not advanced past the
first possible match for the next outer row.
Of course, the devil is in the details: the proof of uniqueness must
depend only on joinquals (not otherquals), and if we want to skip
mergejoin mark/restore then it must depend only on merge clauses.
To avoid adding more planning overhead than absolutely necessary,
the present patch errs in the conservative direction: there are cases
where inner_unique or skip_mark_restore processing could be used, but
it will not do so because it's not sure that the uniqueness proof
depended only on "safe" clauses. This could be improved later.
David Rowley, reviewed and rather heavily editorialized on by me
Discussion: https://postgr.es/m/CAApHDvqF6Sw-TK98bW48TdtFJ+3a7D2mFyZ7++=D-RyPsL76gw@mail.gmail.com
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/nodes/execnodes.h | 4 | ||||
| -rw-r--r-- | src/include/nodes/plannodes.h | 8 | ||||
| -rw-r--r-- | src/include/nodes/relation.h | 41 | ||||
| -rw-r--r-- | src/include/optimizer/cost.h | 16 | ||||
| -rw-r--r-- | src/include/optimizer/pathnode.h | 8 | ||||
| -rw-r--r-- | src/include/optimizer/planmain.h | 3 |
6 files changed, 60 insertions, 20 deletions
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index fa992449f47..4330a851c32 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1536,6 +1536,8 @@ typedef struct JoinState { PlanState ps; JoinType jointype; + bool single_match; /* True if we should skip to next outer tuple + * after finding one inner match */ ExprState *joinqual; /* JOIN quals (in addition to ps.qual) */ } JoinState; @@ -1561,6 +1563,7 @@ typedef struct NestLoopState * NumClauses number of mergejoinable join clauses * Clauses info for each mergejoinable clause * JoinState current state of ExecMergeJoin state machine + * SkipMarkRestore true if we may skip Mark and Restore operations * ExtraMarks true to issue extra Mark operations on inner scan * ConstFalseJoin true if we have a constant-false joinqual * FillOuter true if should emit unjoined outer tuples anyway @@ -1585,6 +1588,7 @@ typedef struct MergeJoinState int mj_NumClauses; MergeJoinClause mj_Clauses; /* array of length mj_NumClauses */ int mj_JoinState; + bool mj_SkipMarkRestore; bool mj_ExtraMarks; bool mj_ConstFalseJoin; bool mj_FillOuter; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index a2dd26f8a9c..12f9f615fde 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -632,6 +632,7 @@ typedef struct CustomScan * Join node * * jointype: rule for joining tuples from left and right subtrees + * inner_unique each outer tuple can match to no more than one inner tuple * joinqual: qual conditions that came from JOIN/ON or JOIN/USING * (plan.qual contains conditions that came from WHERE) * @@ -642,12 +643,18 @@ typedef struct CustomScan * (But plan.qual is still applied before actually returning a tuple.) * For an outer join, only joinquals are allowed to be used as the merge * or hash condition of a merge or hash join. + * + * inner_unique is set if the joinquals are such that no more than one inner + * tuple could match any given outer tuple. This allows the executor to + * skip searching for additional matches. (This must be provable from just + * the joinquals, ignoring plan.qual, due to where the executor tests it.) * ---------------- */ typedef struct Join { Plan plan; JoinType jointype; + bool inner_unique; List *joinqual; /* JOIN quals (in addition to plan.qual) */ } Join; @@ -689,6 +696,7 @@ typedef struct NestLoopParam typedef struct MergeJoin { Join join; + bool skip_mark_restore; /* Can we skip mark/restore calls? */ List *mergeclauses; /* mergeclauses as expression trees */ /* these are arrays, but have the same length as the mergeclauses list: */ Oid *mergeFamilies; /* per-clause OIDs of btree opfamilies */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 6bad18e77c7..7a8e2fd2b87 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -442,6 +442,19 @@ typedef struct PlannerInfo * fdwroutine - function hooks for FDW, if foreign table (else NULL) * fdw_private - private state for FDW, if foreign table (else NULL) * + * Two fields are used to cache knowledge acquired during the join search + * about whether this rel is provably unique when being joined to given other + * relation(s), ie, it can have at most one row matching any given row from + * that join relation. Currently we only attempt such proofs, and thus only + * populate these fields, for base rels; but someday they might be used for + * join rels too: + * + * unique_for_rels - list of Relid sets, each one being a set of other + * rels for which this one has been proven unique + * non_unique_for_rels - list of Relid sets, each one being a set of + * other rels for which we have tried and failed to prove + * this one unique + * * The presence of the remaining fields depends on the restrictions * and joins that the relation participates in: * @@ -562,6 +575,10 @@ typedef struct RelOptInfo struct FdwRoutine *fdwroutine; void *fdw_private; + /* cache space for remembering if we have proven this relation unique */ + List *unique_for_rels; /* known unique for these other relid set(s) */ + List *non_unique_for_rels; /* known not unique for these set(s) */ + /* used by various scans and joins: */ List *baserestrictinfo; /* RestrictInfo structures (if base * rel) */ @@ -572,8 +589,8 @@ typedef struct RelOptInfo * involving this rel */ bool has_eclass_joins; /* T means joininfo is incomplete */ - /* used by "other" relations. */ - Relids top_parent_relids; /* Relids of topmost parents. */ + /* used by "other" relations */ + Relids top_parent_relids; /* Relids of topmost parents */ } RelOptInfo; /* @@ -1272,6 +1289,9 @@ typedef struct JoinPath JoinType jointype; + bool inner_unique; /* each outer tuple provably matches no more + * than one inner tuple */ + Path *outerjoinpath; /* path for the outer side of the join */ Path *innerjoinpath; /* path for the inner side of the join */ @@ -1314,6 +1334,13 @@ typedef JoinPath NestPath; * mergejoin. If it is not NIL then it is a PathKeys list describing * the ordering that must be created by an explicit Sort node. * + * skip_mark_restore is TRUE if the executor need not do mark/restore calls. + * Mark/restore overhead is usually required, but can be skipped if we know + * that the executor need find only one match per outer tuple, and that the + * mergeclauses are sufficient to identify a match. In such cases the + * executor can immediately advance the outer relation after processing a + * match, and therefoere it need never back up the inner relation. + * * materialize_inner is TRUE if a Material node should be placed atop the * inner input. This may appear with or without an inner Sort step. */ @@ -1324,6 +1351,7 @@ typedef struct MergePath List *path_mergeclauses; /* join clauses to be used for merge */ List *outersortkeys; /* keys for explicit sort, if any */ List *innersortkeys; /* keys for explicit sort, if any */ + bool skip_mark_restore; /* can executor skip mark/restore? */ bool materialize_inner; /* add Materialize to inner? */ } MergePath; @@ -2112,8 +2140,8 @@ typedef struct PlannerParamItem } PlannerParamItem; /* - * When making cost estimates for a SEMI or ANTI join, there are some - * correction factors that are needed in both nestloop and hash joins + * When making cost estimates for a SEMI/ANTI/inner_unique join, there are + * some correction factors that are needed in both nestloop and hash joins * to account for the fact that the executor can stop scanning inner rows * as soon as it finds a match to the current outer row. These numbers * depend only on the selected outer and inner join relations, not on the @@ -2140,14 +2168,17 @@ typedef struct SemiAntiJoinFactors * clauses that apply to this join * mergeclause_list is a list of RestrictInfo nodes for available * mergejoin clauses in this join + * inner_unique is true if each outer tuple provably matches no more + * than one inner tuple * sjinfo is extra info about special joins for selectivity estimation - * semifactors is as shown above (only valid for SEMI or ANTI joins) + * semifactors is as shown above (only valid for SEMI/ANTI/inner_unique joins) * param_source_rels are OK targets for parameterization of result paths */ typedef struct JoinPathExtraData { List *restrictlist; List *mergeclause_list; + bool inner_unique; SpecialJoinInfo *sjinfo; SemiAntiJoinFactors semifactors; Relids param_source_rels; diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 6909359bcff..ed70defa179 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -129,33 +129,29 @@ extern void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, Path *outer_path, Path *inner_path, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors); + JoinPathExtraData *extra); extern void final_cost_nestloop(PlannerInfo *root, NestPath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors); + JoinPathExtraData *extra); extern void initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *mergeclauses, Path *outer_path, Path *inner_path, List *outersortkeys, List *innersortkeys, - SpecialJoinInfo *sjinfo); + JoinPathExtraData *extra); extern void final_cost_mergejoin(PlannerInfo *root, MergePath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo); + JoinPathExtraData *extra); extern void initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *hashclauses, Path *outer_path, Path *inner_path, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors); + JoinPathExtraData *extra); extern void final_cost_hashjoin(PlannerInfo *root, HashPath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors); + JoinPathExtraData *extra); extern void cost_gather(GatherPath *path, PlannerInfo *root, RelOptInfo *baserel, ParamPathInfo *param_info, double *rows); extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 2e712c6a35d..77bc7704acb 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -119,8 +119,7 @@ extern NestPath *create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors, + JoinPathExtraData *extra, Path *outer_path, Path *inner_path, List *restrict_clauses, @@ -131,7 +130,7 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, + JoinPathExtraData *extra, Path *outer_path, Path *inner_path, List *restrict_clauses, @@ -145,8 +144,7 @@ extern HashPath *create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors, + JoinPathExtraData *extra, Path *outer_path, Path *inner_path, List *restrict_clauses, diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 94ef84bca9c..5df68a22a60 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -105,6 +105,9 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root); extern List *remove_useless_joins(PlannerInfo *root, List *joinlist); extern bool query_supports_distinctness(Query *query); extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); +extern bool innerrel_is_unique(PlannerInfo *root, + RelOptInfo *outerrel, RelOptInfo *innerrel, + JoinType jointype, List *restrictlist); /* * prototypes for plan/setrefs.c |
