diff options
| author | Tom Lane | 2015-05-10 18:36:30 +0000 |
|---|---|---|
| committer | Tom Lane | 2015-05-10 18:36:36 +0000 |
| commit | 1a8a4e5cde2b7755e11bde2ea7897bd650622d3e (patch) | |
| tree | 17f08ac1fe14058a0b81a48fe437cc60b3e6e3a0 /src/include/nodes | |
| parent | c594c750789fd98a19dcdf974b87ba9833995cf5 (diff) | |
Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
Diffstat (limited to 'src/include/nodes')
| -rw-r--r-- | src/include/nodes/plannodes.h | 37 | ||||
| -rw-r--r-- | src/include/nodes/primnodes.h | 10 | ||||
| -rw-r--r-- | src/include/nodes/relation.h | 45 |
3 files changed, 69 insertions, 23 deletions
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index c63492fa0be..9313292222a 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -479,32 +479,46 @@ typedef struct WorkTableScan * fdw_exprs and fdw_private are both under the control of the foreign-data * wrapper, but fdw_exprs is presumed to contain expression trees and will * be post-processed accordingly by the planner; fdw_private won't be. - * An optional fdw_ps_tlist is used to map a reference to an attribute of - * underlying relation(s) onto a pair of INDEX_VAR and alternative varattno. - * When fdw_ps_tlist is used, this represents a remote join, and the FDW - * is responsible for setting this field to an appropriate value. - * Note that everything in above lists must be copiable by copyObject(). + * Note that everything in both lists must be copiable by copyObject(). * One way to store an arbitrary blob of bytes is to represent it as a bytea * Const. Usually, though, you'll be better off choosing a representation * that can be dumped usefully by nodeToString(). + * + * fdw_scan_tlist is a targetlist describing the contents of the scan tuple + * returned by the FDW; it can be NIL if the scan tuple matches the declared + * rowtype of the foreign table, which is the normal case for a simple foreign + * table scan. (If the plan node represents a foreign join, fdw_scan_tlist + * is required since there is no rowtype available from the system catalogs.) + * When fdw_scan_tlist is provided, Vars in the node's tlist and quals must + * have varno INDEX_VAR, and their varattnos correspond to resnos in the + * fdw_scan_tlist (which are also column numbers in the actual scan tuple). + * fdw_scan_tlist is never actually executed; it just holds expression trees + * describing what is in the scan tuple's columns. + * + * When the plan node represents a foreign join, scan.scanrelid is zero and + * fs_relids must be consulted to identify the join relation. (fs_relids + * is valid for simple scans as well, but will always match scan.scanrelid.) * ---------------- */ typedef struct ForeignScan { Scan scan; - Oid fdw_handler; /* OID of FDW handler */ + Oid fs_server; /* OID of foreign server */ List *fdw_exprs; /* expressions that FDW may evaluate */ - List *fdw_ps_tlist; /* tlist, if replacing a join */ List *fdw_private; /* private data for FDW */ - Bitmapset *fdw_relids; /* RTIs generated by this scan */ + List *fdw_scan_tlist; /* optional tlist describing scan tuple */ + Bitmapset *fs_relids; /* RTIs generated by this scan */ bool fsSystemCol; /* true if any "system column" is needed */ } ForeignScan; /* ---------------- * CustomScan node * - * The comments for ForeignScan's fdw_exprs, fdw_varmap and fdw_private fields - * apply equally to custom_exprs, custom_ps_tlist and custom_private. + * The comments for ForeignScan's fdw_exprs, fdw_private, fdw_scan_tlist, + * and fs_relids fields apply equally to CustomScan's custom_exprs, + * custom_private, custom_scan_tlist, and custom_relids fields. The + * convention of setting scan.scanrelid to zero for joins applies as well. + * * Note that since Plan trees can be copied, custom scan providers *must* * fit all plan data they need into those fields; embedding CustomScan in * a larger struct will not work. @@ -528,8 +542,9 @@ typedef struct CustomScan Scan scan; uint32 flags; /* mask of CUSTOMPATH_* flags, see relation.h */ List *custom_exprs; /* expressions that custom code may evaluate */ - List *custom_ps_tlist;/* tlist, if replacing a join */ List *custom_private; /* private data for custom code */ + List *custom_scan_tlist; /* optional tlist describing scan + * tuple */ Bitmapset *custom_relids; /* RTIs generated by this scan */ const CustomScanMethods *methods; } CustomScan; diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 8f2c64847e2..f10ae4efa88 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -127,9 +127,13 @@ typedef struct Expr * upper-level plan nodes are reassigned to point to the outputs of their * subplans; for example, in a join node varno becomes INNER_VAR or OUTER_VAR * and varattno becomes the index of the proper element of that subplan's - * target list. But varnoold/varoattno continue to hold the original values. - * The code doesn't really need varnoold/varoattno, but they are very useful - * for debugging and interpreting completed plans, so we keep them around. + * target list. Similarly, INDEX_VAR is used to identify Vars that reference + * an index column rather than a heap column. (In ForeignScan and CustomScan + * plan nodes, INDEX_VAR is abused to signify references to columns of a + * custom scan tuple type.) In all these cases, varnoold/varoattno hold the + * original values. The code doesn't really need varnoold/varoattno, but they + * are very useful for debugging and interpreting completed plans, so we keep + * them around. */ #define INNER_VAR 65000 /* reference to inner subplan */ #define OUTER_VAR 65001 /* reference to outer subplan */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 1713d298de2..d3ee61c4d04 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -365,18 +365,21 @@ typedef struct PlannerInfo * subplan - plan for subquery (NULL if it's not a subquery) * subroot - PlannerInfo for subquery (NULL if it's not a subquery) * subplan_params - list of PlannerParamItems to be passed to subquery - * fdwroutine - function hooks for FDW, if foreign table (else NULL) - * fdw_handler - OID of FDW handler, if foreign table (else InvalidOid) - * fdw_private - private state for FDW, if foreign table (else NULL) * * Note: for a subquery, tuples, subplan, subroot are not set immediately * upon creation of the RelOptInfo object; they are filled in when - * set_subquery_pathlist processes the object. Likewise, fdwroutine - * and fdw_private are filled during initial path creation. + * set_subquery_pathlist processes the object. * * For otherrels that are appendrel members, these fields are filled * in just as for a baserel. * + * If the relation is either a foreign table or a join of foreign tables that + * all belong to the same foreign server, these fields will be set: + * + * serverid - OID of foreign server, if foreign table (else InvalidOid) + * fdwroutine - function hooks for FDW, if foreign table (else NULL) + * fdw_private - private state for FDW, if foreign table (else NULL) + * * The presence of the remaining fields depends on the restrictions * and joins that the relation participates in: * @@ -460,10 +463,12 @@ typedef struct RelOptInfo struct Plan *subplan; /* if subquery */ PlannerInfo *subroot; /* if subquery */ List *subplan_params; /* if subquery */ + + /* Information about foreign tables and foreign joins */ + Oid serverid; /* identifies server for the table or join */ /* use "struct FdwRoutine" to avoid including fdwapi.h here */ - struct FdwRoutine *fdwroutine; /* if foreign table */ - Oid fdw_handler; /* if foreign table */ - void *fdw_private; /* if foreign table */ + struct FdwRoutine *fdwroutine; + void *fdw_private; /* used by various scans and joins: */ List *baserestrictinfo; /* RestrictInfo structures (if base @@ -523,7 +528,7 @@ typedef struct IndexOptInfo bool *reverse_sort; /* is sort order descending? */ bool *nulls_first; /* do NULLs come first in the sort order? */ bool *canreturn; /* which index cols can be returned in an - index-only scan? */ + * index-only scan? */ Oid relam; /* OID of the access method (in pg_am) */ RegProcedure amcostestimate; /* OID of the access method's cost fcn */ @@ -1668,6 +1673,28 @@ typedef struct SemiAntiJoinFactors } SemiAntiJoinFactors; /* + * Struct for extra information passed to subroutines of add_paths_to_joinrel + * + * restrictlist contains all of the RestrictInfo nodes for restriction + * clauses that apply to this join + * mergeclause_list is a list of RestrictInfo nodes for available + * mergejoin clauses in this join + * sjinfo is extra info about special joins for selectivity estimation + * semifactors is as shown above (only valid for SEMI or ANTI joins) + * param_source_rels are OK targets for parameterization of result paths + * extra_lateral_rels are additional parameterization for result paths + */ +typedef struct JoinPathExtraData +{ + List *restrictlist; + List *mergeclause_list; + SpecialJoinInfo *sjinfo; + SemiAntiJoinFactors semifactors; + Relids param_source_rels; + Relids extra_lateral_rels; +} JoinPathExtraData; + +/* * For speed reasons, cost estimation for join paths is performed in two * phases: the first phase tries to quickly derive a lower bound for the * join cost, and then we check if that's sufficient to reject the path. |
