diff options
| author | Robert Haas | 2017-10-06 15:11:10 +0000 |
|---|---|---|
| committer | Robert Haas | 2017-10-06 15:11:10 +0000 |
| commit | f49842d1ee31b976c681322f76025d7732e860f3 (patch) | |
| tree | bc86f11819247980137e89404162ddc88200ac1c /src/include | |
| parent | fe9ba28ee852bb968bc8948d172c6bc0c70c50df (diff) | |
Basic partition-wise join functionality.
Instead of joining two partitioned tables in their entirety, we can, if
the join is an equi-join on the partition keys, join the matching partitions
individually. This involves teaching the planner about "other join"
rels, which are related to regular join rels in the same way that
other member rels are related to baserels. This can use significantly
more CPU time and memory than regular join planning, because there may
now be a set of "other" rels not only for every base relation but also
for every join relation. In most practical cases, this probably
shouldn't be a problem, because (1) it's probably unusual to join many
tables each with many partitions using the partition keys for all
joins and (2) if you do have that scenario then you probably have a big
enough machine to handle the increased memory cost of planning and (3)
the resulting plan is highly likely to be better, so what you spend in
planning you'll make up on the execution side. All the same, for now,
turn this feature off by default.
Currently, we can only perform joins between two tables whose
partitioning schemes are absolutely identical. It would be nice to
cope with other scenarios, such as extra partitions on one side or the
other with no match on the other side, but that will have to wait for
a future patch.
Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit
Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit
Khandekar, and by me. A few final adjustments by me.
Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com
Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/foreign/fdwapi.h | 6 | ||||
| -rw-r--r-- | src/include/nodes/extensible.h | 3 | ||||
| -rw-r--r-- | src/include/nodes/relation.h | 50 | ||||
| -rw-r--r-- | src/include/optimizer/cost.h | 1 | ||||
| -rw-r--r-- | src/include/optimizer/pathnode.h | 6 | ||||
| -rw-r--r-- | src/include/optimizer/paths.h | 5 | ||||
| -rw-r--r-- | src/include/optimizer/placeholder.h | 2 | ||||
| -rw-r--r-- | src/include/optimizer/planner.h | 2 | ||||
| -rw-r--r-- | src/include/optimizer/prep.h | 6 |
9 files changed, 73 insertions, 8 deletions
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index ef0fbe6f9c6..04e43cc5e5e 100644 --- a/src/include/foreign/fdwapi.h +++ b/src/include/foreign/fdwapi.h @@ -158,6 +158,9 @@ typedef void (*ShutdownForeignScan_function) (ForeignScanState *node); typedef bool (*IsForeignScanParallelSafe_function) (PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); +typedef List *(*ReparameterizeForeignPathByChild_function) (PlannerInfo *root, + List *fdw_private, + RelOptInfo *child_rel); /* * FdwRoutine is the struct returned by a foreign-data wrapper's handler @@ -230,6 +233,9 @@ typedef struct FdwRoutine ReInitializeDSMForeignScan_function ReInitializeDSMForeignScan; InitializeWorkerForeignScan_function InitializeWorkerForeignScan; ShutdownForeignScan_function ShutdownForeignScan; + + /* Support functions for path reparameterization. */ + ReparameterizeForeignPathByChild_function ReparameterizeForeignPathByChild; } FdwRoutine; diff --git a/src/include/nodes/extensible.h b/src/include/nodes/extensible.h index 0654e79c7ba..c3436c7a4e4 100644 --- a/src/include/nodes/extensible.h +++ b/src/include/nodes/extensible.h @@ -96,6 +96,9 @@ typedef struct CustomPathMethods List *tlist, List *clauses, List *custom_plans); + struct List *(*ReparameterizeCustomPathByChild) (PlannerInfo *root, + List *custom_private, + RelOptInfo *child_rel); } CustomPathMethods; /* diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 48e6012f7fe..e085cefb7ba 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -391,6 +391,11 @@ typedef struct PartitionSchemeData *PartitionScheme; * handling join alias Vars. Currently this is not needed because all join * alias Vars are expanded to non-aliased form during preprocess_expression. * + * We also have relations representing joins between child relations of + * different partitioned tables. 
These relations are not added to + * join_rel_level lists as they are not joined directly by the dynamic + * programming algorithm. + * * There is also a RelOptKind for "upper" relations, which are RelOptInfos * that describe post-scan/join processing steps, such as aggregation. * Many of the fields in these RelOptInfos are meaningless, but their Path @@ -525,14 +530,18 @@ typedef struct PartitionSchemeData *PartitionScheme; * boundinfo - Partition bounds * nparts - Number of partitions * part_rels - RelOptInfos for each partition - * partexprs - Partition key expressions + * partexprs, nullable_partexprs - Partition key expressions * * Note: A base relation always has only one set of partition keys, but a join * relation may have as many sets of partition keys as the number of relations - * being joined. partexprs is an array containing part_scheme->partnatts - * elements, each of which is a list of partition key expressions. For a base - * relation each list contains only one expression, but for a join relation - * there can be one per baserel. + * being joined. partexprs and nullable_partexprs are arrays containing + * part_scheme->partnatts elements each. Each of these elements is a list of + * partition key expressions. For a base relation each list in partexprs + * contains only one expression and nullable_partexprs is not populated. For a + * join relation, partexprs and nullable_partexprs contain partition key + * expressions from non-nullable and nullable relations resp. Lists at any + * given position in those arrays together contain as many elements as the + * number of joining relations. *---------- */ typedef enum RelOptKind @@ -540,6 +549,7 @@ typedef enum RelOptKind RELOPT_BASEREL, RELOPT_JOINREL, RELOPT_OTHER_MEMBER_REL, + RELOPT_OTHER_JOINREL, RELOPT_UPPER_REL, RELOPT_DEADREL } RelOptKind; @@ -553,13 +563,17 @@ typedef enum RelOptKind (rel)->reloptkind == RELOPT_OTHER_MEMBER_REL) /* Is the given relation a join relation? 
*/ -#define IS_JOIN_REL(rel) ((rel)->reloptkind == RELOPT_JOINREL) +#define IS_JOIN_REL(rel) \ + ((rel)->reloptkind == RELOPT_JOINREL || \ + (rel)->reloptkind == RELOPT_OTHER_JOINREL) /* Is the given relation an upper relation? */ #define IS_UPPER_REL(rel) ((rel)->reloptkind == RELOPT_UPPER_REL) /* Is the given relation an "other" relation? */ -#define IS_OTHER_REL(rel) ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL) +#define IS_OTHER_REL(rel) \ + ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL || \ + (rel)->reloptkind == RELOPT_OTHER_JOINREL) typedef struct RelOptInfo { @@ -645,10 +659,30 @@ typedef struct RelOptInfo struct PartitionBoundInfoData *boundinfo; /* Partition bounds */ struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions, * stored in the same order of bounds */ - List **partexprs; /* Partition key expressions. */ + List **partexprs; /* Non-nullable partition key expressions. */ + List **nullable_partexprs; /* Nullable partition key expressions. */ } RelOptInfo; /* + * Is given relation partitioned? + * + * A join between two partitioned relations with same partitioning scheme + * without any matching partitions will not have any partition in it but will + * have partition scheme set. So a relation is deemed to be partitioned if it + * has a partitioning scheme, bounds and positive number of partitions. + */ +#define IS_PARTITIONED_REL(rel) \ + ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0) + +/* + * Convenience macro to make sure that a partitioned relation has all the + * required members set. 
+ */ +#define REL_HAS_ALL_PART_PROPS(rel) \ + ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0 && \ + (rel)->part_rels && (rel)->partexprs && (rel)->nullable_partexprs) + +/* * IndexOptInfo * Per-index information for planning/optimization * diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 63feba06e7e..306d923a22a 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -67,6 +67,7 @@ extern bool enable_material; extern bool enable_mergejoin; extern bool enable_hashjoin; extern bool enable_gathermerge; +extern bool enable_partition_wise_join; extern int constraint_exclusion; extern double clamp_row_est(double nrows); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index e372f8862b8..e9ed16ad321 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -251,6 +251,8 @@ extern LimitPath *create_limit_path(PlannerInfo *root, RelOptInfo *rel, extern Path *reparameterize_path(PlannerInfo *root, Path *path, Relids required_outer, double loop_count); +extern Path *reparameterize_path_by_child(PlannerInfo *root, Path *path, + RelOptInfo *child_rel); /* * prototypes for relnode.c @@ -290,5 +292,9 @@ extern ParamPathInfo *get_appendrel_parampathinfo(RelOptInfo *appendrel, Relids required_outer); extern ParamPathInfo *find_param_path_info(RelOptInfo *rel, Relids required_outer); +extern RelOptInfo *build_child_join_rel(PlannerInfo *root, + RelOptInfo *outer_rel, RelOptInfo *inner_rel, + RelOptInfo *parent_joinrel, List *restrictlist, + SpecialJoinInfo *sjinfo, JoinType jointype); #endif /* PATHNODE_H */ diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 4e06b2e2993..a15eee54bb8 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -58,6 +58,8 @@ extern int compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages); extern void create_partial_bitmap_paths(PlannerInfo *root, 
RelOptInfo *rel, Path *bitmapqual); +extern void generate_partition_wise_join_paths(PlannerInfo *root, + RelOptInfo *rel); #ifdef OPTIMIZER_DEBUG extern void debug_print_rel(PlannerInfo *root, RelOptInfo *rel); @@ -111,6 +113,9 @@ extern bool have_join_order_restriction(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2); extern bool have_dangerous_phv(PlannerInfo *root, Relids outer_relids, Relids inner_params); +extern void mark_dummy_rel(RelOptInfo *rel); +extern bool have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2, + JoinType jointype, List *restrictlist); /* * equivclass.c diff --git a/src/include/optimizer/placeholder.h b/src/include/optimizer/placeholder.h index 5a4d46ba9d7..a4a7b79f4da 100644 --- a/src/include/optimizer/placeholder.h +++ b/src/include/optimizer/placeholder.h @@ -28,5 +28,7 @@ extern void fix_placeholder_input_needed_levels(PlannerInfo *root); extern void add_placeholders_to_base_rels(PlannerInfo *root); extern void add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outer_rel, RelOptInfo *inner_rel); +extern void add_placeholders_to_child_joinrel(PlannerInfo *root, + RelOptInfo *childrel, RelOptInfo *parentrel); #endif /* PLACEHOLDER_H */ diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h index 2a4cf71e102..2801bfdfbe8 100644 --- a/src/include/optimizer/planner.h +++ b/src/include/optimizer/planner.h @@ -58,5 +58,7 @@ extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr); extern bool plan_cluster_use_sort(Oid tableOid, Oid indexOid); extern List *get_partitioned_child_rels(PlannerInfo *root, Index rti); +extern List *get_partitioned_child_rels_for_join(PlannerInfo *root, + Relids join_relids); #endif /* PLANNER_H */ diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h index 4be0afd5660..80fbfd6ea94 100644 --- a/src/include/optimizer/prep.h +++ b/src/include/optimizer/prep.h @@ -62,4 +62,10 @@ extern Node 
*adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node, extern AppendRelInfo **find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos); +extern SpecialJoinInfo *build_child_join_sjinfo(PlannerInfo *root, + SpecialJoinInfo *parent_sjinfo, + Relids left_relids, Relids right_relids); +extern Relids adjust_child_relids_multilevel(PlannerInfo *root, Relids relids, + Relids child_relids, Relids top_parent_relids); + #endif /* PREP_H */ |
