diff options
| author | Ashutosh Bapat | 2012-06-14 06:21:13 +0000 |
|---|---|---|
| committer | Ashutosh Bapat | 2012-06-14 06:21:13 +0000 |
| commit | cd6b5d791a5fc99c8188566f1885d98164b44154 (patch) | |
| tree | aa127dd2bedb7e29d5249d43e6ad9d6ad888b88c /src | |
| parent | 139323f9ffe40e2e448ddeeff11559669e0d39f6 (diff) | |
The code to reduce the number of data-nodes based on the quals in the query
is duplicated in two places. Move this code into a function
GetRelationNodesByQuals() to avoid duplication.
Diffstat (limited to 'src')
| -rw-r--r-- | src/backend/optimizer/plan/createplan.c | 71 | ||||
| -rw-r--r-- | src/backend/pgxc/locator/locator.c | 174 | ||||
| -rw-r--r-- | src/backend/pgxc/plan/planner.c | 159 | ||||
| -rw-r--r-- | src/include/pgxc/locator.h | 2 | ||||
| -rw-r--r-- | src/include/pgxc/planner.h | 2 |
5 files changed, 189 insertions, 219 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 918f07fe1f..0099d9d886 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -2584,15 +2584,10 @@ create_remotequery_plan(PlannerInfo *root, Path *best_path, List *remote_scan_clauses = NIL; List *local_scan_clauses = NIL; StringInfoData sql; - RelationLocInfo *rel_loc_info; Query *query; RangeTblRef *rtr; List *varlist; ListCell *varcell; - Expr *distcol_expr = NULL; - Datum distcol_value; - bool distcol_isnull; - Oid distcol_type; Node *tmp_node; List *rmlist; List *tvarlist; @@ -2760,10 +2755,6 @@ create_remotequery_plan(PlannerInfo *root, Path *best_path, if (rmlist != NULL) list_free_deep(rmlist); - rel_loc_info = GetRelationLocInfo(rte->relid); - if (!rel_loc_info) - elog(ERROR, "No distribution information found for relid %d", rte->relid); - if (tlist_is_simple) { scan_plan = make_remotequery(tlist, local_scan_clauses, scan_relid); @@ -2787,63 +2778,11 @@ create_remotequery_plan(PlannerInfo *root, Path *best_path, scan_plan->has_row_marks = query->hasForUpdate; scan_plan->sql_statement = sql.data; - /* - * If the table distributed by value, check if we can reduce the Datanodes - * by looking at the qualifiers for this relation - */ - if (IsLocatorDistributedByValue(rel_loc_info->locatorType)) - { - Oid disttype = get_atttype(rte->relid, rel_loc_info->partAttrNum); - int32 disttypmod = get_atttypmod(rte->relid, rel_loc_info->partAttrNum); - distcol_expr = pgxc_find_distcol_expr(rtr->rtindex, rel_loc_info->partAttrNum, - query->jointree->quals); - /* - * If the type of expression used to find the Datanode, is not same as - * the distribution column type, try casting it. This is same as what - * will happen in case of inserting that type of expression value as the - * distribution column value. 
- */ - if (distcol_expr) - { - distcol_expr = (Expr *)coerce_to_target_type(NULL, - (Node *)distcol_expr, - exprType((Node *)distcol_expr), - disttype, disttypmod, - COERCION_ASSIGNMENT, - COERCE_IMPLICIT_CAST, -1); - /* - * PGXC_FQS_TODO: We should set the bound parameters here, but we don't have - * PlannerInfo struct and we don't handle them right now. - * Even if constant expression mutator changes the expression, it will - * only simplify it, keeping the semantics same - */ - distcol_expr = (Expr *)eval_const_expressions(NULL, - (Node *)distcol_expr); - } - } - - if (distcol_expr && IsA(distcol_expr, Const)) - { - Const *const_expr = (Const *)distcol_expr; - distcol_value = const_expr->constvalue; - distcol_isnull = const_expr->constisnull; - distcol_type = const_expr->consttype; - } - else - { - distcol_value = (Datum) 0; - distcol_isnull = true; - distcol_type = InvalidOid; - } - - scan_plan->exec_nodes = GetRelationNodes(rel_loc_info, distcol_value, - distcol_isnull, distcol_type, - RELATION_ACCESS_READ); - Assert(scan_plan->exec_nodes); - if (rel_loc_info) - scan_plan->exec_nodes->baselocatortype = rel_loc_info->locatorType; - else - scan_plan->exec_nodes->baselocatortype = '\0'; + scan_plan->exec_nodes = GetRelationNodesByQuals(rte->relid, rtr->rtindex, + query->jointree->quals, + RELATION_ACCESS_READ); + if (!scan_plan->exec_nodes) + elog(ERROR, "No distribution information found for relid %d", rte->relid); copy_path_costsize(&scan_plan->scan.plan, best_path); diff --git a/src/backend/pgxc/locator/locator.c b/src/backend/pgxc/locator/locator.c index a6d1d287de..1bba86b379 100644 --- a/src/backend/pgxc/locator/locator.c +++ b/src/backend/pgxc/locator/locator.c @@ -28,6 +28,7 @@ #include "catalog/indexing.h" #include "catalog/pg_type.h" #include "nodes/pg_list.h" +#include "nodes/nodeFuncs.h" #include "utils/builtins.h" #include "utils/catcache.h" #include "utils/fmgroids.h" @@ -37,6 +38,8 @@ #include "utils/tqual.h" #include "utils/syscache.h" #include 
"nodes/nodes.h" +#include "optimizer/clauses.h" +#include "parser/parse_coerce.h" #include "pgxc/nodemgr.h" #include "pgxc/locator.h" #include "pgxc/pgxc.h" @@ -47,6 +50,8 @@ #include "catalog/namespace.h" #include "access/hash.h" +static Expr *pgxc_find_distcol_expr(Index varno, PartAttrNumber partAttrNum, + Node *quals); Oid primary_data_node = InvalidOid; int num_preferred_data_nodes = 0; @@ -655,6 +660,79 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, Datum valueForDistCol, return exec_nodes; } +/* + * GetRelationNodesByQuals + * A wrapper around GetRelationNodes to reduce the node list by looking at the + * quals. varno is assumed to be the varno of reloid inside the quals. No check + * is made to see if that's correct. + */ +ExecNodes * +GetRelationNodesByQuals(Oid reloid, Index varno, Node *quals, + RelationAccessType relaccess) +{ + RelationLocInfo *rel_loc_info = GetRelationLocInfo(reloid); + Expr *distcol_expr = NULL; + ExecNodes *exec_nodes; + Datum distcol_value; + bool distcol_isnull; + Oid distcol_type; + + if (!rel_loc_info) + return NULL; + /* + * If the table distributed by value, check if we can reduce the Datanodes + * by looking at the qualifiers for this relation + */ + if (IsLocatorDistributedByValue(rel_loc_info->locatorType)) + { + Oid disttype = get_atttype(reloid, rel_loc_info->partAttrNum); + int32 disttypmod = get_atttypmod(reloid, rel_loc_info->partAttrNum); + distcol_expr = pgxc_find_distcol_expr(varno, rel_loc_info->partAttrNum, + quals); + /* + * If the type of expression used to find the Datanode, is not same as + * the distribution column type, try casting it. This is same as what + * will happen in case of inserting that type of expression value as the + * distribution column value. 
+ */ + if (distcol_expr) + { + distcol_expr = (Expr *)coerce_to_target_type(NULL, + (Node *)distcol_expr, + exprType((Node *)distcol_expr), + disttype, disttypmod, + COERCION_ASSIGNMENT, + COERCE_IMPLICIT_CAST, -1); + /* + * PGXC_FQS_TODO: We should set the bound parameters here, but we don't have + * PlannerInfo struct and we don't handle them right now. + * Even if constant expression mutator changes the expression, it will + * only simplify it, keeping the semantics same + */ + distcol_expr = (Expr *)eval_const_expressions(NULL, + (Node *)distcol_expr); + } + } + + if (distcol_expr && IsA(distcol_expr, Const)) + { + Const *const_expr = (Const *)distcol_expr; + distcol_value = const_expr->constvalue; + distcol_isnull = const_expr->constisnull; + distcol_type = const_expr->consttype; + } + else + { + distcol_value = (Datum) 0; + distcol_isnull = true; + distcol_type = InvalidOid; + } + + exec_nodes = GetRelationNodes(rel_loc_info, distcol_value, + distcol_isnull, distcol_type, + relaccess); + return exec_nodes; +} /* * ConvertToLocatorType @@ -921,3 +999,99 @@ FreeExecNodes(ExecNodes **exec_nodes) pfree(tmp_en); *exec_nodes = NULL; } + +/* + * pgxc_find_distcol_expr + * Search through the quals provided and find out an expression which will give + * us value of distribution column if exists in the quals. Say for a table + * tab1 (val int, val2 int) distributed by hash(val), a query "SELECT * FROM + * tab1 WHERE val = fn(x, y, z) and val2 = 3", fn(x,y,z) is the expression which + * decides the distribution column value in the rows qualified by this query. + * Hence return fn(x, y, z). But for a query "SELECT * FROM tab1 WHERE val = + * fn(x, y, z) || val2 = 3", there is no expression which decides the values + * distribution column val can take in the qualified rows. So, in such cases + * this function returns NULL. 
+ */ +static Expr * +pgxc_find_distcol_expr(Index varno, PartAttrNumber partAttrNum, + Node *quals) +{ + /* Convert the qualification into list of arguments of AND */ + List *lquals = make_ands_implicit((Expr *)quals); + ListCell *qual_cell; + /* + * For every ANDed expression, check if that expression is of the form + * <distribution_col> = <expr>. If so return expr. + */ + foreach(qual_cell, lquals) + { + Expr *qual_expr = (Expr *)lfirst(qual_cell); + OpExpr *op; + Expr *lexpr; + Expr *rexpr; + Var *var_expr; + Expr *distcol_expr; + + if (!IsA(qual_expr, OpExpr)) + continue; + op = (OpExpr *)qual_expr; + /* If not a binary operator, it can not be '='. */ + if (list_length(op->args) != 2) + continue; + + lexpr = linitial(op->args); + rexpr = lsecond(op->args); + + /* + * If either of the operands is a RelabelType, extract the Var in the RelabelType. + * A RelabelType represents a "dummy" type coercion between two binary compatible datatypes. + * If we do not handle these then our optimization does not work in case of varchar + * For example if col is of type varchar and is the dist key then + * select * from vc_tab where col = 'abcdefghijklmnopqrstuvwxyz'; + * should be shipped to one of the nodes only + */ + if (IsA(lexpr, RelabelType)) + lexpr = ((RelabelType*)lexpr)->arg; + if (IsA(rexpr, RelabelType)) + rexpr = ((RelabelType*)rexpr)->arg; + + /* + * If either of the operands is a Var expression, assume the other + * one is distribution column expression. If none is Var check next + * qual. + */ + if (IsA(lexpr, Var)) + { + var_expr = (Var *)lexpr; + distcol_expr = rexpr; + } + else if (IsA(rexpr, Var)) + { + var_expr = (Var *)rexpr; + distcol_expr = lexpr; + } + else + continue; + /* + * If Var found is not the distribution column of required relation, + * check next qual + */ + if (var_expr->varno != varno || var_expr->varattno != partAttrNum) + continue; + /* + * If the operator is not an assignment operator, check next + * constraint. 
An operator is an assignment operator if it's + * mergejoinable or hashjoinable. Beware that not every assignment + * operator is mergejoinable or hashjoinable, so we might leave some + * oportunity. But then we have to rely on the opname which may not + * be something we know to be equality operator as well. + */ + if (!op_mergejoinable(op->opno, exprType((Node *)lexpr)) && + !op_hashjoinable(op->opno, exprType((Node *)lexpr))) + continue; + /* Found the distribution column expression return it */ + return distcol_expr; + } + /* Exhausted all quals, but no distribution column expression */ + return NULL; +} diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 59bfa14152..0a2831afda 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -33,7 +33,6 @@ #include "optimizer/planner.h" #include "optimizer/tlist.h" #include "parser/parse_agg.h" -#include "parser/parse_coerce.h" #include "parser/parse_func.h" #include "parser/parse_relation.h" #include "parser/parsetree.h" @@ -1288,102 +1287,6 @@ pgxc_qual_hash_dist_equijoin(Relids varnos_1, Relids varnos_2, Oid distcol_type, return false; } -/* - * pgxc_find_distcol_expr - * Search through the quals provided and find out an expression which will give - * us value of distribution column if exists in the quals. Say for a table - * tab1 (val int, val2 int) distributed by hash(val), a query "SELECT * FROM - * tab1 WHERE val = fn(x, y, z) and val2 = 3", fn(x,y,z) is the expression which - * decides the distribution column value in the rows qualified by this query. - * Hence return fn(x, y, z). But for a query "SELECT * FROM tab1 WHERE val = - * fn(x, y, z) || val2 = 3", there is no expression which decides the values - * distribution column val can take in the qualified rows. So, in such cases - * this function returns NULL. 
- */ -Expr * -pgxc_find_distcol_expr(Index varno, PartAttrNumber partAttrNum, - Node *quals) -{ - /* Convert the qualification into list of arguments of AND */ - List *lquals = make_ands_implicit((Expr *)quals); - ListCell *qual_cell; - /* - * For every ANDed expression, check if that expression is of the form - * <distribution_col> = <expr>. If so return expr. - */ - foreach(qual_cell, lquals) - { - Expr *qual_expr = (Expr *)lfirst(qual_cell); - OpExpr *op; - Expr *lexpr; - Expr *rexpr; - Var *var_expr; - Expr *distcol_expr; - - if (!IsA(qual_expr, OpExpr)) - continue; - op = (OpExpr *)qual_expr; - /* If not a binary operator, it can not be '='. */ - if (list_length(op->args) != 2) - continue; - - lexpr = linitial(op->args); - rexpr = lsecond(op->args); - - /* - * If either of the operands is a RelabelType, extract the Var in the RelabelType. - * A RelabelType represents a "dummy" type coercion between two binary compatible datatypes. - * If we do not handle these then our optimization does not work in case of varchar - * For example if col is of type varchar and is the dist key then - * select * from vc_tab where col = 'abcdefghijklmnopqrstuvwxyz'; - * should be shipped to one of the nodes only - */ - if (IsA(lexpr, RelabelType)) - lexpr = ((RelabelType*)lexpr)->arg; - if (IsA(rexpr, RelabelType)) - rexpr = ((RelabelType*)rexpr)->arg; - - /* - * If either of the operands is a Var expression, assume the other - * one is distribution column expression. If none is Var check next - * qual. - */ - if (IsA(lexpr, Var)) - { - var_expr = (Var *)lexpr; - distcol_expr = rexpr; - } - else if (IsA(rexpr, Var)) - { - var_expr = (Var *)rexpr; - distcol_expr = lexpr; - } - else - continue; - /* - * If Var found is not the distribution column of required relation, - * check next qual - */ - if (var_expr->varno != varno || var_expr->varattno != partAttrNum) - continue; - /* - * If the operator is not an assignment operator, check next - * constraint. 
An operator is an assignment operator if it's - * mergejoinable or hashjoinable. Beware that not every assignment - * operator is mergejoinable or hashjoinable, so we might leave some - * oportunity. But then we have to rely on the opname which may not - * be something we know to be equality operator as well. - */ - if (!op_mergejoinable(op->opno, exprType((Node *)lexpr)) && - !op_hashjoinable(op->opno, exprType((Node *)lexpr))) - continue; - /* Found the distribution column expression return it */ - return distcol_expr; - } - /* Exhausted all quals, but no distribution column expression */ - return NULL; -} - static bool VarAttrIsPartAttr(Var *var, List *rtable) { RangeTblEntry *rte = rt_fetch(var->varno, rtable); @@ -1414,10 +1317,6 @@ pgxc_FQS_get_relation_nodes(RangeTblEntry *rte, Index varno, Query *query) ExecNodes *rel_exec_nodes; RelationAccessType rel_access; RelationLocInfo *rel_loc_info; - Expr *distcol_expr = NULL; - Datum distcol_value; - bool distcol_isnull; - Oid distcol_type; Assert(rte == rt_fetch(varno, (query->rtable))); @@ -1445,67 +1344,25 @@ pgxc_FQS_get_relation_nodes(RangeTblEntry *rte, Index varno, Query *query) break; } + rel_loc_info = GetRelationLocInfo(rte->relid); /* If we don't know about the distribution of relation, bail out */ if (!rel_loc_info) return NULL; /* - * If the table distributed by value, check if we can reduce the Datanodes - * by looking at the qualifiers for this relation. + * Find out the datanodes to execute this query on. * PGXC_FQS_TODO: for now, we apply node reduction only when there is only * one relation involved in the query. If there are multiple distributed * tables in the query and we apply node reduction here, we may fail to ship - * the entire join. We should some apply node reduction transitively. + * the entire join. We should apply node reduction transitively. 
*/ - if (IsLocatorDistributedByValue(rel_loc_info->locatorType) && - list_length(query->rtable) == 1) - { - Oid disttype = get_atttype(rte->relid, rel_loc_info->partAttrNum); - int32 disttypmod = get_atttypmod(rte->relid, rel_loc_info->partAttrNum); - distcol_expr = pgxc_find_distcol_expr(varno, rel_loc_info->partAttrNum, - query->jointree->quals); - /* - * If the type of expression used to find the Datanode, is not same as - * the distribution column type, try casting it. This is same as what - * will happen in case of inserting that type of expression value as the - * distribution column value. - */ - if (distcol_expr) - { - distcol_expr = (Expr *)coerce_to_target_type(NULL, - (Node *)distcol_expr, - exprType((Node *)distcol_expr), - disttype, disttypmod, - COERCION_ASSIGNMENT, - COERCE_IMPLICIT_CAST, -1); - /* - * PGXC_FQS_TODO: We should set the bound parameters here, but we don't have - * PlannerInfo struct and we don't handle them right now. - * Even if constant expression mutator changes the expression, it will - * only simplify it, keeping the semantics same - */ - distcol_expr = (Expr *)eval_const_expressions(NULL, - (Node *)distcol_expr); - } - } - - if (distcol_expr && IsA(distcol_expr, Const)) - { - Const *const_expr = (Const *)distcol_expr; - distcol_value = const_expr->constvalue; - distcol_isnull = const_expr->constisnull; - distcol_type = const_expr->consttype; - } + if (list_length(query->rtable) == 1) + rel_exec_nodes = GetRelationNodesByQuals(rte->relid, varno, + query->jointree->quals, rel_access); else - { - distcol_value = (Datum) 0; - distcol_isnull = true; - distcol_type = InvalidOid; - } - - rel_exec_nodes = GetRelationNodes(rel_loc_info, distcol_value, - distcol_isnull, distcol_type, rel_access); + rel_exec_nodes = GetRelationNodes(rel_loc_info, (Datum) 0, + true, InvalidOid, rel_access); if (!rel_exec_nodes) return NULL; diff --git a/src/include/pgxc/locator.h b/src/include/pgxc/locator.h index 021e30f9cf..3a98eee4cc 100644 --- 
a/src/include/pgxc/locator.h +++ b/src/include/pgxc/locator.h @@ -105,6 +105,8 @@ extern bool IsTableDistOnPrimary(RelationLocInfo *rel_loc_info); extern ExecNodes *GetRelationNodes(RelationLocInfo *rel_loc_info, Datum valueForDistCol, bool isValueNull, Oid typeOfValueForDistCol, RelationAccessType accessType); +extern ExecNodes *GetRelationNodesByQuals(Oid reloid, Index varno, Node *quals, + RelationAccessType relaccess); extern bool IsHashColumn(RelationLocInfo *rel_loc_info, char *part_col_name); extern bool IsHashColumnForRelId(Oid relid, char *part_col_name); extern int GetRoundRobinNode(Oid relid); diff --git a/src/include/pgxc/planner.h b/src/include/pgxc/planner.h index e32f033682..6ebe223227 100644 --- a/src/include/pgxc/planner.h +++ b/src/include/pgxc/planner.h @@ -174,7 +174,5 @@ extern List *AddRemoteQueryNode(List *stmts, const char *queryString, RemoteQueryExecType remoteExecType, bool is_temp); extern bool pgxc_query_contains_temp_tables(List *queries); extern bool pgxc_query_contains_utility(List *queries); -extern Expr *pgxc_find_distcol_expr(Index varno, PartAttrNumber partAttrNum, - Node *quals); #endif /* PGXCPLANNER_H */ |
