summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Ashutosh Bapat, 2012-06-14 06:21:13 +0000
committer: Ashutosh Bapat, 2012-06-14 06:21:13 +0000
commit: cd6b5d791a5fc99c8188566f1885d98164b44154 (patch)
tree: aa127dd2bedb7e29d5249d43e6ad9d6ad888b88c /src
parent: 139323f9ffe40e2e448ddeeff11559669e0d39f6 (diff)
The code that reduces the number of data-nodes based on the quals in the query
is duplicated in two places. Move this code into a function GetRelationNodesByQuals() to avoid duplication.
Diffstat (limited to 'src')
-rw-r--r-- src/backend/optimizer/plan/createplan.c 71
-rw-r--r-- src/backend/pgxc/locator/locator.c 174
-rw-r--r-- src/backend/pgxc/plan/planner.c 159
-rw-r--r-- src/include/pgxc/locator.h 2
-rw-r--r-- src/include/pgxc/planner.h 2
5 files changed, 189 insertions, 219 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 918f07fe1f..0099d9d886 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -2584,15 +2584,10 @@ create_remotequery_plan(PlannerInfo *root, Path *best_path,
List *remote_scan_clauses = NIL;
List *local_scan_clauses = NIL;
StringInfoData sql;
- RelationLocInfo *rel_loc_info;
Query *query;
RangeTblRef *rtr;
List *varlist;
ListCell *varcell;
- Expr *distcol_expr = NULL;
- Datum distcol_value;
- bool distcol_isnull;
- Oid distcol_type;
Node *tmp_node;
List *rmlist;
List *tvarlist;
@@ -2760,10 +2755,6 @@ create_remotequery_plan(PlannerInfo *root, Path *best_path,
if (rmlist != NULL)
list_free_deep(rmlist);
- rel_loc_info = GetRelationLocInfo(rte->relid);
- if (!rel_loc_info)
- elog(ERROR, "No distribution information found for relid %d", rte->relid);
-
if (tlist_is_simple)
{
scan_plan = make_remotequery(tlist, local_scan_clauses, scan_relid);
@@ -2787,63 +2778,11 @@ create_remotequery_plan(PlannerInfo *root, Path *best_path,
scan_plan->has_row_marks = query->hasForUpdate;
scan_plan->sql_statement = sql.data;
- /*
- * If the table distributed by value, check if we can reduce the Datanodes
- * by looking at the qualifiers for this relation
- */
- if (IsLocatorDistributedByValue(rel_loc_info->locatorType))
- {
- Oid disttype = get_atttype(rte->relid, rel_loc_info->partAttrNum);
- int32 disttypmod = get_atttypmod(rte->relid, rel_loc_info->partAttrNum);
- distcol_expr = pgxc_find_distcol_expr(rtr->rtindex, rel_loc_info->partAttrNum,
- query->jointree->quals);
- /*
- * If the type of expression used to find the Datanode, is not same as
- * the distribution column type, try casting it. This is same as what
- * will happen in case of inserting that type of expression value as the
- * distribution column value.
- */
- if (distcol_expr)
- {
- distcol_expr = (Expr *)coerce_to_target_type(NULL,
- (Node *)distcol_expr,
- exprType((Node *)distcol_expr),
- disttype, disttypmod,
- COERCION_ASSIGNMENT,
- COERCE_IMPLICIT_CAST, -1);
- /*
- * PGXC_FQS_TODO: We should set the bound parameters here, but we don't have
- * PlannerInfo struct and we don't handle them right now.
- * Even if constant expression mutator changes the expression, it will
- * only simplify it, keeping the semantics same
- */
- distcol_expr = (Expr *)eval_const_expressions(NULL,
- (Node *)distcol_expr);
- }
- }
-
- if (distcol_expr && IsA(distcol_expr, Const))
- {
- Const *const_expr = (Const *)distcol_expr;
- distcol_value = const_expr->constvalue;
- distcol_isnull = const_expr->constisnull;
- distcol_type = const_expr->consttype;
- }
- else
- {
- distcol_value = (Datum) 0;
- distcol_isnull = true;
- distcol_type = InvalidOid;
- }
-
- scan_plan->exec_nodes = GetRelationNodes(rel_loc_info, distcol_value,
- distcol_isnull, distcol_type,
- RELATION_ACCESS_READ);
- Assert(scan_plan->exec_nodes);
- if (rel_loc_info)
- scan_plan->exec_nodes->baselocatortype = rel_loc_info->locatorType;
- else
- scan_plan->exec_nodes->baselocatortype = '\0';
+ scan_plan->exec_nodes = GetRelationNodesByQuals(rte->relid, rtr->rtindex,
+ query->jointree->quals,
+ RELATION_ACCESS_READ);
+ if (!scan_plan->exec_nodes)
+ elog(ERROR, "No distribution information found for relid %d", rte->relid);
copy_path_costsize(&scan_plan->scan.plan, best_path);
diff --git a/src/backend/pgxc/locator/locator.c b/src/backend/pgxc/locator/locator.c
index a6d1d287de..1bba86b379 100644
--- a/src/backend/pgxc/locator/locator.c
+++ b/src/backend/pgxc/locator/locator.c
@@ -28,6 +28,7 @@
#include "catalog/indexing.h"
#include "catalog/pg_type.h"
#include "nodes/pg_list.h"
+#include "nodes/nodeFuncs.h"
#include "utils/builtins.h"
#include "utils/catcache.h"
#include "utils/fmgroids.h"
@@ -37,6 +38,8 @@
#include "utils/tqual.h"
#include "utils/syscache.h"
#include "nodes/nodes.h"
+#include "optimizer/clauses.h"
+#include "parser/parse_coerce.h"
#include "pgxc/nodemgr.h"
#include "pgxc/locator.h"
#include "pgxc/pgxc.h"
@@ -47,6 +50,8 @@
#include "catalog/namespace.h"
#include "access/hash.h"
+static Expr *pgxc_find_distcol_expr(Index varno, PartAttrNumber partAttrNum,
+ Node *quals);
Oid primary_data_node = InvalidOid;
int num_preferred_data_nodes = 0;
@@ -655,6 +660,79 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, Datum valueForDistCol,
return exec_nodes;
}
+/*
+ * GetRelationNodesByQuals
+ * A wrapper around GetRelationNodes to reduce the node list by looking at the
+ * quals. varno is assumed to be the varno of reloid inside the quals. No check
+ * is made to see if that's correct.
+ */
+ExecNodes *
+GetRelationNodesByQuals(Oid reloid, Index varno, Node *quals,
+ RelationAccessType relaccess)
+{
+ RelationLocInfo *rel_loc_info = GetRelationLocInfo(reloid);
+ Expr *distcol_expr = NULL;
+ ExecNodes *exec_nodes;
+ Datum distcol_value;
+ bool distcol_isnull;
+ Oid distcol_type;
+
+ if (!rel_loc_info)
+ return NULL;
+ /*
+ * If the table is distributed by value, check if we can reduce the Datanodes
+ * by looking at the qualifiers for this relation
+ */
+ if (IsLocatorDistributedByValue(rel_loc_info->locatorType))
+ {
+ Oid disttype = get_atttype(reloid, rel_loc_info->partAttrNum);
+ int32 disttypmod = get_atttypmod(reloid, rel_loc_info->partAttrNum);
+ distcol_expr = pgxc_find_distcol_expr(varno, rel_loc_info->partAttrNum,
+ quals);
+ /*
+ * If the type of the expression used to find the Datanode is not the same as
+ * the distribution column type, try casting it. This is the same as what
+ * will happen when a value of that expression type is inserted as the
+ * distribution column value.
+ */
+ if (distcol_expr)
+ {
+ distcol_expr = (Expr *)coerce_to_target_type(NULL,
+ (Node *)distcol_expr,
+ exprType((Node *)distcol_expr),
+ disttype, disttypmod,
+ COERCION_ASSIGNMENT,
+ COERCE_IMPLICIT_CAST, -1);
+ /*
+ * PGXC_FQS_TODO: We should set the bound parameters here, but we don't have
+ * PlannerInfo struct and we don't handle them right now.
+ * Even if constant expression mutator changes the expression, it will
+ * only simplify it, keeping the semantics same
+ */
+ distcol_expr = (Expr *)eval_const_expressions(NULL,
+ (Node *)distcol_expr);
+ }
+ }
+
+ if (distcol_expr && IsA(distcol_expr, Const))
+ {
+ Const *const_expr = (Const *)distcol_expr;
+ distcol_value = const_expr->constvalue;
+ distcol_isnull = const_expr->constisnull;
+ distcol_type = const_expr->consttype;
+ }
+ else
+ {
+ distcol_value = (Datum) 0;
+ distcol_isnull = true;
+ distcol_type = InvalidOid;
+ }
+
+ exec_nodes = GetRelationNodes(rel_loc_info, distcol_value,
+ distcol_isnull, distcol_type,
+ relaccess);
+ return exec_nodes;
+}
/*
* ConvertToLocatorType
@@ -921,3 +999,99 @@ FreeExecNodes(ExecNodes **exec_nodes)
pfree(tmp_en);
*exec_nodes = NULL;
}
+
+/*
+ * pgxc_find_distcol_expr
+ * Search through the quals provided and find an expression which gives the
+ * value of the distribution column, if one exists in the quals. Say for a table
+ * tab1 (val int, val2 int) distributed by hash(val), a query "SELECT * FROM
+ * tab1 WHERE val = fn(x, y, z) AND val2 = 3", fn(x, y, z) is the expression which
+ * decides the distribution column value in the rows qualified by this query.
+ * Hence return fn(x, y, z). But for a query "SELECT * FROM tab1 WHERE val =
+ * fn(x, y, z) OR val2 = 3", there is no expression which decides the values
+ * the distribution column val can take in the qualified rows. So, in such cases
+ * this function returns NULL.
+ */
+static Expr *
+pgxc_find_distcol_expr(Index varno, PartAttrNumber partAttrNum,
+ Node *quals)
+{
+ /* Convert the qualification into list of arguments of AND */
+ List *lquals = make_ands_implicit((Expr *)quals);
+ ListCell *qual_cell;
+ /*
+ * For every ANDed expression, check if that expression is of the form
+ * <distribution_col> = <expr>. If so return expr.
+ */
+ foreach(qual_cell, lquals)
+ {
+ Expr *qual_expr = (Expr *)lfirst(qual_cell);
+ OpExpr *op;
+ Expr *lexpr;
+ Expr *rexpr;
+ Var *var_expr;
+ Expr *distcol_expr;
+
+ if (!IsA(qual_expr, OpExpr))
+ continue;
+ op = (OpExpr *)qual_expr;
+ /* If not a binary operator, it can not be '='. */
+ if (list_length(op->args) != 2)
+ continue;
+
+ lexpr = linitial(op->args);
+ rexpr = lsecond(op->args);
+
+ /*
+ * If either of the operands is a RelabelType, extract the Var in the RelabelType.
+ * A RelabelType represents a "dummy" type coercion between two binary compatible datatypes.
+ * If we do not handle these then our optimization does not work in case of varchar
+ * For example if col is of type varchar and is the dist key then
+ * select * from vc_tab where col = 'abcdefghijklmnopqrstuvwxyz';
+ * should be shipped to one of the nodes only
+ */
+ if (IsA(lexpr, RelabelType))
+ lexpr = ((RelabelType*)lexpr)->arg;
+ if (IsA(rexpr, RelabelType))
+ rexpr = ((RelabelType*)rexpr)->arg;
+
+ /*
+ * If either of the operands is a Var expression, assume the other
+ * one is distribution column expression. If none is Var check next
+ * qual.
+ */
+ if (IsA(lexpr, Var))
+ {
+ var_expr = (Var *)lexpr;
+ distcol_expr = rexpr;
+ }
+ else if (IsA(rexpr, Var))
+ {
+ var_expr = (Var *)rexpr;
+ distcol_expr = lexpr;
+ }
+ else
+ continue;
+ /*
+ * If Var found is not the distribution column of required relation,
+ * check next qual
+ */
+ if (var_expr->varno != varno || var_expr->varattno != partAttrNum)
+ continue;
+ /*
+ * If the operator is not an equality operator, check next
+ * constraint. An operator is treated as an equality operator if it's
+ * mergejoinable or hashjoinable. Beware that not every equality
+ * operator is mergejoinable or hashjoinable, so we might miss some
+ * opportunity. But otherwise we would have to rely on the opname, which may
+ * not be something we know to be an equality operator either.
+ */
+ if (!op_mergejoinable(op->opno, exprType((Node *)lexpr)) &&
+ !op_hashjoinable(op->opno, exprType((Node *)lexpr)))
+ continue;
+ /* Found the distribution column expression; return it */
+ return distcol_expr;
+ }
+ /* Exhausted all quals, but no distribution column expression */
+ return NULL;
+}
diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c
index 59bfa14152..0a2831afda 100644
--- a/src/backend/pgxc/plan/planner.c
+++ b/src/backend/pgxc/plan/planner.c
@@ -33,7 +33,6 @@
#include "optimizer/planner.h"
#include "optimizer/tlist.h"
#include "parser/parse_agg.h"
-#include "parser/parse_coerce.h"
#include "parser/parse_func.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
@@ -1288,102 +1287,6 @@ pgxc_qual_hash_dist_equijoin(Relids varnos_1, Relids varnos_2, Oid distcol_type,
return false;
}
-/*
- * pgxc_find_distcol_expr
- * Search through the quals provided and find out an expression which will give
- * us value of distribution column if exists in the quals. Say for a table
- * tab1 (val int, val2 int) distributed by hash(val), a query "SELECT * FROM
- * tab1 WHERE val = fn(x, y, z) and val2 = 3", fn(x,y,z) is the expression which
- * decides the distribution column value in the rows qualified by this query.
- * Hence return fn(x, y, z). But for a query "SELECT * FROM tab1 WHERE val =
- * fn(x, y, z) || val2 = 3", there is no expression which decides the values
- * distribution column val can take in the qualified rows. So, in such cases
- * this function returns NULL.
- */
-Expr *
-pgxc_find_distcol_expr(Index varno, PartAttrNumber partAttrNum,
- Node *quals)
-{
- /* Convert the qualification into list of arguments of AND */
- List *lquals = make_ands_implicit((Expr *)quals);
- ListCell *qual_cell;
- /*
- * For every ANDed expression, check if that expression is of the form
- * <distribution_col> = <expr>. If so return expr.
- */
- foreach(qual_cell, lquals)
- {
- Expr *qual_expr = (Expr *)lfirst(qual_cell);
- OpExpr *op;
- Expr *lexpr;
- Expr *rexpr;
- Var *var_expr;
- Expr *distcol_expr;
-
- if (!IsA(qual_expr, OpExpr))
- continue;
- op = (OpExpr *)qual_expr;
- /* If not a binary operator, it can not be '='. */
- if (list_length(op->args) != 2)
- continue;
-
- lexpr = linitial(op->args);
- rexpr = lsecond(op->args);
-
- /*
- * If either of the operands is a RelabelType, extract the Var in the RelabelType.
- * A RelabelType represents a "dummy" type coercion between two binary compatible datatypes.
- * If we do not handle these then our optimization does not work in case of varchar
- * For example if col is of type varchar and is the dist key then
- * select * from vc_tab where col = 'abcdefghijklmnopqrstuvwxyz';
- * should be shipped to one of the nodes only
- */
- if (IsA(lexpr, RelabelType))
- lexpr = ((RelabelType*)lexpr)->arg;
- if (IsA(rexpr, RelabelType))
- rexpr = ((RelabelType*)rexpr)->arg;
-
- /*
- * If either of the operands is a Var expression, assume the other
- * one is distribution column expression. If none is Var check next
- * qual.
- */
- if (IsA(lexpr, Var))
- {
- var_expr = (Var *)lexpr;
- distcol_expr = rexpr;
- }
- else if (IsA(rexpr, Var))
- {
- var_expr = (Var *)rexpr;
- distcol_expr = lexpr;
- }
- else
- continue;
- /*
- * If Var found is not the distribution column of required relation,
- * check next qual
- */
- if (var_expr->varno != varno || var_expr->varattno != partAttrNum)
- continue;
- /*
- * If the operator is not an assignment operator, check next
- * constraint. An operator is an assignment operator if it's
- * mergejoinable or hashjoinable. Beware that not every assignment
- * operator is mergejoinable or hashjoinable, so we might leave some
- * oportunity. But then we have to rely on the opname which may not
- * be something we know to be equality operator as well.
- */
- if (!op_mergejoinable(op->opno, exprType((Node *)lexpr)) &&
- !op_hashjoinable(op->opno, exprType((Node *)lexpr)))
- continue;
- /* Found the distribution column expression return it */
- return distcol_expr;
- }
- /* Exhausted all quals, but no distribution column expression */
- return NULL;
-}
-
static bool VarAttrIsPartAttr(Var *var, List *rtable)
{
RangeTblEntry *rte = rt_fetch(var->varno, rtable);
@@ -1414,10 +1317,6 @@ pgxc_FQS_get_relation_nodes(RangeTblEntry *rte, Index varno, Query *query)
ExecNodes *rel_exec_nodes;
RelationAccessType rel_access;
RelationLocInfo *rel_loc_info;
- Expr *distcol_expr = NULL;
- Datum distcol_value;
- bool distcol_isnull;
- Oid distcol_type;
Assert(rte == rt_fetch(varno, (query->rtable)));
@@ -1445,67 +1344,25 @@ pgxc_FQS_get_relation_nodes(RangeTblEntry *rte, Index varno, Query *query)
break;
}
+
rel_loc_info = GetRelationLocInfo(rte->relid);
/* If we don't know about the distribution of relation, bail out */
if (!rel_loc_info)
return NULL;
/*
- * If the table distributed by value, check if we can reduce the Datanodes
- * by looking at the qualifiers for this relation.
+ * Find out the datanodes to execute this query on.
* PGXC_FQS_TODO: for now, we apply node reduction only when there is only
* one relation involved in the query. If there are multiple distributed
* tables in the query and we apply node reduction here, we may fail to ship
- * the entire join. We should some apply node reduction transitively.
+ * the entire join. We should apply node reduction transitively.
*/
- if (IsLocatorDistributedByValue(rel_loc_info->locatorType) &&
- list_length(query->rtable) == 1)
- {
- Oid disttype = get_atttype(rte->relid, rel_loc_info->partAttrNum);
- int32 disttypmod = get_atttypmod(rte->relid, rel_loc_info->partAttrNum);
- distcol_expr = pgxc_find_distcol_expr(varno, rel_loc_info->partAttrNum,
- query->jointree->quals);
- /*
- * If the type of expression used to find the Datanode, is not same as
- * the distribution column type, try casting it. This is same as what
- * will happen in case of inserting that type of expression value as the
- * distribution column value.
- */
- if (distcol_expr)
- {
- distcol_expr = (Expr *)coerce_to_target_type(NULL,
- (Node *)distcol_expr,
- exprType((Node *)distcol_expr),
- disttype, disttypmod,
- COERCION_ASSIGNMENT,
- COERCE_IMPLICIT_CAST, -1);
- /*
- * PGXC_FQS_TODO: We should set the bound parameters here, but we don't have
- * PlannerInfo struct and we don't handle them right now.
- * Even if constant expression mutator changes the expression, it will
- * only simplify it, keeping the semantics same
- */
- distcol_expr = (Expr *)eval_const_expressions(NULL,
- (Node *)distcol_expr);
- }
- }
-
- if (distcol_expr && IsA(distcol_expr, Const))
- {
- Const *const_expr = (Const *)distcol_expr;
- distcol_value = const_expr->constvalue;
- distcol_isnull = const_expr->constisnull;
- distcol_type = const_expr->consttype;
- }
+ if (list_length(query->rtable) == 1)
+ rel_exec_nodes = GetRelationNodesByQuals(rte->relid, varno,
+ query->jointree->quals, rel_access);
else
- {
- distcol_value = (Datum) 0;
- distcol_isnull = true;
- distcol_type = InvalidOid;
- }
-
- rel_exec_nodes = GetRelationNodes(rel_loc_info, distcol_value,
- distcol_isnull, distcol_type, rel_access);
+ rel_exec_nodes = GetRelationNodes(rel_loc_info, (Datum) 0,
+ true, InvalidOid, rel_access);
if (!rel_exec_nodes)
return NULL;
diff --git a/src/include/pgxc/locator.h b/src/include/pgxc/locator.h
index 021e30f9cf..3a98eee4cc 100644
--- a/src/include/pgxc/locator.h
+++ b/src/include/pgxc/locator.h
@@ -105,6 +105,8 @@ extern bool IsTableDistOnPrimary(RelationLocInfo *rel_loc_info);
extern ExecNodes *GetRelationNodes(RelationLocInfo *rel_loc_info, Datum valueForDistCol,
bool isValueNull, Oid typeOfValueForDistCol,
RelationAccessType accessType);
+extern ExecNodes *GetRelationNodesByQuals(Oid reloid, Index varno, Node *quals,
+ RelationAccessType relaccess);
extern bool IsHashColumn(RelationLocInfo *rel_loc_info, char *part_col_name);
extern bool IsHashColumnForRelId(Oid relid, char *part_col_name);
extern int GetRoundRobinNode(Oid relid);
diff --git a/src/include/pgxc/planner.h b/src/include/pgxc/planner.h
index e32f033682..6ebe223227 100644
--- a/src/include/pgxc/planner.h
+++ b/src/include/pgxc/planner.h
@@ -174,7 +174,5 @@ extern List *AddRemoteQueryNode(List *stmts, const char *queryString,
RemoteQueryExecType remoteExecType, bool is_temp);
extern bool pgxc_query_contains_temp_tables(List *queries);
extern bool pgxc_query_contains_utility(List *queries);
-extern Expr *pgxc_find_distcol_expr(Index varno, PartAttrNumber partAttrNum,
- Node *quals);
#endif /* PGXCPLANNER_H */