summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/commands/explain.c4
-rw-r--r--src/backend/nodes/copyfuncs.c18
-rw-r--r--src/backend/nodes/outfuncs.c26
-rw-r--r--src/backend/nodes/readfuncs.c59
-rw-r--r--src/backend/optimizer/path/costsize.c3
-rw-r--r--src/backend/optimizer/plan/createplan.c861
-rw-r--r--src/backend/optimizer/plan/planner.c3
-rw-r--r--src/backend/optimizer/plan/setrefs.c26
-rw-r--r--src/backend/optimizer/util/relnode.c18
-rw-r--r--src/backend/optimizer/util/var.c60
-rw-r--r--src/backend/parser/parse_relation.c10
-rw-r--r--src/backend/pgxc/plan/planner.c196
-rw-r--r--src/backend/pgxc/pool/execRemote.c19
-rw-r--r--src/backend/pgxc/pool/postgresql_fdw.c15
-rw-r--r--src/backend/utils/adt/ruleutils.c61
-rw-r--r--src/backend/utils/misc/guc.c10
-rw-r--r--src/include/nodes/parsenodes.h8
-rw-r--r--src/include/nodes/relation.h9
-rw-r--r--src/include/optimizer/cost.h3
-rw-r--r--src/include/optimizer/planmain.h3
-rw-r--r--src/include/optimizer/var.h3
-rw-r--r--src/include/pgxc/planner.h17
-rw-r--r--src/include/utils/builtins.h3
23 files changed, 1377 insertions, 58 deletions
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index b5d0a904d5..cd70302858 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -686,7 +686,11 @@ explain_outNode(StringInfo str,
Assert(rte->rtekind == RTE_RELATION);
/* We only show the rel name, not schema name */
+#ifdef PGXC
+ relname = rte->relname;
+#else
relname = get_rel_name(rte->relid);
+#endif
appendStringInfo(str, " on %s",
quote_identifier(relname));
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 08f8a712bd..78c5543dab 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -840,6 +840,17 @@ _copyRemoteQuery(RemoteQuery *from)
COPY_SCALAR_FIELD(read_only);
COPY_SCALAR_FIELD(force_autocommit);
+ COPY_STRING_FIELD(relname);
+ COPY_SCALAR_FIELD(remotejoin);
+ COPY_SCALAR_FIELD(reduce_level);
+ COPY_NODE_FIELD(base_tlist);
+ COPY_STRING_FIELD(outer_alias);
+ COPY_STRING_FIELD(inner_alias);
+ COPY_SCALAR_FIELD(outer_reduce_level);
+ COPY_SCALAR_FIELD(inner_reduce_level);
+ COPY_BITMAPSET_FIELD(outer_relids);
+ COPY_BITMAPSET_FIELD(inner_relids);
+
return newnode;
}
@@ -1836,6 +1847,13 @@ _copyRangeTblEntry(RangeTblEntry *from)
RangeTblEntry *newnode = makeNode(RangeTblEntry);
COPY_SCALAR_FIELD(rtekind);
+
+#ifdef PGXC
+ COPY_STRING_FIELD(relname);
+ if (from->reltupdesc)
+ newnode->reltupdesc = CreateTupleDescCopy(from->reltupdesc);
+#endif
+
COPY_SCALAR_FIELD(relid);
COPY_NODE_FIELD(subquery);
COPY_SCALAR_FIELD(jointype);
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index d79bd750d0..56af702d4e 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1502,6 +1502,9 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
WRITE_BOOL_FIELD(hasHavingQual);
WRITE_BOOL_FIELD(hasPseudoConstantQuals);
WRITE_BOOL_FIELD(hasRecursion);
+#ifdef PGXC
+ WRITE_INT_FIELD(rs_alias_index);
+#endif
WRITE_INT_FIELD(wt_param_id);
}
@@ -2015,16 +2018,39 @@ _outSetOperationStmt(StringInfo str, SetOperationStmt *node)
static void
_outRangeTblEntry(StringInfo str, RangeTblEntry *node)
{
+#ifdef PGXC
+ int i;
+#endif
+
WRITE_NODE_TYPE("RTE");
/* put alias + eref first to make dump more legible */
WRITE_NODE_FIELD(alias);
WRITE_NODE_FIELD(eref);
WRITE_ENUM_FIELD(rtekind, RTEKind);
+#ifdef PGXC
+ WRITE_STRING_FIELD(relname);
+#endif
switch (node->rtekind)
{
case RTE_RELATION:
+#ifdef PGXC
+ /* write tuple descriptor */
+ appendStringInfo(str, " :tupdesc_natts %d (", node->reltupdesc->natts);
+
+ for (i = 0 ; i < node->reltupdesc->natts ; i++)
+ {
+ appendStringInfo(str, ":colname ");
+ _outToken(str, NameStr(node->reltupdesc->attrs[i]->attname));
+ appendStringInfo(str, " :coltypid %u ",
+ node->reltupdesc->attrs[i]->atttypid);
+ appendStringInfo(str, ":coltypmod %d ",
+ node->reltupdesc->attrs[i]->atttypmod);
+ }
+
+ appendStringInfo(str, ") ");
+ #endif
case RTE_SPECIAL:
WRITE_OID_FIELD(relid);
break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 98d3c4c9ef..4cae711aa4 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -31,7 +31,9 @@
#include "nodes/parsenodes.h"
#include "nodes/readfuncs.h"
-
+#ifdef PGXC
+#include "access/htup.h"
+#endif
/*
* Macros to simplify reading of different kinds of fields. Use these
@@ -1104,16 +1106,71 @@ _readFromExpr(void)
static RangeTblEntry *
_readRangeTblEntry(void)
{
+#ifdef PGXC
+ int natts, i;
+ char *colname;
+ Oid typid, typmod;
+#endif
+
READ_LOCALS(RangeTblEntry);
/* put alias + eref first to make dump more legible */
READ_NODE_FIELD(alias);
READ_NODE_FIELD(eref);
READ_ENUM_FIELD(rtekind, RTEKind);
+#ifdef PGXC
+ READ_STRING_FIELD(relname);
+#endif
switch (local_node->rtekind)
{
case RTE_RELATION:
+#ifdef PGXC
+ /* read tuple descriptor */
+ token = pg_strtok(&length); /* skip :tupdesc_natts */
+ token = pg_strtok(&length); /* get field value */
+
+ natts = atoi(token);
+
+ if (natts > 0 && natts <= MaxTupleAttributeNumber)
+ local_node->reltupdesc = CreateTemplateTupleDesc(natts, false);
+ else
+ elog(ERROR, "invalid node field to read");
+
+ token = pg_strtok(&length); /* skip '(' */
+
+ if (length == 1 && pg_strncasecmp(token, "(", length) == 0)
+ {
+ for (i = 0 ; i < natts ; i++)
+ {
+ token = pg_strtok(&length); /* skip :colname */
+ token = pg_strtok(&length); /* get colname */
+ colname = nullable_string(token, length);
+
+ if (colname == NULL)
+ elog(ERROR, "invalid node field to read");
+
+ token = pg_strtok(&length); /* skip :coltypid */
+ token = pg_strtok(&length); /* get typid */
+ typid = atooid(token);
+
+ token = pg_strtok(&length); /* skip :coltypmod */
+ token = pg_strtok(&length); /* get typmod */
+ typmod = atoi(token);
+
+ TupleDescInitEntry(local_node->reltupdesc,
+ (i + 1), colname, typid, typmod, 0);
+ }
+ }
+ else
+ elog(ERROR, "invalid node field to read");
+
+ token = pg_strtok(&length); /* skip '(' */
+
+ if (!(length == 1 && pg_strncasecmp(token, ")", length) == 0))
+ elog(ERROR, "invalid node field to read");
+#endif
+
case RTE_SPECIAL:
READ_OID_FIELD(relid);
break;
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 08f1d361ba..4182787b6f 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -109,6 +109,9 @@ bool enable_hashagg = true;
bool enable_nestloop = true;
bool enable_mergejoin = true;
bool enable_hashjoin = true;
+#ifdef PGXC
+bool enable_remotejoin = true;
+#endif
typedef struct
{
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 3d32f65e76..30084633e4 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -34,6 +34,11 @@
#include "parser/parsetree.h"
#ifdef PGXC
#include "pgxc/planner.h"
+#include "access/sysattr.h"
+#include "utils/builtins.h"
+#include "utils/syscache.h"
+#include "catalog/pg_proc.h"
+#include "executor/executor.h"
#endif
#include "utils/lsyscache.h"
@@ -72,6 +77,14 @@ static WorkTableScan *create_worktablescan_plan(PlannerInfo *root, Path *best_pa
#ifdef PGXC
static RemoteQuery *create_remotequery_plan(PlannerInfo *root, Path *best_path,
List *tlist, List *scan_clauses);
+static Plan *create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path,
+ Plan *parent, Plan *outer_plan, Plan *inner_plan);
+static void create_remote_target_list(PlannerInfo *root,
+ StringInfo targets, List *out_tlist, List *in_tlist,
+ char *out_alias, int out_index,
+ char *in_alias, int in_index);
+static Alias *generate_remote_rte_alias(RangeTblEntry *rte, int varno,
+ char *aliasname, int reduce_level);
#endif
static NestLoop *create_nestloop_plan(PlannerInfo *root, NestPath *best_path,
Plan *outer_plan, Plan *inner_plan);
@@ -141,6 +154,14 @@ static Sort *make_sort(PlannerInfo *root, Plan *lefttree, int numCols,
double limit_tuples);
static Material *make_material(Plan *lefttree);
+#ifdef PGXC
+static void findReferencedVars(List *parent_vars, Plan *plan, List **out_tlist, Relids *out_relids);
+extern bool is_foreign_qual(Node *clause);
+static void create_remote_clause_expr(PlannerInfo *root, Plan *parent, StringInfo clauses,
+ List *qual, RemoteQuery *scan);
+static void create_remote_expr(PlannerInfo *root, Plan *parent, StringInfo expr,
+ Node *node, RemoteQuery *scan);
+#endif
/*
* create_plan
@@ -221,9 +242,6 @@ create_scan_plan(PlannerInfo *root, Path *best_path)
List *tlist;
List *scan_clauses;
Plan *plan;
-#ifdef PGXC
- Plan *matplan;
-#endif
/*
* For table scans, rather than using the relation targetlist (which is
@@ -445,9 +463,6 @@ disuse_physical_tlist(Plan *plan, Path *path)
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
-#ifdef PGXC
- case T_RemoteQuery:
-#endif
plan->targetlist = build_relation_tlist(path->parent);
break;
default:
@@ -557,9 +572,627 @@ create_join_plan(PlannerInfo *root, JoinPath *best_path)
get_actual_clauses(get_loc_restrictinfo(best_path))));
#endif
+#ifdef PGXC
+ /* check if this join can be reduced to an equiv. remote scan node */
+ plan = create_remotejoin_plan(root, best_path, plan, outer_plan, inner_plan);
+#endif
+
return plan;
}
+#ifdef PGXC
+/*
+ * create_remotejoin_plan
+ * check if the children plans involve remote entities from the same remote
+ * node. If so, this join can be reduced to an equivalent remote scan plan
+ * node
+ *
+ * RULES:
+ *
+ * * provide unique aliases to both inner and outer nodes to represent their
+ * corresponding subqueries
+ *
+ * * identify target entries from both inner and outer that appear in the join
+ * targetlist, only those need to be selected from these aliased subqueries
+ *
+ * * a join node has a joinqual list which represents the join condition. E.g.
+ * SELECT * from emp e LEFT JOIN emp2 d ON e.x = d.x
+ * Here the joinqual contains "e.x = d.x". If the joinqual itself has a local
+ * dependency, e.g "e.x = localfunc(d.x)", then this join cannot be reduced
+ *
+ * * other than the joinqual, the join node can contain additional quals. Even
+ * if they have any local dependencies, we can reduce the join and just
+ * append these quals into the reduced remote scan node. We DO do a pass to
+ * identify remote quals and ship those in the squery though
+ *
+ * * these quals (both joinqual and normal quals with no local dependencies)
+ * need to be converted into expressions referring to the aliases assigned to
+ * the nodes. These expressions will eventually become part of the squery of
+ * the reduced remote scan node
+ *
+ * * the children remote scan nodes themselves can have local dependencies in
+ * their quals (the remote ones are already part of the squery). We can still
+ * reduce the join and just append these quals into the reduced remote scan
+ * node
+ *
+ * * if we reached successfully so far, generate a new remote scan node with
+ * this new squery generated using the aliased references
+ *
+ * One important point to note here about targetlists is that this function
+ * does not set any DUMMY var references in the Var nodes appearing in it. It
+ * follows the standard mechanism as is followed by other nodes. Similar to the
+ * existing nodes, the references which point to DUMMY vars is done in
+ * set_remote_references() function in set_plan_references phase at the fag
+ * end. Avoiding such DUMMY references manipulations till the end also makes
+ * this code a lot much readable and easier.
+ */
+static Plan *
+create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Plan *outer_plan, Plan *inner_plan)
+{
+ NestLoop *nest_parent;
+
+ if (!enable_remotejoin)
+ return parent;
+
+ /* meh, what are these for :( */
+ if (root->hasPseudoConstantQuals)
+ return parent;
+
+ /* Works only for SELECT commands right now */
+ if (root->parse->commandType != CMD_SELECT)
+ return parent;
+
+ /* do not optimize CURSOR based select statements */
+ if (root->parse->rowMarks != NIL)
+ return parent;
+
+ /*
+ * optimize only simple NestLoop joins for now. Other joins like Merge and
+ * Hash can be reduced too. But they involve additional intermediate nodes
+ * and we need to understand them a bit more as yet
+ */
+ if (!IsA(parent, NestLoop))
+ return parent;
+ else
+ nest_parent = (NestLoop *)parent;
+
+ /* check if both the nodes qualify for reduction */
+ if (IsA(outer_plan, Material) &&
+ IsA(((Material *) outer_plan)->plan.lefttree, RemoteQuery) &&
+ IsA(inner_plan, Material) &&
+ IsA(((Material *) inner_plan)->plan.lefttree, RemoteQuery))
+ {
+ int i;
+ List *rtable_list = NIL;
+ bool partitioned_replicated_join = false;
+
+ Material *outer_mat = (Material *)outer_plan;
+ Material *inner_mat = (Material *)inner_plan;
+
+ RemoteQuery *outer = (RemoteQuery *)outer_mat->plan.lefttree;
+ RemoteQuery *inner = (RemoteQuery *)inner_mat->plan.lefttree;
+
+ /*
+ * Check if both these plans are from the same remote node. If yes,
+ * replace this JOIN along with it's two children with one equivalent
+ * remote node
+ */
+
+ /*
+ * Build up rtable for XC Walker
+ * (was not sure I could trust this, but it seems to work in various cases)
+ */
+ for (i = 0; i < root->simple_rel_array_size; i++)
+ {
+ RangeTblEntry *rte = root->simple_rte_array[i];
+
+ /* Check for NULL first, sometimes it is NULL at position 0 */
+ if (rte)
+ rtable_list = lappend(rtable_list, root->simple_rte_array[i]);
+ }
+
+ /* XXX Check if the join optimization is possible */
+ if (IsJoinReducible(inner, outer, rtable_list, best_path, &partitioned_replicated_join))
+ {
+ RemoteQuery *result;
+ Plan *result_plan;
+ StringInfoData targets, clauses, scan_clauses, fromlist;
+ StringInfoData squery;
+ List *parent_vars, *out_tlist = NIL, *in_tlist = NIL, *base_tlist;
+ ListCell *l;
+ char in_alias[15], out_alias[15];
+ Relids out_relids = NULL, in_relids = NULL;
+ bool use_where = false;
+ Index dummy_rtindex;
+ RangeTblEntry *dummy_rte;
+ List *local_scan_clauses = NIL, *remote_scan_clauses = NIL;
+ char *pname;
+
+
+ /* KISS! As long as distinct aliases are provided for all the objects in
+ * involved in query, remote server should not crib! */
+ sprintf(in_alias, "out_%d", root->rs_alias_index);
+ sprintf(out_alias, "in_%d", root->rs_alias_index);
+
+ /*
+ * Walk the left, right trees and identify which vars appear in the
+ * parent targetlist, only those need to be selected. Note that
+ * depending on whether the parent targetlist is top-level or
+ * intermediate, the children vars may or may not be referenced
+ * multiple times in it.
+ */
+ parent_vars = pull_var_clause((Node *)parent->targetlist, PVC_REJECT_PLACEHOLDERS);
+
+ findReferencedVars(parent_vars, outer_plan, &out_tlist, &out_relids);
+ findReferencedVars(parent_vars, inner_plan, &in_tlist, &in_relids);
+
+ /*
+ * If the JOIN ON clause has a local dependency then we cannot ship
+ * the join to the remote side at all, bail out immediately.
+ */
+ if (!is_foreign_qual((Node *)nest_parent->join.joinqual))
+ {
+ elog(DEBUG1, "cannot reduce: local dependencies in the joinqual");
+ return parent;
+ }
+
+ /*
+ * If the normal plan qual has local dependencies, the join can
+ * still be shipped. Try harder to ship remote clauses out of the
+ * entire list. These local quals will become part of the quals
+ * list of the reduced remote scan node down later.
+ */
+ if (!is_foreign_qual((Node *)nest_parent->join.plan.qual))
+ {
+ elog(DEBUG1, "local dependencies in the join plan qual");
+
+ /*
+ * trawl through each entry and come up with remote and local
+ * clauses... sigh
+ */
+ foreach(l, nest_parent->join.plan.qual)
+ {
+ Node *clause = lfirst(l);
+
+ /*
+ * if the currentof in the above call to
+ * clause_is_local_bound is set, somewhere in the list there
+ * is currentof clause, so keep that information intact and
+ * pass a dummy argument here.
+ */
+ if (!is_foreign_qual((Node *)clause))
+ local_scan_clauses = lappend(local_scan_clauses, clause);
+ else
+ remote_scan_clauses = lappend(remote_scan_clauses, clause);
+ }
+ }
+ else
+ {
+ /*
+ * there is no local bound clause, all the clauses are remote
+ * scan clauses
+ */
+ remote_scan_clauses = nest_parent->join.plan.qual;
+ }
+
+ /* generate the tlist for the new RemoteScan node using out_tlist, in_tlist */
+ initStringInfo(&targets);
+ create_remote_target_list(root, &targets, out_tlist, in_tlist,
+ out_alias, outer->reduce_level, in_alias, inner->reduce_level);
+
+ /*
+ * generate the fromlist now. The code has to appropriately mention
+ * the JOIN type in the string being generated.
+ */
+ initStringInfo(&fromlist);
+ appendStringInfo(&fromlist, " (%s) %s ",
+ outer->sql_statement, quote_identifier(out_alias));
+
+ use_where = false;
+ switch (nest_parent->join.jointype)
+ {
+ case JOIN_INNER:
+ pname = ", ";
+ use_where = true;
+ break;
+ case JOIN_LEFT:
+ pname = "LEFT JOIN";
+ break;
+ case JOIN_FULL:
+ pname = "FULL JOIN";
+ break;
+ case JOIN_RIGHT:
+ pname = "RIGHT JOIN";
+ break;
+ case JOIN_SEMI:
+ case JOIN_ANTI:
+ default:
+ return parent;
+ }
+
+ /*
+ * splendid! we can actually replace this join hierarchy with a
+ * single RemoteScan node now. Start off by constructing the
+ * appropriate new tlist and tupdescriptor
+ */
+ result = makeNode(RemoteQuery);
+
+ /*
+ * Save various information about the inner and the outer plans. We
+ * may need this information later if more entries are added to it
+ * as part of the remote expression optimization
+ */
+ result->remotejoin = true;
+ result->inner_alias = pstrdup(in_alias);
+ result->outer_alias = pstrdup(out_alias);
+ result->inner_reduce_level = inner->reduce_level;
+ result->outer_reduce_level = outer->reduce_level;
+ result->inner_relids = in_relids;
+ result->outer_relids = out_relids;
+
+ appendStringInfo(&fromlist, " %s (%s) %s",
+ pname, inner->sql_statement, quote_identifier(in_alias));
+
+ /* generate join.joinqual remote clause string representation */
+ initStringInfo(&clauses);
+ if (nest_parent->join.joinqual != NIL)
+ {
+ create_remote_clause_expr(root, parent, &clauses,
+ nest_parent->join.joinqual, result);
+ }
+
+ /* generate join.plan.qual remote clause string representation */
+ initStringInfo(&scan_clauses);
+ if (remote_scan_clauses != NIL)
+ {
+ create_remote_clause_expr(root, parent, &scan_clauses,
+ remote_scan_clauses, result);
+ }
+
+ /*
+ * set the base tlist of the involved base relations, useful in
+ * set_plan_refs later. Additionally the tupledescs should be
+ * generated using this base_tlist and not the parent targetlist.
+ * This is because we want to take into account any additional
+ * column references from the scan clauses too
+ */
+ base_tlist = add_to_flat_tlist(NIL, list_concat(out_tlist, in_tlist));
+
+ /* cook up the reltupdesc using this base_tlist */
+ dummy_rte = makeNode(RangeTblEntry);
+ dummy_rte->reltupdesc = ExecTypeFromTL(base_tlist, false);
+ dummy_rte->rtekind = RTE_RELATION;
+
+ /* use a dummy relname... */
+ dummy_rte->relname = "__FOREIGN_QUERY__";
+ dummy_rte->eref = makeAlias("__FOREIGN_QUERY__", NIL);
+ /* not sure if we need to set the below explicitly.. */
+ dummy_rte->inh = false;
+ dummy_rte->inFromCl = false;
+ dummy_rte->requiredPerms = 0;
+ dummy_rte->checkAsUser = 0;
+ dummy_rte->selectedCols = NULL;
+ dummy_rte->modifiedCols = NULL;
+
+ /*
+ * Append the dummy range table entry to the range table.
+ * Note that this modifies the master copy the caller passed us, otherwise
+ * e.g EXPLAIN VERBOSE will fail to find the rte the Vars built below refer
+ * to.
+ */
+ root->parse->rtable = lappend(root->parse->rtable, dummy_rte);
+ dummy_rtindex = list_length(root->parse->rtable);
+
+ result_plan = &result->scan.plan;
+
+ /* the join targetlist becomes this node's tlist */
+ result_plan->targetlist = parent->targetlist;
+ result_plan->lefttree = NULL;
+ result_plan->righttree = NULL;
+ result->scan.scanrelid = dummy_rtindex;
+
+ /* generate the squery for this node */
+
+ /* NOTE: it's assumed that the remote_paramNums array is
+ * filled in the same order as we create the query here.
+ *
+ * TODO: we need some way to ensure that the remote_paramNums
+ * is filled in the same order as the order in which the clauses
+ * are added in the query below.
+ */
+ initStringInfo(&squery);
+ appendStringInfo(&squery, "SELECT %s FROM %s", targets.data, fromlist.data);
+
+ if (clauses.data[0] != '\0')
+ appendStringInfo(&squery, " %s %s", use_where? " WHERE " : " ON ", clauses.data);
+
+ if (scan_clauses.data[0] != '\0')
+ appendStringInfo(&squery, " %s %s", use_where? " AND " : " WHERE ", scan_clauses.data);
+
+ result->sql_statement = squery.data;
+ /* don't forget to increment the index for the next time around! */
+ result->reduce_level = root->rs_alias_index++;
+
+
+ /* set_plan_refs needs this later */
+ result->base_tlist = base_tlist;
+ result->relname = "__FOREIGN_QUERY__";
+
+ result->partitioned_replicated = partitioned_replicated_join;
+
+ /*
+ * if there were any local scan clauses stick them up here. They
+ * can come from the join node or from remote scan node themselves.
+ * Because of the processing being done earlier in
+ * create_remotescan_plan, all of the clauses if present will be
+ * local ones and hence can be stuck without checking for
+ * remoteness again here into result_plan->qual
+ */
+ result_plan->qual = list_concat(result_plan->qual, outer_plan->qual);
+ result_plan->qual = list_concat(result_plan->qual, inner_plan->qual);
+ result_plan->qual = list_concat(result_plan->qual, local_scan_clauses);
+
+ /* we actually need not worry about costs since this is the final plan */
+ result_plan->startup_cost = outer_plan->startup_cost;
+ result_plan->total_cost = outer_plan->total_cost;
+ result_plan->plan_rows = outer_plan->plan_rows;
+ result_plan->plan_width = outer_plan->plan_width;
+
+ return (Plan *)make_material(result_plan);
+ }
+ }
+
+ return parent;
+}
+
+/*
+ * Generate aliases for columns of remote tables using the
+ * colname_varno_varattno_reduce_level nomenclature
+ */
+static Alias *
+generate_remote_rte_alias(RangeTblEntry *rte, int varno, char *aliasname, int reduce_level)
+{
+ TupleDesc tupdesc;
+ int maxattrs;
+ int varattno;
+ List *colnames = NIL;
+ StringInfo attr = makeStringInfo();
+
+ if (rte->rtekind != RTE_RELATION)
+ elog(ERROR, "called in improper context");
+
+ if (reduce_level == 0)
+ return makeAlias(aliasname, NIL);
+
+ tupdesc = rte->reltupdesc;
+ maxattrs = tupdesc->natts;
+
+ for (varattno = 0; varattno < maxattrs; varattno++)
+ {
+ Form_pg_attribute att = tupdesc->attrs[varattno];
+ Value *attrname;
+
+ resetStringInfo(attr);
+ appendStringInfo(attr, "%s_%d_%d_%d",
+ NameStr(att->attname), varno, varattno + 1, reduce_level);
+
+ attrname = makeString(pstrdup(attr->data));
+
+ colnames = lappend(colnames, attrname);
+ }
+
+ return makeAlias(aliasname, colnames);
+}
+
+/* create_remote_target_list
+ * generate a targetlist using out_alias and in_alias appropriately. It is
+ * possible that in case of multiple-hierarchy reduction, both sides can have
+ * columns with the same name. E.g. consider the following:
+ *
+ * select * from emp e join emp f on e.x = f.x, emp g;
+ *
+ * So if we just use new_alias.columnname it can
+ * very easily clash with other columnname from the same side of an already
+ * reduced join. To avoid this, we generate unique column aliases using the
+ * following convention:
+ * colname_varno_varattno_reduce_level_index
+ *
+ * Each RemoteScan node carries it's reduce_level index to indicate the
+ * convention that should be adopted while referring to it's columns. If the
+ * level is 0, then normal column names can be used because they will never
+ * clash at the join level
+ */
+static void
+create_remote_target_list(PlannerInfo *root, StringInfo targets, List *out_tlist, List *in_tlist,
+ char *out_alias, int out_index, char *in_alias, int in_index)
+{
+ int i = 0;
+ ListCell *l;
+ StringInfo attrname = makeStringInfo();
+ bool add_null_target = true;
+
+ foreach(l, out_tlist)
+ {
+ Var *var = (Var *) lfirst(l);
+ RangeTblEntry *rte = planner_rt_fetch(var->varno, root);
+ char *attname;
+
+
+ if (i++ > 0)
+ appendStringInfo(targets, ", ");
+
+ attname = get_rte_attribute_name(rte, var->varattno);
+
+ if (out_index)
+ {
+ resetStringInfo(attrname);
+ /* varattno can be negative for sys attributes, hence the abs! */
+ appendStringInfo(attrname, "%s_%d_%d_%d",
+ attname, var->varno, abs(var->varattno), out_index);
+ appendStringInfo(targets, "%s.%s",
+ quote_identifier(out_alias), quote_identifier(attrname->data));
+ }
+ else
+ appendStringInfo(targets, "%s.%s",
+ quote_identifier(out_alias), quote_identifier(attname));
+
+ /* generate the new alias now using root->rs_alias_index */
+ resetStringInfo(attrname);
+ appendStringInfo(attrname, "%s_%d_%d_%d",
+ attname, var->varno, abs(var->varattno), root->rs_alias_index);
+ appendStringInfo(targets, " AS %s", quote_identifier(attrname->data));
+ add_null_target = false;
+ }
+
+ foreach(l, in_tlist)
+ {
+ Var *var = (Var *) lfirst(l);
+ RangeTblEntry *rte = planner_rt_fetch(var->varno, root);
+ char *attname;
+
+ if (i++ > 0)
+ appendStringInfo(targets, ", ");
+
+ attname = get_rte_attribute_name(rte, var->varattno);
+
+ if (in_index)
+ {
+ resetStringInfo(attrname);
+ /* varattno can be negative for sys attributes, hence the abs! */
+ appendStringInfo(attrname, "%s_%d_%d_%d",
+ attname, var->varno, abs(var->varattno), in_index);
+ appendStringInfo(targets, "%s.%s",
+ quote_identifier(in_alias), quote_identifier(attrname->data));
+ }
+ else
+ appendStringInfo(targets, "%s.%s",
+ quote_identifier(in_alias), quote_identifier(attname));
+
+ /* generate the new alias now using root->rs_alias_index */
+ resetStringInfo(attrname);
+ appendStringInfo(attrname, "%s_%d_%d_%d",
+ attname, var->varno, abs(var->varattno), root->rs_alias_index);
+ appendStringInfo(targets, " AS %s", quote_identifier(attrname->data));
+ add_null_target = false;
+ }
+
+ /*
+ * It's possible that in some cases, the targetlist might not refer to any
+ * vars from the joined relations, eg.
+ * select count(*) from t1, t2; select const from t1, t2; etc
+ * For such cases just add a NULL selection into this targetlist
+ */
+ if (add_null_target)
+ appendStringInfo(targets, " NULL ");
+}
+
+/*
+ * create_remote_clause_expr
+ * generate a string to represent the clause list expression using out_alias
+ * and in_alias references. This function does a cute hack by temporarily
+ * modifying the rte->eref entries of the involved relations to point to
+ * out_alias and in_alias appropriately. The deparse_expression call then
+ * generates a string using these erefs which is exactly what is desired here.
+ *
+ * Additionally it creates aliases for the column references based on the
+ * reduce_level values too. This handles the case when both sides have same
+ * named columns..
+ *
+ * Obviously this function restores the eref, alias values to their former selves
+ * appropriately too, after use
+ */
+static void
+create_remote_clause_expr(PlannerInfo *root, Plan *parent, StringInfo clauses,
+ List *qual, RemoteQuery *scan)
+{
+ Node *node = (Node *) make_ands_explicit(qual);
+
+ return create_remote_expr(root, parent, clauses, node, scan);
+}
+
+static void
+create_remote_expr(PlannerInfo *root, Plan *parent, StringInfo expr,
+ Node *node, RemoteQuery *scan)
+{
+ List *context;
+ List *leref = NIL;
+ ListCell *cell;
+ char *exprstr;
+ int rtindex;
+ Relids tmprelids, relids;
+
+ relids = pull_varnos((Node *)node);
+
+ tmprelids = bms_copy(relids);
+
+ while ((rtindex = bms_first_member(tmprelids)) >= 0)
+ {
+ RangeTblEntry *rte = planner_rt_fetch(rtindex, root);
+
+ /*
+ * This rtindex should be a member of either out_relids or
+ * in_relids and never both
+ */
+ if (bms_is_member(rtindex, scan->outer_relids) &&
+ bms_is_member(rtindex, scan->inner_relids))
+ elog(ERROR, "improper relid references in the join clause list");
+
+ /*
+ * save the current rte->eref and rte->alias values and stick in a new
+ * one in the rte with the proper inner or outer alias
+ */
+ leref = lappend(leref, rte->eref);
+ leref = lappend(leref, rte->alias);
+
+ if (bms_is_member(rtindex, scan->outer_relids))
+ {
+ rte->eref = makeAlias(scan->outer_alias, NIL);
+
+ /* attach proper column aliases.. */
+ rte->alias = generate_remote_rte_alias(rte, rtindex,
+ scan->outer_alias, scan->outer_reduce_level);
+ }
+ if (bms_is_member(rtindex, scan->inner_relids))
+ {
+ rte->eref = makeAlias(scan->inner_alias, NIL);
+
+ /* attach proper column aliases.. */
+ rte->alias = generate_remote_rte_alias(rte, rtindex,
+ scan->inner_alias, scan->inner_reduce_level);
+ }
+ }
+ bms_free(tmprelids);
+
+ /* Set up deparsing context */
+ context = deparse_context_for_plan((Node *) parent,
+ NULL,
+ root->parse->rtable,
+ NULL);
+
+ exprstr = deparse_expression(node, context, true, false);
+
+ /* revert back the saved eref entries in the same order now! */
+ cell = list_head(leref);
+ tmprelids = bms_copy(relids);
+ while ((rtindex = bms_first_member(tmprelids)) >= 0)
+ {
+ RangeTblEntry *rte = planner_rt_fetch(rtindex, root);
+
+ Assert(cell != NULL);
+
+ rte->eref = lfirst(cell);
+ cell = lnext(cell);
+
+ rte->alias = lfirst(cell);
+ cell = lnext(cell);
+ }
+ bms_free(tmprelids);
+
+ appendStringInfo(expr, " %s", exprstr);
+ return;
+}
+#endif
+
/*
* create_append_plan
* Create an Append plan for 'best_path' and (recursively) plans
@@ -1583,9 +2216,23 @@ create_remotequery_plan(PlannerInfo *root, Path *best_path,
List *tlist, List *scan_clauses)
{
RemoteQuery *scan_plan;
+ bool prefix;
Index scan_relid = best_path->parent->relid;
RangeTblEntry *rte;
-
+ char *wherestr = NULL;
+ Bitmapset *varattnos = NULL;
+ List *remote_scan_clauses = NIL;
+ List *local_scan_clauses = NIL;
+ Oid nspid;
+ char *nspname;
+ char *relname;
+ const char *nspname_q;
+ const char *relname_q;
+ const char *aliasname_q;
+ int i;
+ TupleDesc tupdesc;
+ bool first;
+ StringInfoData sql;
Assert(scan_relid > 0);
rte = planner_rt_fetch(scan_relid, root);
@@ -1598,16 +2245,159 @@ create_remotequery_plan(PlannerInfo *root, Path *best_path,
/* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
scan_clauses = extract_actual_clauses(scan_clauses, false);
+ if (scan_clauses)
+ {
+ ListCell *l;
+
+ foreach(l, (List *)scan_clauses)
+ {
+ Node *clause = lfirst(l);
+
+ if (is_foreign_qual(clause))
+ remote_scan_clauses = lappend(remote_scan_clauses, clause);
+ else
+ local_scan_clauses = lappend(local_scan_clauses, clause);
+ }
+ }
+
+ /*
+ * Incorporate any remote_scan_clauses into the WHERE clause that
+ * we intend to push to the remote server.
+ */
+ if (remote_scan_clauses)
+ {
+ char *sep = "";
+ ListCell *l;
+ StringInfoData buf;
+ List *deparse_context;
+
+ initStringInfo(&buf);
+
+ deparse_context = deparse_context_for_remotequery(
+ get_rel_name(rte->relid), rte->relid);
+
+ /*
+ * remote_scan_clauses is a list of scan clauses (restrictions) that we
+ * can push to the remote server. We want to deparse each of those
+ * expressions (that is, each member of the List) and AND them together
+ * into a WHERE clause.
+ */
+
+ foreach(l, (List *)remote_scan_clauses)
+ {
+ Node *clause = lfirst(l);
+
+ appendStringInfo(&buf, "%s", sep );
+ appendStringInfo(&buf, "%s", deparse_expression(clause, deparse_context, false, false));
+ sep = " AND ";
+ }
+
+ wherestr = buf.data;
+ }
+
+ /*
+ * Now walk through the target list and the scan clauses to get the
+ * interesting attributes. Only those attributes will be fetched from the
+ * remote side.
+ */
+ varattnos = pull_varattnos_varno((Node *) best_path->parent->reltargetlist, best_path->parent->relid,
+ varattnos);
+ varattnos = pull_varattnos_varno((Node *) local_scan_clauses,
+ best_path->parent->relid, varattnos);
+ /*
+ * Scanning multiple relations in a RemoteQuery node is not supported.
+ */
+ prefix = false;
+#if 0
+ prefix = list_length(estate->es_range_table) > 1;
+#endif
+
+ /* Get quoted names of schema, table and alias */
+ nspid = get_rel_namespace(rte->relid);
+ nspname = get_namespace_name(nspid);
+ relname = get_rel_name(rte->relid);
+ nspname_q = quote_identifier(nspname);
+ relname_q = quote_identifier(relname);
+ aliasname_q = quote_identifier(rte->eref->aliasname);
+
+ initStringInfo(&sql);
+
+ /* deparse SELECT clause */
+ appendStringInfo(&sql, "SELECT ");
+
+ /*
+ * TODO: omit (deparse to "NULL") columns which are not used in the
+ * original SQL.
+ *
+ * We must parse nodes parents of this RemoteQuery node to determine unused
+ * columns because some columns may be used only in parent Sort/Agg/Limit
+ * nodes.
+ */
+ tupdesc = best_path->parent->reltupdesc;
+ first = true;
+ for (i = 0; i < tupdesc->natts; i++)
+ {
+ /* skip dropped attributes */
+ if (tupdesc->attrs[i]->attisdropped)
+ continue;
+
+ if (!first)
+ appendStringInfoString(&sql, ", ");
+
+ if (bms_is_member(i + 1 - FirstLowInvalidHeapAttributeNumber, varattnos))
+ {
+ if (prefix)
+ appendStringInfo(&sql, "%s.%s",
+ aliasname_q, tupdesc->attrs[i]->attname.data);
+ else
+ appendStringInfo(&sql, "%s", tupdesc->attrs[i]->attname.data);
+ }
+ else
+ appendStringInfo(&sql, "%s", "NULL");
+ first = false;
+ }
+
+ /* if target list is composed only of system attributes, add dummy column */
+ if (first)
+ appendStringInfo(&sql, "NULL");
+
+ /* deparse FROM clause */
+ appendStringInfo(&sql, " FROM ");
+ /*
+ * XXX: should use GENERIC OPTIONS like 'foreign_relname' or something for
+ * the foreign table name instead of the local name ?
+ */
+ appendStringInfo(&sql, "%s.%s %s", nspname_q, relname_q, aliasname_q);
+ pfree(nspname);
+ pfree(relname);
+ if (nspname_q != nspname_q)
+ pfree((char *) nspname_q);
+ if (relname_q != relname_q)
+ pfree((char *) relname_q);
+ if (aliasname_q != rte->eref->aliasname)
+ pfree((char *) aliasname_q);
+
+ if (wherestr)
+ {
+ appendStringInfo(&sql, " WHERE ");
+ appendStringInfo(&sql, "%s", wherestr);
+ pfree(wherestr);
+ }
+
+ bms_free(varattnos);
+
scan_plan = make_remotequery(tlist,
rte,
- scan_clauses,
+ local_scan_clauses,
scan_relid);
+ scan_plan->sql_statement = sql.data;
+
copy_path_costsize(&scan_plan->scan.plan, best_path);
/* PGXCTODO - get better estimates */
scan_plan->scan.plan.plan_rows = 1000;
-
+
return scan_plan;
}
#endif
@@ -3745,3 +4535,56 @@ is_projection_capable_plan(Plan *plan)
}
return true;
}
+
+#ifdef PGXC
+/*
+ * findReferencedVars()
+ *
+ * Constructs a list of those Vars in targetlist which are found in
+ * parent_vars (in other words, the intersection of targetlist and
+ * parent_vars). Returns a new list in *out_tlist and a bitmap of
+ * those relids found in the result.
+ *
+ * Additionally do look at the qual references to other vars! They
+ * also need to be selected..
+ */
+static void
+findReferencedVars(List *parent_vars, Plan *plan, List **out_tlist, Relids *out_relids)
+{
+ List *vars;
+ Relids relids = NULL;
+ List *tlist = NIL;
+ ListCell *l;
+
+ /* Pull vars from both the targetlist and the clauses attached to this plan */
+ vars = pull_var_clause((Node *)plan->targetlist, PVC_REJECT_PLACEHOLDERS);
+
+ foreach(l, vars)
+ {
+ Var *var = lfirst(l);
+
+ if (search_tlist_for_var(var, parent_vars))
+ tlist = lappend(tlist, var);
+
+ if (!bms_is_member(var->varno, relids))
+ relids = bms_add_member(relids, var->varno);
+ }
+
+ /* now consider the local quals */
+ vars = pull_var_clause((Node *)plan->qual, PVC_REJECT_PLACEHOLDERS);
+
+ foreach(l, vars)
+ {
+ Var *var = lfirst(l);
+
+ if (search_tlist_for_var(var, tlist) == NULL)
+ tlist = lappend(tlist, var);
+
+ if (!bms_is_member(var->varno, relids))
+ relids = bms_add_member(relids, var->varno);
+ }
+
+ *out_tlist = tlist;
+ *out_relids = relids;
+}
+#endif
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 62893e0eef..87864876a9 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -301,6 +301,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->eq_classes = NIL;
root->append_rel_list = NIL;
+#ifdef PGXC
+ root->rs_alias_index = 1;
+#endif
root->hasRecursion = hasRecursion;
if (hasRecursion)
root->wt_param_id = SS_assign_worktable_param(root);
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index cbee7e9e9b..54d460238d 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -1382,6 +1382,32 @@ search_indexed_tlist_for_non_var(Node *node,
return NULL; /* no match */
}
+#ifdef PGXC
+/*
+ * search_tlist_for_var --- find a Var in the provided tlist. This does a
+ * basic scan through the list. So not very efficient...
+ *
+ * If no match, return NULL.
+ *
+ */
+Var *
+search_tlist_for_var(Var *var, List *jtlist)
+{
+ Index varno = var->varno;
+ AttrNumber varattno = var->varattno;
+ ListCell *l;
+
+ foreach(l, jtlist)
+ {
+ Var *listvar = (Var *) lfirst(l);
+
+ if (listvar->varno == varno && listvar->varattno == varattno)
+ return var;
+ }
+ return NULL; /* no match */
+}
+#endif
+
/*
* fix_join_expr
* Create a new set of targetlist entries or join qual clauses by
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 4ca3eeaaf2..ce62c8032f 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -92,12 +92,30 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
rel->index_outer_relids = NULL;
rel->index_inner_paths = NIL;
+#ifdef PGXC
+ rel->reltupdesc = rte->reltupdesc;
+#endif
+
/* Check type of rtable entry */
switch (rte->rtekind)
{
case RTE_RELATION:
/* Table --- retrieve statistics from the system catalogs */
get_relation_info(root, rte->relid, rte->inh, rel);
+#ifdef PGXC
+ /*
+ * This is a remote table... we have no idea how many pages/rows
+ * we may get from a scan of this table. However, we should set the
+ * costs in such a manner that cheapest paths should pick up the
+ * ones involving these remote rels
+ *
+ * These allow for maximum query shipping to the remote
+ * side later during the planning phase
+ */
+ rel->pages = 1;
+ rel->tuples = 1;
+ rel->rows = 1;
+#endif
break;
case RTE_SUBQUERY:
case RTE_FUNCTION:
diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c
index deb9ef8ebd..2132156648 100644
--- a/src/backend/optimizer/util/var.c
+++ b/src/backend/optimizer/util/var.c
@@ -34,6 +34,14 @@ typedef struct
int sublevels_up;
} pull_varnos_context;
+#ifdef PGXC
+typedef struct
+{
+ Index varno;
+ Bitmapset *varattnos;
+} pull_varattnos_context;
+#endif
+
typedef struct
{
int var_location;
@@ -68,6 +76,10 @@ typedef struct
static bool pull_varnos_walker(Node *node,
pull_varnos_context *context);
static bool pull_varattnos_walker(Node *node, Bitmapset **varattnos);
+#ifdef PGXC
+static bool pull_varattnos_varno_walker(Node *node,
+ pull_varattnos_context *context);
+#endif
static bool contain_var_clause_walker(Node *node, void *context);
static bool contain_vars_of_level_walker(Node *node, int *sublevels_up);
static bool locate_var_of_level_walker(Node *node,
@@ -228,6 +240,54 @@ contain_var_clause(Node *node)
return contain_var_clause_walker(node, NULL);
}
+#ifdef PGXC
+/*
+ * pull_varattnos_varno
+ * Find all the distinct attribute numbers present in an expression tree,
+ * and add them to the initial contents of *varattnos.
+ *
+ * Attribute numbers are offset by FirstLowInvalidHeapAttributeNumber so that
+ * we can include system attributes (e.g., OID) in the bitmap representation.
+ *
+ * This is same as pull_varattnos except for the fact that it gets attributes
+ * for the given varno
+ */
+Bitmapset *
+pull_varattnos_varno(Node *node, Index varno, Bitmapset *varattnos)
+{
+ pull_varattnos_context context;
+
+ context.varno = varno;
+ context.varattnos = varattnos;
+
+ (void) pull_varattnos_varno_walker(node, &context);
+
+ return context.varattnos;
+}
+
+static bool
+pull_varattnos_varno_walker(Node *node, pull_varattnos_context *context)
+{
+ if (node == NULL)
+ return false;
+
+ Assert(context != NULL);
+
+ if (IsA(node, Var))
+ {
+ Var *var = (Var *) node;
+
+ if (var->varno == context->varno)
+ context->varattnos = bms_add_member(context->varattnos,
+ var->varattno - FirstLowInvalidHeapAttributeNumber);
+ return false;
+ }
+
+ return expression_tree_walker(node, pull_varattnos_varno_walker,
+ (void *) context);
+}
+#endif
+
static bool
contain_var_clause_walker(Node *node, void *context)
{
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index b506c042c5..df4b4fa8f7 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -924,6 +924,11 @@ addRangeTableEntry(ParseState *pstate,
rel = parserOpenTable(pstate, relation, lockmode);
rte->relid = RelationGetRelid(rel);
+#ifdef PGXC
+ rte->reltupdesc = CreateTupleDescCopyConstr(rel->rd_att);
+ rte->relname = RelationGetRelationName(rel);
+#endif
+
/*
* Build the list of effective column names using user-supplied aliases
* and/or actual column names.
@@ -986,6 +991,11 @@ addRangeTableEntryForRelation(ParseState *pstate,
rte->alias = alias;
rte->relid = RelationGetRelid(rel);
+#ifdef PGXC
+ rte->reltupdesc = CreateTupleDescCopyConstr(rel->rd_att);
+ rte->relname = RelationGetRelationName(rel);
+#endif
+
/*
* Build the list of effective column names using user-supplied aliases
* and/or actual column names.
diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c
index 29e4ee035a..e678a14fc1 100644
--- a/src/backend/pgxc/plan/planner.c
+++ b/src/backend/pgxc/plan/planner.c
@@ -87,7 +87,8 @@ typedef struct
/* If two relations are joined based on special location information */
typedef enum PGXCJoinType
{
- JOIN_REPLICATED,
+ JOIN_REPLICATED_ONLY,
+ JOIN_REPLICATED_PARTITIONED,
JOIN_COLOCATED_PARTITIONED,
JOIN_OTHER
} PGXCJoinType;
@@ -144,6 +145,7 @@ static ExecNodes *get_plan_nodes(Query *query, bool isRead);
static bool get_plan_nodes_walker(Node *query_node, XCWalkerContext *context);
static bool examine_conditions_walker(Node *expr_node, XCWalkerContext *context);
static int handle_limit_offset(RemoteQuery *query_step, Query *query, PlannedStmt *plan_stmt);
+static void InitXCWalkerContext(XCWalkerContext *context);
/*
* True if both lists contain only one node and are the same
@@ -693,15 +695,20 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context)
if (rel_loc_info1->locatorType == LOCATOR_TYPE_REPLICATED)
{
+
/* add to replicated join conditions */
context->conditions->replicated_joins =
- lappend(context->conditions->replicated_joins, opexpr);
+ lappend(context->conditions->replicated_joins, pgxc_join);
if (colvar->varlevelsup != colvar2->varlevelsup)
context->multilevel_join = true;
- if (rel_loc_info2->locatorType != LOCATOR_TYPE_REPLICATED)
+ if (rel_loc_info2->locatorType == LOCATOR_TYPE_REPLICATED)
+ pgxc_join->join_type = JOIN_REPLICATED_ONLY;
+ else
{
+ pgxc_join->join_type = JOIN_REPLICATED_PARTITIONED;
+
/* Note other relation, saves us work later. */
context->conditions->base_rel_name = column_base2->relname;
context->conditions->base_rel_loc_info = rel_loc_info2;
@@ -717,23 +724,21 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context)
FreeRelationLocInfo(rel_loc_info2);
}
- /* note nature of join between the two relations */
- pgxc_join->join_type = JOIN_REPLICATED;
return false;
}
else if (rel_loc_info2->locatorType == LOCATOR_TYPE_REPLICATED)
{
+ /* note nature of join between the two relations */
+ pgxc_join->join_type = JOIN_REPLICATED_PARTITIONED;
+
/* add to replicated join conditions */
context->conditions->replicated_joins =
- lappend(context->conditions->replicated_joins, opexpr);
+ lappend(context->conditions->replicated_joins, pgxc_join);
/* other relation not replicated, note it for later */
context->conditions->base_rel_name = column_base->relname;
context->conditions->base_rel_loc_info = rel_loc_info1;
- /* note nature of join between the two relations */
- pgxc_join->join_type = JOIN_REPLICATED;
-
if (rel_loc_info2)
FreeRelationLocInfo(rel_loc_info2);
@@ -1259,6 +1264,23 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context)
return false;
}
+/*
+ * Set initial values for expression walker
+ */
+static void
+InitXCWalkerContext(XCWalkerContext *context)
+{
+ context->isRead = true;
+ context->exec_nodes = NULL;
+ context->conditions = (Special_Conditions *) palloc0(sizeof(Special_Conditions));
+ context->rtables = NIL;
+ context->multilevel_join = false;
+ context->varno = 0;
+ context->within_or = false;
+ context->within_not = false;
+ context->exec_on_coord = false;
+ context->join_list = NIL;
+}
/*
* Top level entry point before walking query to determine plan nodes
@@ -1271,18 +1293,9 @@ get_plan_nodes(Query *query, bool isRead)
XCWalkerContext context;
- context.query = query;
+ InitXCWalkerContext(&context);
context.isRead = isRead;
- context.exec_nodes = NULL;
- context.conditions = (Special_Conditions *) palloc0(sizeof(Special_Conditions));
- context.rtables = NIL;
context.rtables = lappend(context.rtables, query->rtable);
- context.multilevel_join = false;
- context.varno = 0;
- context.within_or = false;
- context.within_not = false;
- context.exec_on_coord = false;
- context.join_list = NIL;
if (!get_plan_nodes_walker((Node *) query, &context))
result_nodes = context.exec_nodes;
@@ -2315,3 +2328,148 @@ free_query_step(RemoteQuery *query_step)
list_free_deep(query_step->simple_aggregates);
pfree(query_step);
}
+
+
+/*
+ * See if we can reduce the passed in RemoteQuery nodes to a single step.
+ *
+ * We need to check when we can further collapse already collapsed nodes.
+ * We cannot always collapse- we do not want to allow a replicated table
+ * to be used twice. That is if we have
+ *
+ * partitioned_1 -- replicated -- partitioned_2
+ *
+ * partitioned_1 and partitioned_2 cannot (usually) be safely joined only
+ * locally.
+ * We can do this by checking (may need tracking) what type it is,
+ * and looking at context->conditions->replicated_joins
+ *
+ * The following cases are possible, and whether or not it is ok
+ * to reduce.
+ *
+ * If the join between the two RemoteQuery nodes is replicated
+ *
+ * Node 1 Node 2
+ * rep-part folded rep-part folded ok to reduce?
+ * 0 0 0 1 1
+ * 0 0 1 1 1
+ * 0 1 0 1 1
+ * 0 1 1 1 1
+ * 1 1 1 1 0
+ *
+ *
+ * If the join between the two RemoteQuery nodes is replicated - partitioned
+ *
+ * Node 1 Node 2
+ * rep-part folded rep-part folded ok to reduce?
+ * 0 0 0 1 1
+ * 0 0 1 1 0
+ * 0 1 0 1 1
+ * 0 1 1 1 0
+ * 1 1 1 1 0
+ *
+ *
+ * If the join between the two RemoteQuery nodes is partitioned - partitioned
+ * it is always reducibile safely,
+ *
+ * RemoteQuery *innernode - the inner node
+ * RemoteQuery *outernode - the outer node
+ * bool *partitioned_replicated - set to true if we have a partitioned-replicated
+ * join. We want to use replicated tables with non-replicated
+ * tables ony once. Only use this value if this function
+ * returns true.
+ */
+bool
+IsJoinReducible(RemoteQuery *innernode, RemoteQuery *outernode,
+ List *rtable_list, JoinPath *join_path, bool *partitioned_replicated)
+{
+ XCWalkerContext context;
+ ListCell *cell;
+ bool maybe_reducible = false;
+ bool result = false;
+
+
+ *partitioned_replicated = false;
+
+ InitXCWalkerContext(&context);
+ context.isRead = true; /* PGXCTODO - determine */
+ context.rtables = NIL;
+ context.rtables = lappend(context.rtables, rtable_list); /* add to list of lists */
+
+
+
+ foreach(cell, join_path->joinrestrictinfo)
+ {
+ RestrictInfo *node = (RestrictInfo *) lfirst(cell);
+
+ /*
+ * Check if we can fold these safely.
+ *
+ * If examine_conditions_walker() returns true,
+ * then it definitely is not collapsable.
+ * If it returns false, it may or may not be, we have to check
+ * context.conditions at the end.
+ * We keep trying, because another condition may fulfill the criteria.
+ */
+ maybe_reducible = !examine_conditions_walker((Node *) node->clause, &context);
+
+ if (!maybe_reducible)
+ break;
+
+ }
+
+ /* check to see if we found any partitioned or replicated joins */
+ if (maybe_reducible &&
+ (context.conditions->partitioned_parent_child
+ || context.conditions->replicated_joins))
+ {
+ /*
+ * If we get here, we think that we can fold the
+ * RemoteQuery nodes into a single one.
+ */
+ result = true;
+
+ /* Check replicated-replicated and replicated-partitioned joins */
+ if (context.conditions->replicated_joins)
+ {
+ ListCell *cell;
+
+ /* if we already reduced with replicated tables already, we
+ * cannot here.
+ * PGXCTODO - handle more cases and use outer_relids and inner_relids
+ * For now we just give up.
+ */
+ if ((innernode->remotejoin && innernode->partitioned_replicated) &&
+ (outernode->remotejoin && outernode->partitioned_replicated))
+ {
+ /* not reducible after all */
+ return false;
+ }
+
+ foreach(cell, context.conditions->replicated_joins)
+ {
+ PGXC_Join *pgxc_join = (PGXC_Join *) lfirst(cell);
+
+ if (pgxc_join->join_type == JOIN_REPLICATED_PARTITIONED)
+ {
+ *partitioned_replicated = true;
+
+ /*
+ * If either of these already have such a join, we do not
+ * want to add it a second time.
+ */
+ if ((innernode->remotejoin && innernode->partitioned_replicated) ||
+ (outernode->remotejoin && outernode->partitioned_replicated))
+ {
+ /* not reducible after all */
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+
diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c
index 14dce33cee..7fe08beacb 100644
--- a/src/backend/pgxc/pool/execRemote.c
+++ b/src/backend/pgxc/pool/execRemote.c
@@ -2388,20 +2388,6 @@ ExecInitRemoteQuery(RemoteQuery *node, EState *estate, int eflags)
ExecInitScanTupleSlot(estate, &remotestate->ss);
- /*
- * Initialize scan relation. get the relation object id from the
- * relid'th entry in the range table, open that relation and acquire
- * appropriate lock on it.
- * This is needed for deparseSQL
- * We should remove these lines once we plan and deparse earlier.
- */
- if (!node->is_single_step)
- {
- currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);
- remotestate->ss.ss_currentRelation = currentRelation;
- ExecAssignScanType(&remotestate->ss, RelationGetDescr(currentRelation));
- }
-
remotestate->ss.ps.ps_TupFromTlist = false;
/*
@@ -2723,11 +2709,6 @@ ExecRemoteQuery(RemoteQueryState *node)
errmsg("Could not begin transaction on data nodes.")));
}
- /* Get the SQL string */
- /* only do if not single step */
- if (!step->is_single_step)
- step->sql_statement = deparseSql(node);
-
/* See if we have a primary node, execute on it first before the others */
if (primaryconnection)
{
diff --git a/src/backend/pgxc/pool/postgresql_fdw.c b/src/backend/pgxc/pool/postgresql_fdw.c
index dabf5da0aa..14c0ddbe85 100644
--- a/src/backend/pgxc/pool/postgresql_fdw.c
+++ b/src/backend/pgxc/pool/postgresql_fdw.c
@@ -45,7 +45,7 @@
/* deparse SQL from the request */
bool is_immutable_func(Oid funcid);
-static bool is_foreign_qual(ExprState *state);
+bool is_foreign_qual(Node *node);
static bool foreign_qual_walker(Node *node, void *context);
char *deparseSql(RemoteQueryState *scanstate);
@@ -103,10 +103,10 @@ is_immutable_func(Oid funcid)
* local server in the foreign server.
* - scalar array operator (ANY/ALL)
*/
-static bool
-is_foreign_qual(ExprState *state)
+bool
+is_foreign_qual(Node *node)
{
- return !foreign_qual_walker((Node *) state->expr, NULL);
+ return !foreign_qual_walker(node, NULL);
}
/*
@@ -120,6 +120,9 @@ foreign_qual_walker(Node *node, void *context)
switch (nodeTag(node))
{
+ case T_ExprState:
+ return foreign_qual_walker((Node *) ((ExprState *) node)->expr, NULL);
+
case T_Param:
/* TODO: pass internal parameters to the foreign server */
if (((Param *) node)->paramkind != PARAM_EXTERN)
@@ -286,7 +289,7 @@ elog(DEBUG2, "%s(%u) called", __FUNCTION__, __LINE__);
{
ExprState *state = lfirst(lc);
- if (is_foreign_qual(state))
+ if (is_foreign_qual((Node *) state))
{
elog(DEBUG1, "foreign qual: %s", nodeToString(state->expr));
foreign_qual = lappend(foreign_qual, state);
@@ -317,7 +320,7 @@ elog(DEBUG2, "%s(%u) called", __FUNCTION__, __LINE__);
Node *node;
node = (Node *) make_ands_explicit(foreign_expr);
appendStringInfo(&sql, " WHERE ");
- appendStringInfo(&sql,
+ appendStringInfo(&sql, "%s",
deparse_expression(node, context, prefix, false));
/*
* The contents of the list MUST NOT be free-ed because they are
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index f52bf8b5a1..646149594e 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -114,6 +114,8 @@ typedef struct
List *subplans; /* List of subplans, in plan-tree case */
Plan *outer_plan; /* OUTER subplan, or NULL if none */
Plan *inner_plan; /* INNER subplan, or NULL if none */
+
+ bool remotequery; /* deparse context for remote query */
} deparse_namespace;
@@ -1934,10 +1936,42 @@ deparse_context_for(const char *aliasname, Oid relid)
dpns->ctes = NIL;
dpns->subplans = NIL;
dpns->outer_plan = dpns->inner_plan = NULL;
+#ifdef PGXC
+ dpns->remotequery = false;
+#endif
+
+ /* Return a one-deep namespace stack */
+ return list_make1(dpns);
+}
+
+#ifdef PGXC
+List *
+deparse_context_for_remotequery(const char *aliasname, Oid relid)
+{
+ deparse_namespace *dpns;
+ RangeTblEntry *rte;
+
+ dpns = (deparse_namespace *) palloc(sizeof(deparse_namespace));
+
+ /* Build a minimal RTE for the rel */
+ rte = makeNode(RangeTblEntry);
+ rte->rtekind = RTE_RELATION;
+ rte->relid = relid;
+ rte->eref = makeAlias(aliasname, NIL);
+ rte->inh = false;
+ rte->inFromCl = true;
+
+ /* Build one-element rtable */
+ dpns->rtable = list_make1(rte);
+ dpns->ctes = NIL;
+ dpns->subplans = NIL;
+ dpns->outer_plan = dpns->inner_plan = NULL;
+ dpns->remotequery = true;
/* Return a one-deep namespace stack */
return list_make1(dpns);
}
+#endif
/*
* deparse_context_for_plan - Build deparse context for a plan node
@@ -1972,7 +2006,9 @@ deparse_context_for_plan(Node *plan, Node *outer_plan,
dpns->rtable = rtable;
dpns->ctes = NIL;
dpns->subplans = subplans;
-
+#ifdef PGXC
+ dpns->remotequery = false;
+#endif
/*
* Set up outer_plan and inner_plan from the Plan node (this includes
* various special cases for particular Plan types).
@@ -2136,7 +2172,9 @@ make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
dpns.ctes = query->cteList;
dpns.subplans = NIL;
dpns.outer_plan = dpns.inner_plan = NULL;
-
+#ifdef PGXC
+ dpns.remotequery = false;
+#endif
get_rule_expr(qual, &context, false);
}
@@ -2283,7 +2321,9 @@ get_query_def(Query *query, StringInfo buf, List *parentnamespace,
dpns.ctes = query->cteList;
dpns.subplans = NIL;
dpns.outer_plan = dpns.inner_plan = NULL;
-
+#ifdef PGXC
+ dpns.remotequery = false;
+#endif
switch (query->commandType)
{
case CMD_SELECT:
@@ -3377,6 +3417,14 @@ get_variable(Var *var, int levelsup, bool showstar, deparse_context *context)
* likely that varno is OUTER or INNER, in which case we must dig down
* into the subplans.
*/
+#ifdef PGXC
+ if (dpns->remotequery)
+ {
+ rte = rt_fetch(1, dpns->rtable);
+ attnum = var->varattno;
+ }
+ else
+#endif
if (var->varno >= 1 && var->varno <= list_length(dpns->rtable))
{
rte = rt_fetch(var->varno, dpns->rtable);
@@ -3703,6 +3751,9 @@ get_name_for_var_field(Var *var, int fieldno,
mydpns.ctes = rte->subquery->cteList;
mydpns.subplans = NIL;
mydpns.outer_plan = mydpns.inner_plan = NULL;
+#ifdef PGXC
+ mydpns.remotequery = false;
+#endif
context->namespaces = lcons(&mydpns,
context->namespaces);
@@ -3826,7 +3877,9 @@ get_name_for_var_field(Var *var, int fieldno,
mydpns.ctes = ctequery->cteList;
mydpns.subplans = NIL;
mydpns.outer_plan = mydpns.inner_plan = NULL;
-
+#ifdef PGXC
+ mydpns.remotequery = false;
+#endif
new_nslist = list_copy_tail(context->namespaces,
ctelevelsup);
context->namespaces = lcons(&mydpns, new_nslist);
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 968126190e..fd2eba0adc 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -686,6 +686,16 @@ static struct config_bool ConfigureNamesBool[] =
&enable_hashjoin,
true, NULL, NULL
},
+#ifdef PGXC
+ {
+ {"enable_remotejoin", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Enables the planner's use of remote join plans."),
+ NULL
+ },
+ &enable_remotejoin,
+ true, NULL, NULL
+ },
+#endif
{
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
gettext_noop("Enables genetic query optimization."),
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 6250de3774..355c44847e 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -24,6 +24,9 @@
#include "nodes/bitmapset.h"
#include "nodes/primnodes.h"
#include "nodes/value.h"
+#ifdef PGXC
+#include "access/tupdesc.h"
+#endif
/* Possible sources of a Query */
typedef enum QuerySource
@@ -660,6 +663,11 @@ typedef struct RangeTblEntry
* code that is being actively worked on. FIXME someday.
*/
+#ifdef PGXC
+ char *relname;
+ TupleDesc reltupdesc;
+#endif
+
/*
* Fields valid for a plain relation RTE (else zero):
*/
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 7eb15dbeec..4acd48bbcb 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -189,6 +189,11 @@ typedef struct PlannerInfo
* pseudoconstant = true */
bool hasRecursion; /* true if planning a recursive WITH item */
+#ifdef PGXC
+ /* This field is used only when RemoteScan nodes are involved */
+ int rs_alias_index; /* used to build the alias reference */
+#endif
+
/* These fields are used only when hasRecursion is true: */
int wt_param_id; /* PARAM_EXEC ID for the work table */
struct Plan *non_recursive_plan; /* plan for non-recursive term */
@@ -377,6 +382,10 @@ typedef struct RelOptInfo
* clauses */
List *index_inner_paths; /* InnerIndexscanInfo nodes */
+#ifdef PGXC
+ TupleDesc reltupdesc;
+#endif
+
/*
* Inner indexscans are not in the main pathlist because they are not
* usable except in specific join contexts. We use the index_inner_paths
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 839270f355..15cdf599a2 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -59,6 +59,9 @@ extern bool enable_hashagg;
extern bool enable_nestloop;
extern bool enable_mergejoin;
extern bool enable_hashjoin;
+#ifdef PGXC
+extern bool enable_remotejoin;
+#endif
extern int constraint_exclusion;
extern double clamp_row_est(double nrows);
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 3ffd80003a..2c3dcd4dbd 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -119,4 +119,7 @@ extern void extract_query_dependencies(List *queries,
List **relationOids,
List **invalItems);
+#ifdef PGXC
+extern Var *search_tlist_for_var(Var *var, List *jtlist);
+#endif
#endif /* PLANMAIN_H */
diff --git a/src/include/optimizer/var.h b/src/include/optimizer/var.h
index 5d19572e0d..0e7ac5f212 100644
--- a/src/include/optimizer/var.h
+++ b/src/include/optimizer/var.h
@@ -25,6 +25,9 @@ typedef enum
extern Relids pull_varnos(Node *node);
extern void pull_varattnos(Node *node, Bitmapset **varattnos);
+#ifdef PGXC
+extern Bitmapset * pull_varattnos_varno(Node *node, Index varno, Bitmapset *varattnos);
+#endif
extern bool contain_var_clause(Node *node);
extern bool contain_vars_of_level(Node *node, int levelsup);
extern int locate_var_of_level(Node *node, int levelsup);
diff --git a/src/include/pgxc/planner.h b/src/include/pgxc/planner.h
index 1e31fa38c1..8aae356fd2 100644
--- a/src/include/pgxc/planner.h
+++ b/src/include/pgxc/planner.h
@@ -23,6 +23,7 @@
#include "nodes/primnodes.h"
#include "pgxc/locator.h"
#include "tcop/dest.h"
+#include "nodes/relation.h"
typedef enum
@@ -85,6 +86,18 @@ typedef struct
bool read_only; /* do not use 2PC when committing read only steps */
bool force_autocommit; /* some commands like VACUUM require autocommit mode */
RemoteQueryExecType exec_type;
+
+ char *relname;
+ bool remotejoin; /* True if this is a reduced remote join */
+ bool partitioned_replicated; /* True if reduced and contains replicated-partitioned join */
+ int reduce_level; /* in case of reduced JOIN, it's level */
+ List *base_tlist; /* in case of isReduced, the base tlist */
+ char *outer_alias;
+ char *inner_alias;
+ int outer_reduce_level;
+ int inner_reduce_level;
+ Relids outer_relids;
+ Relids inner_relids;
} RemoteQuery;
@@ -165,4 +178,8 @@ extern PlannedStmt *pgxc_planner(Query *query, int cursorOptions,
extern bool IsHashDistributable(Oid col_type);
extern bool is_immutable_func(Oid funcid);
+
+extern bool IsJoinReducible(RemoteQuery *innernode, RemoteQuery *outernode,
+ List *rtable_list, JoinPath *join_path, bool *partitioned_replicated);
+
#endif /* PGXCPLANNER_H */
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 90955b73f9..4b1463ff74 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -595,7 +595,10 @@ extern Datum pg_get_function_identity_arguments(PG_FUNCTION_ARGS);
extern Datum pg_get_function_result(PG_FUNCTION_ARGS);
extern char *deparse_expression(Node *expr, List *dpcontext,
bool forceprefix, bool showimplicit);
+extern List *deparse_context_for_remotequery(const char *aliasname, Oid relid);
+#ifdef PGXC
extern List *deparse_context_for(const char *aliasname, Oid relid);
+#endif
extern List *deparse_context_for_plan(Node *plan, Node *outer_plan,
List *rtable, List *subplans);
extern const char *quote_identifier(const char *ident);