summaryrefslogtreecommitdiff
path: root/src/backend/optimizer
diff options
context:
space:
mode:
authorM S2010-08-23 04:11:31 +0000
committerPavan Deolasee2011-05-19 16:45:15 +0000
commitdb8408c3637a9af6fa36a7a90513a261076a1e46 (patch)
tree9991c3e746165f0c1ed28ef192cce430c5cfe34f /src/backend/optimizer
parentbd68c7342a793aa6b7c8a835196e85bb127b2f5b (diff)
Initial support for multi-step queries, including cross-node joins.
Note that this is a "version 1.0" implementation, borrowing some code from the SQL/MED patch. This means that all cross-node joins take place on a Coordinator by pulling up data from the data nodes. Some queries will therefore execute quite slowly, but they will at least execute. In this patch, all columns are SELECTed from the remote table, but at least simple WHERE clauses are pushed down to the remote nodes. We will optimize query processing in the future. Note that the same connections to remote nodes are used in multiple steps. To get around that problem, we just add a materialization node above each RemoteQuery node, and force all results to be fetched first on the Coordinator. This patch also allows UNION, EXCEPT and INTERSECT, and other more complex SELECT statements to run now. It includes a fix for single-step, multi-node LIMIT and OFFSET. It also includes EXPLAIN output from the Coordinator's point of view. Adding these changes introduced a problem with AVG(), which is currently not working.
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r--src/backend/optimizer/path/allpaths.c20
-rw-r--r--src/backend/optimizer/plan/createplan.c99
-rw-r--r--src/backend/optimizer/plan/setrefs.c16
-rw-r--r--src/backend/optimizer/plan/subselect.c6
-rw-r--r--src/backend/optimizer/util/pathnode.c22
5 files changed, 163 insertions, 0 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 4a0a1012c0..21581b07fb 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -17,6 +17,7 @@
#include <math.h>
+#include "catalog/pg_namespace.h"
#include "nodes/nodeFuncs.h"
#ifdef OPTIMIZER_DEBUG
#include "nodes/print.h"
@@ -32,7 +33,11 @@
#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
+#ifdef PGXC
+#include "pgxc/pgxc.h"
+#endif
#include "rewrite/rewriteManip.h"
+#include "utils/lsyscache.h"
/* These parameters are set by GUC */
@@ -253,6 +258,18 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
* least one dimension of cost or sortedness.
*/
+#ifdef PGXC
+ /*
+ * If we are on the coordinator, we always want to use
+ * the remote query path unless it is a pg_catalog table.
+ */
+ if (IS_PGXC_COORDINATOR
+ && get_rel_namespace(rte->relid) != PG_CATALOG_NAMESPACE)
+ add_path(rel, create_remotequery_path(root, rel));
+ else
+ {
+#endif
+
/* Consider sequential scan */
add_path(rel, create_seqscan_path(root, rel));
@@ -261,6 +278,9 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
/* Consider TID scans */
create_tidscan_paths(root, rel);
+#ifdef PGXC
+ }
+#endif
/* Now find the cheapest of the paths for this rel */
set_cheapest(rel);
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index ab07a0dbea..ca9cfbc371 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -32,6 +32,9 @@
#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
+#ifdef PGXC
+#include "pgxc/planner.h"
+#endif
#include "utils/lsyscache.h"
@@ -66,6 +69,10 @@ static CteScan *create_ctescan_plan(PlannerInfo *root, Path *best_path,
List *tlist, List *scan_clauses);
static WorkTableScan *create_worktablescan_plan(PlannerInfo *root, Path *best_path,
List *tlist, List *scan_clauses);
+#ifdef PGXC
+static RemoteQuery *create_remotequery_plan(PlannerInfo *root, Path *best_path,
+ List *tlist, List *scan_clauses);
+#endif
static NestLoop *create_nestloop_plan(PlannerInfo *root, NestPath *best_path,
Plan *outer_plan, Plan *inner_plan);
static MergeJoin *create_mergejoin_plan(PlannerInfo *root, MergePath *best_path,
@@ -101,6 +108,10 @@ static CteScan *make_ctescan(List *qptlist, List *qpqual,
Index scanrelid, int ctePlanId, int cteParam);
static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual,
Index scanrelid, int wtParam);
+#ifdef PGXC
+static RemoteQuery *make_remotequery(List *qptlist, RangeTblEntry *rte,
+ List *qpqual, Index scanrelid);
+#endif
static BitmapAnd *make_bitmap_and(List *bitmapplans);
static BitmapOr *make_bitmap_or(List *bitmapplans);
static NestLoop *make_nestloop(List *tlist,
@@ -162,6 +173,9 @@ create_plan(PlannerInfo *root, Path *best_path)
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
+#ifdef PGXC
+ case T_RemoteQuery:
+#endif
plan = create_scan_plan(root, best_path);
break;
case T_HashJoin:
@@ -207,6 +221,9 @@ create_scan_plan(PlannerInfo *root, Path *best_path)
List *tlist;
List *scan_clauses;
Plan *plan;
+#ifdef PGXC
+ Plan *matplan;
+#endif
/*
* For table scans, rather than using the relation targetlist (which is
@@ -298,6 +315,23 @@ create_scan_plan(PlannerInfo *root, Path *best_path)
scan_clauses);
break;
+#ifdef PGXC
+ case T_RemoteQuery:
+ plan = (Plan *) create_remotequery_plan(root,
+ best_path,
+ tlist,
+ scan_clauses);
+
+ /*
+ * Insert a materialization plan above this temporarily
+ * until we better handle multiple steps using the same connection.
+ */
+ matplan = (Plan *) make_material(plan);
+ copy_plan_costsize(matplan, plan);
+ matplan->total_cost += cpu_tuple_cost * matplan->plan_rows;
+ plan = matplan;
+ break;
+#endif
default:
elog(ERROR, "unrecognized node type: %d",
(int) best_path->pathtype);
@@ -420,6 +454,9 @@ disuse_physical_tlist(Plan *plan, Path *path)
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
+#ifdef PGXC
+ case T_RemoteQuery:
+#endif
plan->targetlist = build_relation_tlist(path->parent);
break;
default:
@@ -1544,6 +1581,46 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path,
return scan_plan;
}
+#ifdef PGXC
+/*
+ * create_remotequery_plan
+ * Returns a remotequery plan for the base relation scanned by 'best_path'
+ * with restriction clauses 'scan_clauses' and targetlist 'tlist'.
+ *
+ * 'root' is used to look up the range table entry of the scanned relation
+ * and to order the restriction clauses.
+ * 'best_path' supplies the scanned relation (via its parent) and is passed
+ * to copy_path_costsize() for the plan node's cost fields.
+ * 'tlist' is handed to make_remotequery() unchanged.
+ * 'scan_clauses' is reordered and reduced to bare expressions before it is
+ * attached to the plan node as its qual.
+ *
+ * The relation must be a plain relation (RTE_RELATION); this is asserted
+ * both on the path's parent and on the range table entry itself.
+ */
+static RemoteQuery *
+create_remotequery_plan(PlannerInfo *root, Path *best_path,
+ List *tlist, List *scan_clauses)
+{
+ RemoteQuery *scan_plan;
+ Index scan_relid = best_path->parent->relid;
+ RangeTblEntry *rte;
+
+
+ Assert(scan_relid > 0);
+ rte = planner_rt_fetch(scan_relid, root);
+ Assert(best_path->parent->rtekind == RTE_RELATION);
+ Assert(rte->rtekind == RTE_RELATION);
+
+ /* Sort clauses into best execution order */
+ scan_clauses = order_qual_clauses(root, scan_clauses);
+
+ /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
+ scan_clauses = extract_actual_clauses(scan_clauses, false);
+
+ scan_plan = make_remotequery(tlist,
+ rte,
+ scan_clauses,
+ scan_relid);
+
+ copy_path_costsize(&scan_plan->scan.plan, best_path);
+
+ /*
+ * PGXCTODO - get better estimates
+ *
+ * Until then the row count is a fixed placeholder of 1000; it overrides
+ * any row count set by the copy_path_costsize() call above.
+ */
+ scan_plan->scan.plan.plan_rows = 1000;
+
+ return scan_plan;
+}
+#endif
+
/*****************************************************************************
*
@@ -2541,6 +2618,28 @@ make_worktablescan(List *qptlist,
return node;
}
+#ifdef PGXC
+static RemoteQuery *
+make_remotequery(List *qptlist,
+ RangeTblEntry *rte,
+ List *qpqual,
+ Index scanrelid)
+{
+ RemoteQuery *node = makeNode(RemoteQuery);
+ Plan *plan = &node->scan.plan;
+
+ /*
+ * Build a RemoteQuery scan node for range table index 'scanrelid', with
+ * 'qptlist' as its target list and 'qpqual' as its qual. The node has
+ * no child plans and is flagged read_only.
+ *
+ * 'rte' is accepted but not referenced in this function.
+ *
+ * cost should be inserted by caller
+ */
+ plan->targetlist = qptlist;
+ plan->qual = qpqual;
+ plan->lefttree = NULL;
+ plan->righttree = NULL;
+ node->scan.scanrelid = scanrelid;
+ node->read_only = true;
+
+ return node;
+}
+#endif
+
Append *
make_append(List *appendplans, bool isTarget, List *tlist)
{
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 11e14f96c5..cbee7e9e9b 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -22,6 +22,9 @@
#include "optimizer/clauses.h"
#include "optimizer/planmain.h"
#include "optimizer/tlist.h"
+#ifdef PGXC
+#include "pgxc/planner.h"
+#endif
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
@@ -369,6 +372,19 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, int rtoffset)
fix_scan_list(glob, splan->scan.plan.qual, rtoffset);
}
break;
+#ifdef PGXC
+ case T_RemoteQuery:
+ {
+ RemoteQuery *splan = (RemoteQuery *) plan;
+
+ splan->scan.scanrelid += rtoffset;
+ splan->scan.plan.targetlist =
+ fix_scan_list(glob, splan->scan.plan.targetlist, rtoffset);
+ splan->scan.plan.qual =
+ fix_scan_list(glob, splan->scan.plan.qual, rtoffset);
+ }
+ break;
+#endif
case T_NestLoop:
case T_MergeJoin:
case T_HashJoin:
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index cdff123828..3e813c4f71 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1926,6 +1926,12 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
bms_add_member(context.paramids,
((WorkTableScan *) plan)->wtParam);
break;
+#ifdef PGXC
+ case T_RemoteQuery:
+ //PGXCTODO
+ context.paramids = bms_add_members(context.paramids, valid_params);
+ break;
+#endif
case T_Append:
{
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index b0358cb112..5f1462f4e1 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1310,6 +1310,28 @@ create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel)
return pathnode;
}
+#ifdef PGXC
+/*
+ * create_remotequery_path
+ * Creates a path corresponding to a scan of a remote query,
+ * returning the pathnode.
+ *
+ * 'rel' becomes the parent of the new path. The path carries no pathkeys,
+ * so callers must not rely on any output ordering.
+ */
+Path *
+create_remotequery_path(PlannerInfo *root, RelOptInfo *rel)
+{
+ Path *pathnode = makeNode(Path);
+
+ pathnode->pathtype = T_RemoteQuery;
+ pathnode->parent = rel;
+ pathnode->pathkeys = NIL; /* result is always unordered */
+
+ /*
+ * PGXCTODO - set cost properly
+ *
+ * For now the path is costed with cost_seqscan(), i.e. the same costing
+ * call used for a sequential scan, as a stand-in.
+ */
+ cost_seqscan(pathnode, root, rel);
+
+ return pathnode;
+}
+#endif
+
/*
* create_nestloop_path
* Creates a pathnode corresponding to a nestloop join between two