Diffstat (limited to 'src/backend/optimizer')
43 files changed, 5860 insertions, 2448 deletions
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 775bcc3b73..fc0fca4107 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -375,6 +375,7 @@ RelOptInfo - a relation or joined relations
  UniquePath - remove duplicate rows (either by hashing or sorting)
  GatherPath - collect the results of parallel workers
  ProjectionPath - a Result plan node with child (used for projection)
+ ProjectSetPath - a ProjectSet plan node applied to some sub-path
  SortPath - a Sort plan node applied to some sub-path
  GroupPath - a Group plan node applied to some sub-path
  UpperUniquePath - a Unique plan node applied to some sub-path
@@ -756,9 +757,8 @@ to create a plan like
     -> Seq Scan on SmallTable1 A
     NestLoop
         -> Seq Scan on SmallTable2 B
-        NestLoop
-            -> Index Scan using XYIndex on LargeTable C
-                 Index Condition: C.X = A.AID and C.Y = B.BID
+        -> Index Scan using XYIndex on LargeTable C
+             Index Condition: C.X = A.AID and C.Y = B.BID

 so we should be willing to pass down A.AID through a join even though
 there is no join order constraint forcing the plan to look like this.
@@ -877,6 +877,108 @@ lateral reference.  (Perhaps now that that stuff works, we could relax
 the pullup restriction?)

+
+Security-level constraints on qual clauses
+------------------------------------------
+
+To support row-level security and security-barrier views efficiently,
+we mark qual clauses (RestrictInfo nodes) with a "security_level" field.
+The basic concept is that a qual with a lower security_level must be
+evaluated before one with a higher security_level.  This ensures that
+"leaky" quals that might expose sensitive data are not evaluated until
+after the security barrier quals that are supposed to filter out
+security-sensitive rows.  However, many qual conditions are "leakproof",
+that is we trust the functions they use to not expose data.  To avoid
+unnecessarily inefficient plans, a leakproof qual is not delayed by
+security-level considerations, even if it has a higher syntactic
+security_level than another qual.
+
+In a query that contains no use of RLS or security-barrier views, all
+quals will have security_level zero, so that none of these restrictions
+kick in; we don't even need to check leakproofness of qual conditions.
+
+If there are security-barrier quals, they get security_level zero (and
+possibly higher, if there are multiple layers of barriers).  Regular quals
+coming from the query text get a security_level one more than the highest
+level used for barrier quals.
+
+When new qual clauses are generated by EquivalenceClass processing,
+they must be assigned a security_level.  This is trickier than it seems.
+One's first instinct is that it would be safe to use the largest level
+found among the source quals for the EquivalenceClass, but that isn't
+safe at all, because it allows unwanted delays of security-barrier quals.
+Consider a barrier qual "t.x = t.y" plus a query qual "t.x = constant",
+and suppose there is another query qual "leaky_function(t.z)" that
+we mustn't evaluate before the barrier qual has been checked.
+We will have an EC {t.x, t.y, constant} which will lead us to replace
+the EC quals with "t.x = constant AND t.y = constant".  (We do not want
+to give up that behavior, either, since the latter condition could allow
+use of an index on t.y, which we would never discover from the original
+quals.)  If these generated quals are assigned the same security_level as
+the query quals, then it's possible for the leaky_function qual to be
+evaluated first, allowing leaky_function to see data from rows that
+possibly don't pass the barrier condition.
+
+Instead, our handling of security levels with ECs works like this:
+
+* Quals are not accepted as source clauses for ECs in the first place
+unless they are leakproof or have security_level zero.
+
+* EC-derived quals are assigned the minimum (not maximum) security_level
+found among the EC's source clauses.
+
+* If the maximum security_level found among the EC's source clauses is
+above zero, then the equality operators selected for derived quals must
+be leakproof.  When no such operator can be found, the EC is treated as
+"broken" and we fall back to emitting its source clauses without any
+additional derived quals.
+
+These rules together ensure that an untrusted qual clause (one with
+security_level above zero) cannot cause an EC to generate a leaky derived
+clause.  This makes it safe to use the minimum not maximum security_level
+for derived clauses.  The rules could result in poor plans due to not
+being able to generate derived clauses at all, but the risk of that is
+small in practice because most btree equality operators are leakproof.
+Also, by making exceptions for level-zero quals, we ensure that there is
+no plan degradation when no barrier quals are present.
+
+Once we have security levels assigned to all clauses, enforcement
+of barrier-qual ordering restrictions boils down to two rules:
+
+* Table scan plan nodes must not select quals for early execution
+(for example, use them as index qualifiers in an indexscan) unless
+they are leakproof or have security_level no higher than any other
+qual that is due to be executed at the same plan node.  (Use the
+utility function restriction_is_securely_promotable() to check
+whether it's okay to select a qual for early execution.)
+
+* Normal execution of a list of quals must execute them in an order
+that satisfies the same security rule, ie higher security_levels must
+be evaluated later unless leakproof.  (This is handled in a single place
+by order_qual_clauses() in createplan.c.)
+
+order_qual_clauses() uses a heuristic to decide exactly what to do with
+leakproof clauses.  Normally it sorts clauses by security_level then cost,
+being careful that the sort is stable so that we don't reorder clauses
+without a clear reason.  But this could result in a very expensive qual
+being done before a cheaper one that is of higher security_level.
+If the cheaper qual is leaky we have no choice, but if it is leakproof
+we could put it first.  We choose to sort leakproof quals as if they
+have security_level zero, but only when their cost is less than 10X
+cpu_operator_cost; that restriction alleviates the opposite problem of
+doing expensive quals first just because they're leakproof.
+
+Additional rules will be needed to support safe handling of join quals
+when there is a mix of security levels among join quals; for example, it
+will be necessary to prevent leaky higher-security-level quals from being
+evaluated at a lower join level than other quals of lower security level.
+Currently there is no need to consider that since security-prioritized
+quals can only be single-table restriction quals coming from RLS policies
+or security-barrier views, and security-barrier view subqueries are never
+flattened into the parent query.  Hence enforcement of security-prioritized
+quals only happens at the table scan level.  With extra rules for safe
+handling of security levels among join quals, it should be possible to let
+security-barrier views be flattened into the parent query, allowing more
+flexibility of planning while still preserving required ordering of qual
+evaluation.  But that will come later.
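To make the ordering rule above concrete, here is a minimal standalone
sketch of a stable sort on (effective security level, cost) that promotes
cheap leakproof quals to level zero.  It is illustrative only: the
QualInfo struct, the qual names, and the hardwired default
cpu_operator_cost value are assumptions, not the actual
order_qual_clauses() code.

    #include <stdbool.h>
    #include <stdio.h>

    #define CPU_OPERATOR_COST 0.0025    /* default GUC value */

    typedef struct
    {
        const char *name;
        unsigned    security_level; /* 0 = barrier level, most trusted */
        double      cost;           /* estimated evaluation cost */
        bool        leakproof;
    } QualInfo;

    /* Cheap leakproof quals sort as if they had security_level zero */
    static unsigned
    effective_level(const QualInfo *q)
    {
        if (q->leakproof && q->cost < 10 * CPU_OPERATOR_COST)
            return 0;
        return q->security_level;
    }

    int
    main(void)
    {
        QualInfo quals[] = {
            {"barrier_qual", 0, 0.0025, false},
            {"expensive_leaky", 1, 1.0, false},
            {"cheap_leakproof", 1, 0.0025, true},
        };
        int n = sizeof(quals) / sizeof(quals[0]);

        /* Stable insertion sort on (effective level, cost); stability
         * ensures we don't reorder clauses without a clear reason. */
        for (int i = 1; i < n; i++)
        {
            QualInfo key = quals[i];
            int j = i - 1;

            while (j >= 0 &&
                   (effective_level(&quals[j]) > effective_level(&key) ||
                    (effective_level(&quals[j]) == effective_level(&key) &&
                     quals[j].cost > key.cost)))
            {
                quals[j + 1] = quals[j];
                j--;
            }
            quals[j + 1] = key;
        }

        for (int i = 0; i < n; i++)
            printf("%d: %s\n", i + 1, quals[i].name);
        return 0;
    }

The output order is barrier_qual, cheap_leakproof, expensive_leaky: the
cheap leakproof level-1 qual is promoted ahead of the expensive leaky
level-1 qual, while the barrier qual still runs first.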
+
+
 Post scan/join planning
 -----------------------
diff --git a/src/backend/optimizer/geqo/geqo_copy.c b/src/backend/optimizer/geqo/geqo_copy.c
index 475d221dd2..8fd20c5986 100644
--- a/src/backend/optimizer/geqo/geqo_copy.c
+++ b/src/backend/optimizer/geqo/geqo_copy.c
@@ -2,7 +2,7 @@
  *
  * geqo_copy.c
  *
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * src/backend/optimizer/geqo/geqo_copy.c
diff --git a/src/backend/optimizer/geqo/geqo_erx.c b/src/backend/optimizer/geqo/geqo_erx.c
index 1a43ab7288..133fe32348 100644
--- a/src/backend/optimizer/geqo/geqo_erx.c
+++ b/src/backend/optimizer/geqo/geqo_erx.c
@@ -111,7 +111,7 @@ gimme_edge_table(PlannerInfo *root, Gene *tour1, Gene *tour2,
     for (index1 = 0; index1 < num_gene; index1++)
     {
         /*
-         * presume the tour is circular, i.e. 1->2, 2->3, 3->1 this operaton
+         * presume the tour is circular, i.e. 1->2, 2->3, 3->1 this operation
          * maps n back to 1
          */
@@ -314,7 +314,7 @@ gimme_gene(PlannerInfo *root, Edge edge, Edge *edge_table)
     /*
      * give priority to candidates with fewest remaining unused edges;
      * find out what the minimum number of unused edges is
-     * (minimum_edges); if there is more than one cadidate with the
+     * (minimum_edges); if there is more than one candidate with the
      * minimum number of unused edges keep count of this number
      * (minimum_count);
      */
@@ -458,7 +458,7 @@ edge_failure(PlannerInfo *root, Gene *gene, int index, Edge *edge_table, int num
         if (edge_table[i].unused_edges >= 0)
             return (Gene) i;

-    elog(LOG, "no edge found via looking for the last ununsed point");
+    elog(LOG, "no edge found via looking for the last unused point");
 }
diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c
index 88acebc1f2..b5cab0c351 100644
--- a/src/backend/optimizer/geqo/geqo_eval.c
+++ b/src/backend/optimizer/geqo/geqo_eval.c
@@ -3,7 +3,7 @@
  * geqo_eval.c
  *    Routines to evaluate query trees
  *
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * src/backend/optimizer/geqo/geqo_eval.c
@@ -74,9 +74,7 @@ geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
      */
     mycontext = AllocSetContextCreate(CurrentMemoryContext,
                                       "GEQO",
-                                      ALLOCSET_DEFAULT_MINSIZE,
-                                      ALLOCSET_DEFAULT_INITSIZE,
-                                      ALLOCSET_DEFAULT_MAXSIZE);
+                                      ALLOCSET_DEFAULT_SIZES);
     oldcxt = MemoryContextSwitchTo(mycontext);

     /*
diff --git a/src/backend/optimizer/geqo/geqo_main.c b/src/backend/optimizer/geqo/geqo_main.c
index 73fc38b907..52bd428187 100644
--- a/src/backend/optimizer/geqo/geqo_main.c
+++ b/src/backend/optimizer/geqo/geqo_main.c
@@ -4,7 +4,7 @@
  *    solution to the query optimization problem
  *    by means of a Genetic Algorithm (GA)
  *
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
src/backend/optimizer/geqo/geqo_main.c diff --git a/src/backend/optimizer/geqo/geqo_misc.c b/src/backend/optimizer/geqo/geqo_misc.c index b1d99cc0b1..503a19f6d6 100644 --- a/src/backend/optimizer/geqo/geqo_misc.c +++ b/src/backend/optimizer/geqo/geqo_misc.c @@ -3,7 +3,7 @@ * geqo_misc.c * misc. printout and debug stuff * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/optimizer/geqo/geqo_misc.c diff --git a/src/backend/optimizer/geqo/geqo_pool.c b/src/backend/optimizer/geqo/geqo_pool.c index 727c356032..0f7a26c9a1 100644 --- a/src/backend/optimizer/geqo/geqo_pool.c +++ b/src/backend/optimizer/geqo/geqo_pool.c @@ -3,7 +3,7 @@ * geqo_pool.c * Genetic Algorithm (GA) pool stuff * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/optimizer/geqo/geqo_pool.c diff --git a/src/backend/optimizer/geqo/geqo_random.c b/src/backend/optimizer/geqo/geqo_random.c index 2368b8fa96..6f3500649c 100644 --- a/src/backend/optimizer/geqo/geqo_random.c +++ b/src/backend/optimizer/geqo/geqo_random.c @@ -3,7 +3,7 @@ * geqo_random.c * random number generator * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/optimizer/geqo/geqo_random.c diff --git a/src/backend/optimizer/geqo/geqo_selection.c b/src/backend/optimizer/geqo/geqo_selection.c index 991b2e36f9..4d0f6b0881 100644 --- a/src/backend/optimizer/geqo/geqo_selection.c +++ b/src/backend/optimizer/geqo/geqo_selection.c @@ -3,7 +3,7 @@ * geqo_selection.c * linear selection scheme for the genetic query optimizer * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * src/backend/optimizer/geqo/geqo_selection.c diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 34bc42b196..196c6194cb 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -4,7 +4,7 @@ * Routines to find possible search paths for processing a query * * Portions Copyright (c) 2012-2014, TransLattice, Inc. 
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -63,7 +63,8 @@ typedef struct pushdown_safety_info /* These parameters are set by GUC */ bool enable_geqo = false; /* just in case GUC doesn't set it */ int geqo_threshold; -int min_parallel_relation_size; +int min_parallel_table_scan_size; +int min_parallel_index_scan_size; /* Hook for plugins to get control in set_rel_pathlist() */ set_rel_pathlist_hook_type set_rel_pathlist_hook = NULL; @@ -84,7 +85,6 @@ static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, static void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel); static void set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); -static bool function_rte_parallel_ok(RangeTblEntry *rte); static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, @@ -101,7 +101,8 @@ static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, List *live_childrels, - List *all_child_pathkeys); + List *all_child_pathkeys, + List *partitioned_rels); static Path *get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer); @@ -112,8 +113,12 @@ static void set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static void set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); +static void set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); static void set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); +static void set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist); @@ -133,6 +138,8 @@ static void subquery_push_qual(Query *subquery, static void recurse_push_qual(Node *setOp, Query *topquery, RangeTblEntry *rte, Index rti, Node *qual); static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); +static void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels); /* @@ -348,6 +355,14 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel, /* Foreign table */ set_foreign_size(root, rel, rte); } + else if (rte->relkind == RELKIND_PARTITIONED_TABLE) + { + /* + * A partitioned table without leaf partitions is marked + * as a dummy rel. 
+ */ + set_dummy_rel_pathlist(rel); + } else if (rte->tablesample != NULL) { /* Sampled relation */ @@ -371,6 +386,9 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel, case RTE_FUNCTION: set_function_size_estimates(root, rel); break; + case RTE_TABLEFUNC: + set_tablefunc_size_estimates(root, rel); + break; case RTE_VALUES: set_values_size_estimates(root, rel); break; @@ -386,6 +404,9 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel, else set_cte_pathlist(root, rel, rte); break; + case RTE_NAMEDTUPLESTORE: + set_namedtuplestore_pathlist(root, rel, rte); + break; default: elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); break; @@ -443,6 +464,10 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, /* RangeFunction */ set_function_pathlist(root, rel, rte); break; + case RTE_TABLEFUNC: + /* Table Function */ + set_tablefunc_pathlist(root, rel, rte); + break; case RTE_VALUES: /* Values list */ set_values_pathlist(root, rel, rte); @@ -450,6 +475,9 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, case RTE_CTE: /* CTE reference --- fully handled during set_rel_size */ break; + case RTE_NAMEDTUPLESTORE: + /* tuplestore reference --- fully handled during set_rel_size */ + break; default: elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); break; @@ -518,8 +546,7 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, Assert(root->glob->parallelModeOK); /* This should only be called for baserels and appendrel children. */ - Assert(rel->reloptkind == RELOPT_BASEREL || - rel->reloptkind == RELOPT_OTHER_MEMBER_REL); + Assert(IS_SIMPLE_REL(rel)); /* Assorted checks based on rtekind. */ switch (rte->rtekind) @@ -545,12 +572,11 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, */ if (rte->tablesample != NULL) { - Oid proparallel = func_parallel(rte->tablesample->tsmhandler); + char proparallel = func_parallel(rte->tablesample->tsmhandler); if (proparallel != PROPARALLEL_SAFE) return; - if (has_parallel_hazard((Node *) rte->tablesample->args, - false)) + if (!is_parallel_safe(root, (Node *) rte->tablesample->args)) return; } @@ -603,16 +629,18 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, case RTE_FUNCTION: /* Check for parallel-restricted functions. */ - if (!function_rte_parallel_ok(rte)) + if (!is_parallel_safe(root, (Node *) rte->functions)) return; break; - case RTE_VALUES: + case RTE_TABLEFUNC: + /* not parallel safe */ + return; - /* - * The data for a VALUES clause is stored in the plan tree itself, - * so scanning it in a worker is fine. - */ + case RTE_VALUES: + /* Check for parallel-restricted functions. */ + if (!is_parallel_safe(root, (Node *) rte->values_lists)) + return; break; case RTE_CTE: @@ -625,6 +653,17 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, * executed only once. */ return; + + case RTE_NAMEDTUPLESTORE: + + /* + * tuplestore cannot be shared, at least without more + * infrastructure to support that. + */ + return; + + case RTE_REMOTE_DUMMY: + return; } /* @@ -636,14 +675,14 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, * outer join clauses work correctly. It would likely break equivalence * classes, too. */ - if (has_parallel_hazard((Node *) rel->baserestrictinfo, false)) + if (!is_parallel_safe(root, (Node *) rel->baserestrictinfo)) return; /* * Likewise, if the relation's outputs are not parallel-safe, give up. * (Usually, they're just Vars, but sometimes they're not.) 
*/ - if (has_parallel_hazard((Node *) rel->reltarget->exprs, false)) + if (!is_parallel_safe(root, (Node *) rel->reltarget->exprs)) return; /* We have a winner. */ @@ -651,26 +690,6 @@ set_rel_consider_parallel(PlannerInfo *root, RelOptInfo *rel, } /* - * Check whether a function RTE is scanning something parallel-restricted. - */ -static bool -function_rte_parallel_ok(RangeTblEntry *rte) -{ - ListCell *lc; - - foreach(lc, rte->functions) - { - RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); - - Assert(IsA(rtfunc, RangeTblFunction)); - if (has_parallel_hazard(rtfunc->funcexpr, false)) - return false; - } - - return true; -} - -/* * set_plain_rel_pathlist * Build access paths for a plain relation (no subquery, no inheritance) */ @@ -709,49 +728,7 @@ create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel) { int parallel_workers; - /* - * If the user has set the parallel_workers reloption, use that; otherwise - * select a default number of workers. - */ - if (rel->rel_parallel_workers != -1) - parallel_workers = rel->rel_parallel_workers; - else - { - int parallel_threshold; - - /* - * If this relation is too small to be worth a parallel scan, just - * return without doing anything ... unless it's an inheritance child. - * In that case, we want to generate a parallel path here anyway. It - * might not be worthwhile just for this relation, but when combined - * with all of its inheritance siblings it may well pay off. - */ - if (rel->pages < (BlockNumber) min_parallel_relation_size && - rel->reloptkind == RELOPT_BASEREL) - return; - - /* - * Select the number of workers based on the log of the size of the - * relation. This probably needs to be a good deal more - * sophisticated, but we need something here for now. Note that the - * upper limit of the min_parallel_relation_size GUC is chosen to - * prevent overflow here. - */ - parallel_workers = 1; - parallel_threshold = Max(min_parallel_relation_size, 1); - while (rel->pages >= (BlockNumber) (parallel_threshold * 3)) - { - parallel_workers++; - parallel_threshold *= 3; - if (parallel_threshold > INT_MAX / 3) - break; /* avoid overflow */ - } - } - - /* - * In no case use more than max_parallel_workers_per_gather workers. - */ - parallel_workers = Min(parallel_workers, max_parallel_workers_per_gather); + parallel_workers = compute_parallel_worker(rel, rel->pages, -1); /* If any limit was set to zero, the user doesn't want a parallel scan. */ if (parallel_workers <= 0) @@ -879,7 +856,7 @@ set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) /* * set_append_rel_size - * Set size estimates for an "append relation" + * Set size estimates for a simple "append relation" * * The passed-in rel and RTE represent the entire append relation. The * relation's contents are computed by appending together the output of @@ -900,6 +877,8 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, int nattrs; ListCell *l; + Assert(IS_SIMPLE_REL(rel)); + /* * Initialize to compute size estimates for whole append relation. 
* @@ -927,9 +906,11 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *childRTE; RelOptInfo *childrel; List *childquals; - Node *childqual; + Index cq_min_security; + bool have_const_false_cq; ListCell *parentvars; ListCell *childvars; + ListCell *lc; /* append_rel_list contains all append rels; ignore others */ if (appinfo->parent_relid != parentRTindex) @@ -952,34 +933,120 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, * constraint exclusion; so do that first and then check to see if we * can disregard this child. * - * As of 8.4, the child rel's targetlist might contain non-Var - * expressions, which means that substitution into the quals could - * produce opportunities for const-simplification, and perhaps even - * pseudoconstant quals. To deal with this, we strip the RestrictInfo - * nodes, do the substitution, do const-simplification, and then - * reconstitute the RestrictInfo layer. + * The child rel's targetlist might contain non-Var expressions, which + * means that substitution into the quals could produce opportunities + * for const-simplification, and perhaps even pseudoconstant quals. + * Therefore, transform each RestrictInfo separately to see if it + * reduces to a constant or pseudoconstant. (We must process them + * separately to keep track of the security level of each qual.) + */ + childquals = NIL; + cq_min_security = UINT_MAX; + have_const_false_cq = false; + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + Node *childqual; + ListCell *lc2; + + Assert(IsA(rinfo, RestrictInfo)); + childqual = adjust_appendrel_attrs(root, + (Node *) rinfo->clause, + appinfo); + childqual = eval_const_expressions(root, childqual); + /* check for flat-out constant */ + if (childqual && IsA(childqual, Const)) + { + if (((Const *) childqual)->constisnull || + !DatumGetBool(((Const *) childqual)->constvalue)) + { + /* Restriction reduces to constant FALSE or NULL */ + have_const_false_cq = true; + break; + } + /* Restriction reduces to constant TRUE, so drop it */ + continue; + } + /* might have gotten an AND clause, if so flatten it */ + foreach(lc2, make_ands_implicit((Expr *) childqual)) + { + Node *onecq = (Node *) lfirst(lc2); + bool pseudoconstant; + + /* check for pseudoconstant (no Vars or volatile functions) */ + pseudoconstant = + !contain_vars_of_level(onecq, 0) && + !contain_volatile_functions(onecq); + if (pseudoconstant) + { + /* tell createplan.c to check for gating quals */ + root->hasPseudoConstantQuals = true; + } + /* reconstitute RestrictInfo with appropriate properties */ + childquals = lappend(childquals, + make_restrictinfo((Expr *) onecq, + rinfo->is_pushed_down, + rinfo->outerjoin_delayed, + pseudoconstant, + rinfo->security_level, + NULL, NULL, NULL)); + /* track minimum security level among child quals */ + cq_min_security = Min(cq_min_security, rinfo->security_level); + } + } + + /* + * In addition to the quals inherited from the parent, we might have + * securityQuals associated with this particular child node. + * (Currently this can only happen in appendrels originating from + * UNION ALL; inheritance child tables don't have their own + * securityQuals, see expand_inherited_rtentry().) Pull any such + * securityQuals up into the baserestrictinfo for the child. This is + * similar to process_security_barrier_quals() for the parent rel, + * except that we can't make any general deductions from such quals, + * since they don't hold for the whole appendrel. 
*/ - childquals = get_all_actual_clauses(rel->baserestrictinfo); - childquals = (List *) adjust_appendrel_attrs(root, - (Node *) childquals, - appinfo); - childqual = eval_const_expressions(root, (Node *) - make_ands_explicit(childquals)); - if (childqual && IsA(childqual, Const) && - (((Const *) childqual)->constisnull || - !DatumGetBool(((Const *) childqual)->constvalue))) + if (childRTE->securityQuals) + { + Index security_level = 0; + + foreach(lc, childRTE->securityQuals) + { + List *qualset = (List *) lfirst(lc); + ListCell *lc2; + + foreach(lc2, qualset) + { + Expr *qual = (Expr *) lfirst(lc2); + + /* not likely that we'd see constants here, so no check */ + childquals = lappend(childquals, + make_restrictinfo(qual, + true, false, false, + security_level, + NULL, NULL, NULL)); + cq_min_security = Min(cq_min_security, security_level); + } + security_level++; + } + Assert(security_level <= root->qual_security_level); + } + + /* + * OK, we've got all the baserestrictinfo quals for this child. + */ + childrel->baserestrictinfo = childquals; + childrel->baserestrict_min_security = cq_min_security; + + if (have_const_false_cq) { /* - * Restriction reduces to constant FALSE or constant NULL after + * Some restriction clause reduced to constant FALSE or NULL after * substitution, so this child need not be scanned. */ set_dummy_rel_pathlist(childrel); continue; } - childquals = make_ands_implicit((Expr *) childqual); - childquals = make_restrictinfos_from_actual_clauses(root, - childquals); - childrel->baserestrictinfo = childquals; if (relation_excluded_by_constraints(root, childrel, childRTE)) { @@ -1153,19 +1220,11 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, { int parentRTindex = rti; List *live_childrels = NIL; - List *subpaths = NIL; - bool subpaths_valid = true; - List *partial_subpaths = NIL; - bool partial_subpaths_valid = true; - List *all_child_pathkeys = NIL; - List *all_child_outers = NIL; ListCell *l; /* * Generate access paths for each member relation, and remember the - * cheapest path for each one. Also, identify all pathkeys (orderings) - * and parameterizations (required_outer sets) available for the member - * relations. + * non-dummy children. */ foreach(l, root->append_rel_list) { @@ -1173,7 +1232,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, int childRTindex; RangeTblEntry *childRTE; RelOptInfo *childrel; - ListCell *lcp; /* append_rel_list contains all append rels; ignore others */ if (appinfo->parent_relid != parentRTindex) @@ -1208,6 +1266,55 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, * Child is live, so add it to the live_childrels list for use below. */ live_childrels = lappend(live_childrels, childrel); + } + + /* Add paths to the "append" relation. */ + add_paths_to_append_rel(root, rel, live_childrels); +} + + +/* + * add_paths_to_append_rel + * Generate paths for given "append" relation given the set of non-dummy + * child rels. + * + * The function collects all parameterizations and orderings supported by the + * non-dummy children. For every such parameterization or ordering, it creates + * an append path collecting one path from each non-dummy child with given + * parameterization or ordering. Similarly it collects partial paths from + * non-dummy children to create partial append paths. 
+ */ +static void +add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels) +{ + List *subpaths = NIL; + bool subpaths_valid = true; + List *partial_subpaths = NIL; + bool partial_subpaths_valid = true; + List *all_child_pathkeys = NIL; + List *all_child_outers = NIL; + ListCell *l; + List *partitioned_rels = NIL; + RangeTblEntry *rte; + + rte = planner_rt_fetch(rel->relid, root); + if (rte->relkind == RELKIND_PARTITIONED_TABLE) + { + partitioned_rels = get_partitioned_child_rels(root, rel->relid); + /* The root partitioned table is included as a child rel */ + Assert(list_length(partitioned_rels) >= 1); + } + + /* + * For every non-dummy child, remember the cheapest path. Also, identify + * all pathkeys (orderings) and parameterizations (required_outer sets) + * available for the non-dummy member relations. + */ + foreach(l, live_childrels) + { + RelOptInfo *childrel = lfirst(l); + ListCell *lcp; /* * If child has an unparameterized cheapest-total path, add that to @@ -1298,7 +1405,8 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, * if we have zero or one live subpath due to constraint exclusion.) */ if (subpaths_valid) - add_path(rel, (Path *) create_append_path(rel, subpaths, NULL, 0)); + add_path(rel, (Path *) create_append_path(rel, subpaths, NULL, 0, + partitioned_rels)); /* * Consider an append of partial unordered, unparameterized partial paths. @@ -1325,7 +1433,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, /* Generate a partial append path. */ appendpath = create_append_path(rel, partial_subpaths, NULL, - parallel_workers); + parallel_workers, partitioned_rels); /* * XL: In case we had to re-distribute the child relations, don't @@ -1341,7 +1449,8 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, */ if (subpaths_valid) generate_mergeappend_paths(root, rel, live_childrels, - all_child_pathkeys); + all_child_pathkeys, + partitioned_rels); /* * Build Append paths for each parameterization seen among the child rels. 
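As a rough illustration of the collection strategy described in the
add_paths_to_append_rel() header comment above: for each ordering or
parameterization, one path is chosen per non-dummy child, and the Append
is abandoned if some child cannot supply a matching path.  The standalone
sketch below shows only that selection logic; the ChildPath struct, the
"property" encoding, and the numbers are hypothetical, not PostgreSQL
code.

    #include <stdbool.h>
    #include <stdio.h>

    #define NCHILDREN 3
    #define NPATHS 2

    typedef struct
    {
        int     property;   /* which ordering/parameterization this path has */
        double  cost;       /* total cost of the path */
    } ChildPath;

    int
    main(void)
    {
        /* each child offers a couple of paths with different properties */
        ChildPath children[NCHILDREN][NPATHS] = {
            {{0, 100.0}, {1, 140.0}},
            {{0, 50.0}, {1, 65.0}},
            {{0, 200.0}, {1, 230.0}},
        };
        int     wanted = 1;     /* the property the Append must preserve */
        double  append_cost = 0.0;
        bool    subpaths_valid = true;

        for (int c = 0; c < NCHILDREN; c++)
        {
            double  best = -1.0;

            /* pick the cheapest child path with the wanted property */
            for (int p = 0; p < NPATHS; p++)
            {
                if (children[c][p].property != wanted)
                    continue;
                if (best < 0 || children[c][p].cost < best)
                    best = children[c][p].cost;
            }
            if (best < 0)
            {
                /* no matching path for this child: can't build this Append */
                subpaths_valid = false;
                break;
            }
            append_cost += best;    /* Append just concatenates children */
        }

        if (subpaths_valid)
            printf("append path for property %d costs %.1f\n",
                   wanted, append_cost);
        return 0;
    }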
@@ -1383,7 +1492,8 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, if (subpaths_valid) add_path(rel, (Path *) - create_append_path(rel, subpaths, required_outer, 0)); + create_append_path(rel, subpaths, required_outer, 0, + partitioned_rels)); } } @@ -1413,7 +1523,8 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, List *live_childrels, - List *all_child_pathkeys) + List *all_child_pathkeys, + List *partitioned_rels) { ListCell *lcp; @@ -1437,12 +1548,14 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, get_cheapest_path_for_pathkeys(childrel->pathlist, pathkeys, NULL, - STARTUP_COST); + STARTUP_COST, + false); cheapest_total = get_cheapest_path_for_pathkeys(childrel->pathlist, pathkeys, NULL, - TOTAL_COST); + TOTAL_COST, + false); /* * If we can't find any paths with the right order just use the @@ -1475,13 +1588,15 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel, rel, startup_subpaths, pathkeys, - NULL)); + NULL, + partitioned_rels)); if (startup_neq_total) add_path(rel, (Path *) create_merge_append_path(root, rel, total_subpaths, pathkeys, - NULL)); + NULL, + partitioned_rels)); } } @@ -1507,7 +1622,8 @@ get_cheapest_parameterized_child_path(PlannerInfo *root, RelOptInfo *rel, cheapest = get_cheapest_path_for_pathkeys(rel->pathlist, NIL, required_outer, - TOTAL_COST); + TOTAL_COST, + false); Assert(cheapest != NULL); if (bms_equal(PATH_REQ_OUTER(cheapest), required_outer)) return cheapest; @@ -1613,7 +1729,7 @@ set_dummy_rel_pathlist(RelOptInfo *rel) rel->pathlist = NIL; rel->partial_pathlist = NIL; - add_path(rel, (Path *) create_append_path(rel, NIL, NULL, 0)); + add_path(rel, (Path *) create_append_path(rel, NIL, NULL, 0, NIL)); /* * We set the cheapest path immediately, to ensure that IS_DUMMY_REL() @@ -1753,6 +1869,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, } } rel->baserestrictinfo = upperrestrictlist; + /* We don't bother recomputing baserestrict_min_security */ } pfree(safetyInfo.unsafeColumns); @@ -1974,6 +2091,27 @@ set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) } /* + * set_tablefunc_pathlist + * Build the (single) access path for a table func RTE + */ +static void +set_tablefunc_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + Relids required_outer; + + /* + * We don't support pushing join clauses into the quals of a tablefunc + * scan, but it could still have required parameterization due to LATERAL + * refs in the function expression. + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_tablefuncscan_path(root, rel, + required_outer)); +} + +/* * set_cte_pathlist * Build the (single) access path for a non-self-reference CTE RTE * @@ -2040,6 +2178,36 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) } /* + * set_namedtuplestore_pathlist + * Build the (single) access path for a named tuplestore RTE + * + * There's no need for a separate set_namedtuplestore_size phase, since we + * don't support join-qual-parameterized paths for tuplestores. 
+ */ +static void +set_namedtuplestore_pathlist(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte) +{ + Relids required_outer; + + /* Mark rel with estimated output rows, width, etc */ + set_namedtuplestore_size_estimates(root, rel); + + /* + * We don't support pushing join clauses into the quals of a tuplestore + * scan, but it could still have required parameterization due to LATERAL + * refs in its tlist. + */ + required_outer = rel->lateral_relids; + + /* Generate appropriate path */ + add_path(rel, create_namedtuplestorescan_path(root, rel, required_outer)); + + /* Select cheapest path (pretty easy in this case...) */ + set_cheapest(rel); +} + +/* * set_worktable_pathlist * Build the (single) access path for a self-reference CTE RTE * @@ -2091,39 +2259,51 @@ set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) /* * generate_gather_paths - * Generate parallel access paths for a relation by pushing a Gather on - * top of a partial path. + * Generate parallel access paths for a relation by pushing a Gather or + * Gather Merge on top of a partial path. * * This must not be called until after we're done creating all partial paths * for the specified relation. (Otherwise, add_partial_path might delete a - * path that some GatherPath has a reference to.) + * path that some GatherPath or GatherMergePath has a reference to.) */ void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel) { Path *cheapest_partial_path; Path *simple_gather_path; + ListCell *lc; /* If there are no partial paths, there's nothing to do here. */ if (rel->partial_pathlist == NIL) return; /* - * The output of Gather is currently always unsorted, so there's only one - * partial path of interest: the cheapest one. That will be the one at - * the front of partial_pathlist because of the way add_partial_path - * works. - * - * Eventually, we should have a Gather Merge operation that can merge - * multiple tuple streams together while preserving their ordering. We - * could usefully generate such a path from each partial path that has - * non-NIL pathkeys. + * The output of Gather is always unsorted, so there's only one partial + * path of interest: the cheapest one. That will be the one at the front + * of partial_pathlist because of the way add_partial_path works. */ cheapest_partial_path = linitial(rel->partial_pathlist); simple_gather_path = (Path *) create_gather_path(root, rel, cheapest_partial_path, rel->reltarget, NULL, NULL); add_path(rel, simple_gather_path); + + /* + * For each useful ordering, we can consider an order-preserving Gather + * Merge. + */ + foreach(lc, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + GatherMergePath *path; + + if (subpath->pathkeys == NIL) + continue; + + path = create_gather_merge_path(root, rel, subpath, rel->reltarget, + subpath->pathkeys, NULL, NULL); + add_path(rel, &path->path); + } } /* @@ -2346,6 +2526,12 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels) * thereby changing the partition contents and thus the window functions' * results for rows that remain. * + * 5. If the subquery contains any set-returning functions in its targetlist, + * we cannot push volatile quals into it. That would push them below the SRFs + * and thereby change the number of times they are evaluated. Also, a + * volatile qual could succeed for some SRF output rows and fail for others, + * a behavior that cannot occur if it's evaluated before SRF expansion. 
+ * * In addition, we make several checks on the subquery's output columns to see * if it is safe to reference them in pushed-down quals. If output column k * is found to be unsafe to reference, we set safetyInfo->unsafeColumns[k] @@ -2390,8 +2576,10 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, if (subquery->limitOffset != NULL || subquery->limitCount != NULL) return false; - /* Check points 3 and 4 */ - if (subquery->distinctClause || subquery->hasWindowFuncs) + /* Check points 3, 4, and 5 */ + if (subquery->distinctClause || + subquery->hasWindowFuncs || + subquery->hasTargetSRFs) safetyInfo->unsafeVolatile = true; /* @@ -2418,8 +2606,8 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery, if (subquery->setOperations != NULL) return false; /* Check whether setop component output types match top level */ - topop = (SetOperationStmt *) topquery->setOperations; - Assert(topop && IsA(topop, SetOperationStmt)); + topop = castNode(SetOperationStmt, topquery->setOperations); + Assert(topop); compare_tlist_datatypes(subquery->targetList, topop->colTypes, safetyInfo); @@ -2514,7 +2702,8 @@ check_output_expressions(Query *subquery, pushdown_safety_info *safetyInfo) continue; /* Functions returning sets are unsafe (point 1) */ - if (expression_returns_set((Node *) tle->expr)) + if (subquery->hasTargetSRFs && + expression_returns_set((Node *) tle->expr)) { safetyInfo->unsafeColumns[tle->resno] = true; continue; @@ -2723,46 +2912,6 @@ subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) recurse_push_qual(subquery->setOperations, subquery, rte, rti, qual); } - else if (IsA(qual, CurrentOfExpr)) - { - /* - * This is possible when a WHERE CURRENT OF expression is applied to a - * table with row-level security. In that case, the subquery should - * contain precisely one rtable entry for the table, and we can safely - * push the expression down into the subquery. This will cause a TID - * scan subquery plan to be generated allowing the target relation to - * be updated. - * - * Someday we might also be able to use a WHERE CURRENT OF expression - * on a view, but currently the rewriter prevents that, so we should - * never see any other case here, but generate sane error messages in - * case it does somehow happen. - */ - if (subquery->rtable == NIL) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("WHERE CURRENT OF is not supported on a view with no underlying relation"))); - - if (list_length(subquery->rtable) > 1) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("WHERE CURRENT OF is not supported on a view with more than one underlying relation"))); - - if (subquery->hasAggs || subquery->groupClause || subquery->groupingSets || subquery->havingQual) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("WHERE CURRENT OF is not supported on a view with grouping or aggregation"))); - - /* - * Adjust the CURRENT OF expression to refer to the underlying table - * in the subquery, and attach it to the subquery's WHERE clause. 
- */ - qual = copyObject(qual); - ((CurrentOfExpr *) qual)->cvarno = 1; - - subquery->jointree->quals = - make_and_qual(subquery->jointree->quals, qual); - } else { /* @@ -2791,7 +2940,7 @@ subquery_push_qual(Query *subquery, RangeTblEntry *rte, Index rti, Node *qual) make_and_qual(subquery->jointree->quals, qual); /* - * We need not change the subquery's hasAggs or hasSublinks flags, + * We need not change the subquery's hasAggs or hasSubLinks flags, * since we can't be pushing down any aggregates that weren't there * before, and we don't push down subselects at all. */ @@ -2927,7 +3076,8 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel) * If it contains a set-returning function, we can't remove it since * that could change the number of rows returned by the subquery. */ - if (expression_returns_set(texpr)) + if (subquery->hasTargetSRFs && + expression_returns_set(texpr)) continue; /* @@ -2948,6 +3098,123 @@ remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel) } } +/* + * create_partial_bitmap_paths + * Build partial bitmap heap path for the relation + */ +void +create_partial_bitmap_paths(PlannerInfo *root, RelOptInfo *rel, + Path *bitmapqual) +{ + int parallel_workers; + double pages_fetched; + + /* Compute heap pages for bitmap heap scan */ + pages_fetched = compute_bitmap_pages(root, rel, bitmapqual, 1.0, + NULL, NULL); + + parallel_workers = compute_parallel_worker(rel, pages_fetched, -1); + + if (parallel_workers <= 0) + return; + + add_partial_path(rel, (Path *) create_bitmap_heap_path(root, rel, + bitmapqual, rel->lateral_relids, 1.0, parallel_workers)); +} + +/* + * Compute the number of parallel workers that should be used to scan a + * relation. We compute the parallel workers based on the size of the heap to + * be scanned and the size of the index to be scanned, then choose a minimum + * of those. + * + * "heap_pages" is the number of pages from the table that we expect to scan, or + * -1 if we don't expect to scan any. + * + * "index_pages" is the number of pages from the index that we expect to scan, or + * -1 if we don't expect to scan any. + */ +int +compute_parallel_worker(RelOptInfo *rel, double heap_pages, double index_pages) +{ + int parallel_workers = 0; + + /* + * If the user has set the parallel_workers reloption, use that; otherwise + * select a default number of workers. + */ + if (rel->rel_parallel_workers != -1) + parallel_workers = rel->rel_parallel_workers; + else + { + /* + * If the number of pages being scanned is insufficient to justify a + * parallel scan, just return zero ... unless it's an inheritance + * child. In that case, we want to generate a parallel path here + * anyway. It might not be worthwhile just for this relation, but + * when combined with all of its inheritance siblings it may well pay + * off. + */ + if (rel->reloptkind == RELOPT_BASEREL && + ((heap_pages >= 0 && heap_pages < min_parallel_table_scan_size) || + (index_pages >= 0 && index_pages < min_parallel_index_scan_size))) + return 0; + + if (heap_pages >= 0) + { + int heap_parallel_threshold; + int heap_parallel_workers = 1; + + /* + * Select the number of workers based on the log of the size of + * the relation. This probably needs to be a good deal more + * sophisticated, but we need something here for now. Note that + * the upper limit of the min_parallel_table_scan_size GUC is + * chosen to prevent overflow here. 
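The threshold tripling described in the comment above (and applied in the
loop just below) makes the worker count grow logarithmically with table
size.  A standalone sketch of that arithmetic, assuming the default
min_parallel_table_scan_size of 1024 pages (8MB); the helper name is made
up, this is not the actual compute_parallel_worker() code:

    #include <limits.h>
    #include <stdio.h>

    static int
    workers_for(double heap_pages, int min_scan_size)
    {
        int     threshold = (min_scan_size > 1) ? min_scan_size : 1;
        int     workers = 1;

        while (heap_pages >= threshold * 3)
        {
            workers++;
            threshold *= 3;
            if (threshold > INT_MAX / 3)
                break;          /* avoid overflow */
        }
        return workers;
    }

    int
    main(void)
    {
        /* 1024, 3072, 9216, 27648 pages -> 1, 2, 3, 4 workers */
        double  sizes[] = {1024, 3072, 9216, 27648};

        for (int i = 0; i < 4; i++)
            printf("%.0f pages -> %d workers\n",
                   sizes[i], workers_for(sizes[i], 1024));
        return 0;
    }

So a table must be three times the minimum scan size before a second
worker is considered, nine times before a third, and so on.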
+ */ + heap_parallel_threshold = Max(min_parallel_table_scan_size, 1); + while (heap_pages >= (BlockNumber) (heap_parallel_threshold * 3)) + { + heap_parallel_workers++; + heap_parallel_threshold *= 3; + if (heap_parallel_threshold > INT_MAX / 3) + break; /* avoid overflow */ + } + + parallel_workers = heap_parallel_workers; + } + + if (index_pages >= 0) + { + int index_parallel_workers = 1; + int index_parallel_threshold; + + /* same calculation as for heap_pages above */ + index_parallel_threshold = Max(min_parallel_index_scan_size, 1); + while (index_pages >= (BlockNumber) (index_parallel_threshold * 3)) + { + index_parallel_workers++; + index_parallel_threshold *= 3; + if (index_parallel_threshold > INT_MAX / 3) + break; /* avoid overflow */ + } + + if (parallel_workers > 0) + parallel_workers = Min(parallel_workers, index_parallel_workers); + else + parallel_workers = index_parallel_workers; + } + } + + /* + * In no case use more than max_parallel_workers_per_gather workers. + */ + parallel_workers = Min(parallel_workers, max_parallel_workers_per_gather); + + return parallel_workers; +} + + /***************************************************************************** * DEBUG SUPPORT *****************************************************************************/ @@ -3014,6 +3281,9 @@ print_path(PlannerInfo *root, Path *path, int indent) case T_FunctionScan: ptype = "FunctionScan"; break; + case T_TableFuncScan: + ptype = "TableFuncScan"; + break; case T_ValuesScan: ptype = "ValuesScan"; break; @@ -3074,6 +3344,10 @@ print_path(PlannerInfo *root, Path *path, int indent) ptype = "Projection"; subpath = ((ProjectionPath *) path)->subpath; break; + case T_ProjectSetPath: + ptype = "ProjectSet"; + subpath = ((ProjectSetPath *) path)->subpath; + break; case T_SortPath: ptype = "Sort"; subpath = ((SortPath *) path)->subpath; diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index 02660c2ba5..758ddea4a5 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -3,7 +3,7 @@ * clausesel.c * Routines to compute clause selectivities * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -22,6 +22,7 @@ #include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/selfuncs.h" +#include "statistics/statistics.h" /* @@ -40,7 +41,8 @@ typedef struct RangeQueryClause static void addRangeClause(RangeQueryClause **rqlist, Node *clause, bool varonleft, bool isLTsel, Selectivity s2); - +static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root, + List *clauses); /**************************************************************************** * ROUTINES TO COMPUTE SELECTIVITIES @@ -60,23 +62,28 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause, * subclauses. However, that's only right if the subclauses have independent * probabilities, and in reality they are often NOT independent. So, * we want to be smarter where we can. - - * Currently, the only extra smarts we have is to recognize "range queries", - * such as "x > 34 AND x < 42". Clauses are recognized as possible range - * query components if they are restriction opclauses whose operators have - * scalarltsel() or scalargtsel() as their restriction selectivity estimator. - * We pair up clauses of this form that refer to the same variable. 
An - * unpairable clause of this kind is simply multiplied into the selectivity - * product in the normal way. But when we find a pair, we know that the - * selectivities represent the relative positions of the low and high bounds - * within the column's range, so instead of figuring the selectivity as - * hisel * losel, we can figure it as hisel + losel - 1. (To visualize this, - * see that hisel is the fraction of the range below the high bound, while - * losel is the fraction above the low bound; so hisel can be interpreted - * directly as a 0..1 value but we need to convert losel to 1-losel before - * interpreting it as a value. Then the available range is 1-losel to hisel. - * However, this calculation double-excludes nulls, so really we need - * hisel + losel + null_frac - 1.) + * + * If the clauses taken together refer to just one relation, we'll try to + * apply selectivity estimates using any extended statistics for that rel. + * Currently we only have (soft) functional dependencies, so apply these in as + * many cases as possible, and fall back on normal estimates for remaining + * clauses. + * + * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses + * are recognized as possible range query components if they are restriction + * opclauses whose operators have scalarltsel() or scalargtsel() as their + * restriction selectivity estimator. We pair up clauses of this form that + * refer to the same variable. An unpairable clause of this kind is simply + * multiplied into the selectivity product in the normal way. But when we + * find a pair, we know that the selectivities represent the relative + * positions of the low and high bounds within the column's range, so instead + * of figuring the selectivity as hisel * losel, we can figure it as hisel + + * losel - 1. (To visualize this, see that hisel is the fraction of the range + * below the high bound, while losel is the fraction above the low bound; so + * hisel can be interpreted directly as a 0..1 value but we need to convert + * losel to 1-losel before interpreting it as a value. Then the available + * range is 1-losel to hisel. However, this calculation double-excludes + * nulls, so really we need hisel + losel + null_frac - 1.) * * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation @@ -96,28 +103,67 @@ clauselist_selectivity(PlannerInfo *root, SpecialJoinInfo *sjinfo) { Selectivity s1 = 1.0; + RelOptInfo *rel; + Bitmapset *estimatedclauses = NULL; RangeQueryClause *rqlist = NULL; ListCell *l; + int listidx; /* - * If there's exactly one clause, then no use in trying to match up pairs, - * so just go directly to clause_selectivity(). + * If there's exactly one clause, just go directly to + * clause_selectivity(). None of what we might do below is relevant. */ if (list_length(clauses) == 1) return clause_selectivity(root, (Node *) linitial(clauses), varRelid, jointype, sjinfo); /* - * Initial scan over clauses. Anything that doesn't look like a potential - * rangequery clause gets multiplied into s1 and forgotten. Anything that - * does gets inserted into an rqlist entry. + * Determine if these clauses reference a single relation. If so, and if + * it has extended statistics, try to apply those. 
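As a worked illustration of the range-pair arithmetic in the comment
above, with hypothetical selectivities for "x > 34 AND x < 42":

    #include <stdio.h>

    int
    main(void)
    {
        double  losel = 0.45;       /* est. fraction with x > 34 (above low bound) */
        double  hisel = 0.60;       /* est. fraction with x < 42 (below high bound) */
        double  null_frac = 0.0;    /* assume no NULLs for simplicity */

        /* naive independence assumption: 0.27, far too high */
        printf("hisel * losel = %.2f\n", hisel * losel);
        /* range interpretation: only the 0.05 overlap qualifies */
        printf("hisel + losel - 1 = %.2f\n", hisel + losel - 1);
        /* correcting for the double-excluded NULL fraction */
        printf("hisel + losel + null_frac - 1 = %.2f\n",
               hisel + losel + null_frac - 1);
        return 0;
    }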
*/ + rel = find_single_rel_for_clauses(root, clauses); + if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL) + { + /* + * Perform selectivity estimations on any clauses found applicable by + * dependencies_clauselist_selectivity. 'estimatedclauses' will be + * filled with the 0-based list positions of clauses used that way, so + * that we can ignore them below. + */ + s1 *= dependencies_clauselist_selectivity(root, clauses, varRelid, + jointype, sjinfo, rel, + &estimatedclauses); + + /* + * This would be the place to apply any other types of extended + * statistics selectivity estimations for remaining clauses. + */ + } + + /* + * Apply normal selectivity estimates for remaining clauses. We'll be + * careful to skip any clauses which were already estimated above. + * + * Anything that doesn't look like a potential rangequery clause gets + * multiplied into s1 and forgotten. Anything that does gets inserted into + * an rqlist entry. + */ + listidx = -1; foreach(l, clauses) { Node *clause = (Node *) lfirst(l); RestrictInfo *rinfo; Selectivity s2; + listidx++; + + /* + * Skip this clause if it's already been estimated by some other + * statistics above. + */ + if (bms_is_member(listidx, estimatedclauses)) + continue; + /* Always compute the selectivity using clause_selectivity */ s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo); @@ -373,6 +419,49 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause, } /* + * find_single_rel_for_clauses + * Examine each clause in 'clauses' and determine if all clauses + * reference only a single relation. If so return that relation, + * otherwise return NULL. + */ +static RelOptInfo * +find_single_rel_for_clauses(PlannerInfo *root, List *clauses) +{ + int lastrelid = 0; + ListCell *l; + + foreach(l, clauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + int relid; + + /* + * If we have a list of bare clauses rather than RestrictInfos, we + * could pull out their relids the hard way with pull_varnos(). + * However, currently the extended-stats machinery won't do anything + * with non-RestrictInfo clauses anyway, so there's no point in + * spending extra cycles; just fail if that's what we have. + */ + if (!IsA(rinfo, RestrictInfo)) + return NULL; + + if (bms_is_empty(rinfo->clause_relids)) + continue; /* we can ignore variable-free clauses */ + if (!bms_get_singleton_member(rinfo->clause_relids, &relid)) + return NULL; /* multiple relations in this clause */ + if (lastrelid == 0) + lastrelid = relid; /* first clause referencing a relation */ + else if (relid != lastrelid) + return NULL; /* relation not same as last one */ + } + + if (lastrelid != 0) + return find_base_rel(root, lastrelid); + + return NULL; /* no clauses */ +} + +/* * bms_is_subset_singleton * * Same result as bms_is_subset(s, bms_make_singleton(x)), diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 485717acce..6e4808d51b 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -61,7 +61,7 @@ * * * Portions Copyright (c) 2012-2014, TransLattice, Inc. 
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -132,6 +132,7 @@ bool enable_material = true; bool enable_mergejoin = true; bool enable_hashjoin = true; bool enable_fast_query_shipping = true; +bool enable_gathermerge = true; typedef struct { @@ -167,6 +168,7 @@ static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root, static void set_rel_width(PlannerInfo *root, RelOptInfo *rel); static double relation_byte_size(double tuples, int width); static double page_size(double tuples, int width); +static double get_parallel_divisor(Path *path); /* @@ -244,32 +246,7 @@ cost_seqscan(Path *path, PlannerInfo *root, /* Adjust costing for parallelism, if used. */ if (path->parallel_workers > 0) { - double parallel_divisor = path->parallel_workers; - double leader_contribution; - - /* - * Early experience with parallel query suggests that when there is - * only one worker, the leader often makes a very substantial - * contribution to executing the parallel portion of the plan, but as - * more workers are added, it does less and less, because it's busy - * reading tuples from the workers and doing whatever non-parallel - * post-processing is needed. By the time we reach 4 workers, the - * leader no longer makes a meaningful contribution. Thus, for now, - * estimate that the leader spends 30% of its time servicing each - * worker, and the remainder executing the parallel plan. - */ - leader_contribution = 1.0 - (0.3 * path->parallel_workers); - if (leader_contribution > 0) - parallel_divisor += leader_contribution; - - /* - * In the case of a parallel plan, the row count needs to represent - * the number of tuples processed per worker. Otherwise, higher-level - * plan nodes that appear below the gather will be costed incorrectly, - * because they'll anticipate receiving more rows than any given copy - * will actually get. - */ - path->rows = clamp_row_est(path->rows / parallel_divisor); + double parallel_divisor = get_parallel_divisor(path); /* The CPU cost is divided among all the workers. */ cpu_run_cost /= parallel_divisor; @@ -280,6 +257,12 @@ cost_seqscan(Path *path, PlannerInfo *root, * prefetching. For now, we assume that the disk run cost can't be * amortized at all. */ + + /* + * In the case of a parallel plan, the row count needs to represent + * the number of tuples processed per worker. + */ + path->rows = clamp_row_est(path->rows / parallel_divisor); } path->startup_cost = startup_cost; @@ -397,6 +380,73 @@ cost_gather(GatherPath *path, PlannerInfo *root, } /* + * cost_gather_merge + * Determines and returns the cost of gather merge path. + * + * GatherMerge merges several pre-sorted input streams, using a heap that at + * any given instant holds the next tuple from each stream. If there are N + * streams, we need about N*log2(N) tuple comparisons to construct the heap at + * startup, and then for each output tuple, about log2(N) comparisons to + * replace the top heap entry with the next tuple from the same stream. 
+ */ +void +cost_gather_merge(GatherMergePath *path, PlannerInfo *root, + RelOptInfo *rel, ParamPathInfo *param_info, + Cost input_startup_cost, Cost input_total_cost, + double *rows) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + Cost comparison_cost; + double N; + double logN; + + /* Mark the path with the correct row estimate */ + if (rows) + path->path.rows = *rows; + else if (param_info) + path->path.rows = param_info->ppi_rows; + else + path->path.rows = rel->rows; + + if (!enable_gathermerge) + startup_cost += disable_cost; + + /* + * Add one to the number of workers to account for the leader. This might + * be overgenerous since the leader will do less work than other workers + * in typical cases, but we'll go with it for now. + */ + Assert(path->num_workers > 0); + N = (double) path->num_workers + 1; + logN = LOG2(N); + + /* Assumed cost per tuple comparison */ + comparison_cost = 2.0 * cpu_operator_cost; + + /* Heap creation cost */ + startup_cost += comparison_cost * N * logN; + + /* Per-tuple heap maintenance cost */ + run_cost += path->path.rows * comparison_cost * logN; + + /* small cost for heap management, like cost_merge_append */ + run_cost += cpu_operator_cost * path->path.rows; + + /* + * Parallel setup and communication cost. Since Gather Merge, unlike + * Gather, requires us to block until a tuple is available from every + * worker, we bump the IPC cost up a little bit as compared with Gather. + * For lack of a better idea, charge an extra 5%. + */ + startup_cost += parallel_setup_cost; + run_cost += parallel_tuple_cost * path->path.rows * 1.05; + + path->path.startup_cost = startup_cost + input_startup_cost; + path->path.total_cost = (startup_cost + run_cost + input_total_cost); +} + +/* * cost_index * Determines and returns the cost of scanning a relation using an index. * @@ -415,7 +465,8 @@ cost_gather(GatherPath *path, PlannerInfo *root, * we have to fetch from the table, so they don't reduce the scan cost. */ void -cost_index(IndexPath *path, PlannerInfo *root, double loop_count) +cost_index(IndexPath *path, PlannerInfo *root, double loop_count, + bool partial_path) { IndexOptInfo *index = path->indexinfo; RelOptInfo *baserel = index->rel; @@ -424,6 +475,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) List *qpquals; Cost startup_cost = 0; Cost run_cost = 0; + Cost cpu_run_cost = 0; Cost indexStartupCost; Cost indexTotalCost; Selectivity indexSelectivity; @@ -437,6 +489,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) Cost cpu_per_tuple; double tuples_fetched; double pages_fetched; + double rand_heap_pages; + double index_pages; /* Should only be applied to base relations */ Assert(IsA(baserel, RelOptInfo) && @@ -483,7 +537,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) amcostestimate = (amcostestimate_function) index->amcostestimate; amcostestimate(root, path, loop_count, &indexStartupCost, &indexTotalCost, - &indexSelectivity, &indexCorrelation); + &indexSelectivity, &indexCorrelation, + &index_pages); /* * Save amcostestimate's results for possible use in bitmap scan planning. 
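The extra output argument passed to amcostestimate here reflects a widening of the index AM costing callback: each access method's estimator now also reports how many index pages a scan will visit, which the partial_path logic in the next hunk feeds to compute_parallel_worker. As a sketch of the widened callback type (the declaration in amapi.h is authoritative; this is only a restatement of it):

    typedef void (*amcostestimate_function) (struct PlannerInfo *root,
                                             struct IndexPath *path,
                                             double loop_count,
                                             Cost *indexStartupCost,
                                             Cost *indexTotalCost,
                                             Selectivity *indexSelectivity,
                                             double *indexCorrelation,
                                             double *indexPages);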
@@ -550,6 +605,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) if (indexonly) pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); + rand_heap_pages = pages_fetched; + max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count; /* @@ -588,6 +645,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) if (indexonly) pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac)); + rand_heap_pages = pages_fetched; + /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */ max_IO_cost = pages_fetched * spc_random_page_cost; @@ -607,6 +666,36 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) min_IO_cost = 0; } + if (partial_path) + { + /* + * For index only scans compute workers based on number of index pages + * fetched; the number of heap pages we fetch might be so small as to + * effectively rule out parallelism, which we don't want to do. + */ + if (indexonly) + rand_heap_pages = -1; + + /* + * Estimate the number of parallel workers required to scan index. Use + * the number of heap pages computed considering heap fetches won't be + * sequential as for parallel scans the pages are accessed in random + * order. + */ + path->path.parallel_workers = compute_parallel_worker(baserel, + rand_heap_pages, index_pages); + + /* + * Fall out if workers can't be assigned for parallel scan, because in + * such a case this path will be rejected. So there is no benefit in + * doing extra computation. + */ + if (path->path.parallel_workers <= 0) + return; + + path->path.parallel_aware = true; + } + /* * Now interpolate based on estimated index order correlation to get total * disk I/O cost for main table accesses. @@ -626,11 +715,24 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count) startup_cost += qpqual_cost.startup; cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; - run_cost += cpu_per_tuple * tuples_fetched; + cpu_run_cost += cpu_per_tuple * tuples_fetched; /* tlist eval costs are paid per output row, not per tuple scanned */ startup_cost += path->path.pathtarget->cost.startup; - run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; + cpu_run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows; + + /* Adjust costing for parallelism, if used. */ + if (path->path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->path); + + path->path.rows = clamp_row_est(path->path.rows / parallel_divisor); + + /* The CPU cost is divided among all the workers. 
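The divisor used here comes from get_parallel_divisor(), the helper added at the bottom of this file. Working its heuristic through: divisor = parallel_workers + Max(0, 1.0 - 0.3 * parallel_workers), so one worker yields 1.7, two yield 2.4, three yield 3.1, and from four workers up the leader term is clamped away and the divisor is simply the worker count.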
*/ + cpu_run_cost /= parallel_divisor; + } + + run_cost += cpu_run_cost; path->path.startup_cost = startup_cost; path->path.total_cost = startup_cost + run_cost; @@ -661,9 +763,8 @@ extract_nonindex_conditions(List *qual_clauses, List *indexquals) foreach(lc, qual_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); - Assert(IsA(rinfo, RestrictInfo)); if (rinfo->pseudoconstant) continue; /* we may drop pseudoconstants here */ if (list_member_ptr(indexquals, rinfo)) @@ -837,10 +938,10 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, Cost startup_cost = 0; Cost run_cost = 0; Cost indexTotalCost; - Selectivity indexSelectivity; QualCost qpqual_cost; Cost cpu_per_tuple; Cost cost_per_page; + Cost cpu_run_cost; double tuples_fetched; double pages_fetched; double spc_seq_page_cost, @@ -861,13 +962,12 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, if (!enable_bitmapscan) startup_cost += disable_cost; - /* - * Fetch total cost of obtaining the bitmap, as well as its total - * selectivity. - */ - cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity); + pages_fetched = compute_bitmap_pages(root, baserel, bitmapqual, + loop_count, &indexTotalCost, + &tuples_fetched); startup_cost += indexTotalCost; + T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; /* Fetch estimated page costs for tablespace containing table. */ get_tablespace_page_costs(baserel->reltablespace, @@ -875,41 +975,6 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, &spc_seq_page_cost); /* - * Estimate number of main-table pages fetched. - */ - tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples); - - T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; - - if (loop_count > 1) - { - /* - * For repeated bitmap scans, scale up the number of tuples fetched in - * the Mackert and Lohman formula by the number of scans, so that we - * estimate the number of pages fetched by all the scans. Then - * pro-rate for one scan. - */ - pages_fetched = index_pages_fetched(tuples_fetched * loop_count, - baserel->pages, - get_indexpath_pages(bitmapqual), - root); - pages_fetched /= loop_count; - } - else - { - /* - * For a single scan, the number of heap pages that need to be fetched - * is the same as the Mackert and Lohman formula for the case T <= b - * (ie, no re-reads needed). - */ - pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); - } - if (pages_fetched >= T) - pages_fetched = T; - else - pages_fetched = ceil(pages_fetched); - - /* * For small numbers of pages we should charge spc_random_page_cost * apiece, while if nearly all the table's pages are being read, it's more * appropriate to charge spc_seq_page_cost apiece. The effect is @@ -938,8 +1003,21 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, startup_cost += qpqual_cost.startup; cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; + cpu_run_cost = cpu_per_tuple * tuples_fetched; - run_cost += cpu_per_tuple * tuples_fetched; + /* Adjust costing for parallelism, if used. */ + if (path->parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(path); + + /* The CPU cost is divided among all the workers. 
*/ + cpu_run_cost /= parallel_divisor; + + path->rows = clamp_row_est(path->rows / parallel_divisor); + } + + + run_cost += cpu_run_cost; /* tlist eval costs are paid per output row, not per tuple scanned */ startup_cost += path->pathtarget->cost.startup; @@ -1295,6 +1373,62 @@ cost_functionscan(Path *path, PlannerInfo *root, } /* + * cost_tablefuncscan + * Determines and returns the cost of scanning a table function. + * + * 'baserel' is the relation to be scanned + * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL + */ +void +cost_tablefuncscan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + QualCost qpqual_cost; + Cost cpu_per_tuple; + RangeTblEntry *rte; + QualCost exprcost; + + /* Should only be applied to base relations that are functions */ + Assert(baserel->relid > 0); + rte = planner_rt_fetch(baserel->relid, root); + Assert(rte->rtekind == RTE_TABLEFUNC); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* + * Estimate costs of executing the table func expression(s). + * + * XXX in principle we ought to charge tuplestore spill costs if the + * number of rows is large. However, given how phony our rowcount + * estimates for tablefuncs tend to be, there's not a lot of point in that + * refinement right now. + */ + cost_qual_eval_node(&exprcost, (Node *) rte->tablefunc, root); + + startup_cost += exprcost.startup + exprcost.per_tuple; + + /* Add scanning CPU costs */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple; + run_cost += cpu_per_tuple * baserel->tuples; + + /* tlist eval costs are paid per output row, not per tuple scanned */ + startup_cost += path->pathtarget->cost.startup; + run_cost += path->pathtarget->cost.per_tuple * path->rows; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* * cost_valuesscan * Determines and returns the cost of scanning a VALUES RTE. * @@ -1389,6 +1523,43 @@ cost_ctescan(Path *path, PlannerInfo *root, } /* + * cost_namedtuplestorescan + * Determines and returns the cost of scanning a named tuplestore. + */ +void +cost_namedtuplestorescan(Path *path, PlannerInfo *root, + RelOptInfo *baserel, ParamPathInfo *param_info) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + QualCost qpqual_cost; + Cost cpu_per_tuple; + + /* Should only be applied to base relations that are Tuplestores */ + Assert(baserel->relid > 0); + Assert(baserel->rtekind == RTE_NAMEDTUPLESTORE); + + /* Mark the path with the correct row estimate */ + if (param_info) + path->rows = param_info->ppi_rows; + else + path->rows = baserel->rows; + + /* Charge one CPU tuple cost per row for tuplestore manipulation */ + cpu_per_tuple = cpu_tuple_cost; + + /* Add scanning CPU costs */ + get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); + + startup_cost += qpqual_cost.startup; + cpu_per_tuple += cpu_tuple_cost + qpqual_cost.per_tuple; + run_cost += cpu_per_tuple * baserel->tuples; + + path->startup_cost = startup_cost; + path->total_cost = startup_cost + run_cost; +} + +/* * cost_recursive_union * Determines and returns the cost of performing a recursive union, * and also the estimated output size. 
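cost_tablefuncscan and cost_namedtuplestorescan above both follow the costing shape already used by cost_functionscan, cost_valuesscan, and cost_ctescan. As a comment-style summary of that shared pattern (an illustration, not code from the patch):

    /*
     * startup_cost = qpqual_cost.startup [+ one-time expression costs]
     * run_cost     = (cpu_tuple_cost [+ per-tuple surcharges]
     *                 + qpqual_cost.per_tuple) * baserel->tuples
     *                + pathtarget->cost.per_tuple * path->rows
     * total_cost   = startup_cost + run_cost
     */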
@@ -1583,8 +1754,7 @@ cost_sort(Path *path, PlannerInfo *root, * at any given instant holds the next tuple from each stream. If there * are N streams, we need about N*log2(N) tuple comparisons to construct * the heap at startup, and then for each output tuple, about log2(N) - * comparisons to delete the top heap entry and another log2(N) comparisons - * to insert its successor from the same stream. + * comparisons to replace the top entry. * * (The effective value of N will drop once some of the input streams are * exhausted, but it seems unlikely to be worth trying to account for that.) @@ -1625,7 +1795,7 @@ cost_merge_append(Path *path, PlannerInfo *root, startup_cost += comparison_cost * N * logN; /* Per-tuple heap maintenance cost */ - run_cost += tuples * comparison_cost * 2.0 * logN; + run_cost += tuples * comparison_cost * logN; /* * Also charge a small amount (arbitrarily set equal to operator cost) per @@ -1757,11 +1927,16 @@ cost_agg(Path *path, PlannerInfo *root, total_cost = startup_cost + cpu_tuple_cost; output_tuples = 1; } - else if (aggstrategy == AGG_SORTED) + else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED) { /* Here we are able to deliver output on-the-fly */ startup_cost = input_startup_cost; total_cost = input_total_cost; + if (aggstrategy == AGG_MIXED && !enable_hashagg) + { + startup_cost += disable_cost; + total_cost += disable_cost; + } /* calcs phrased this way to match HASHED case, see note above */ total_cost += aggcosts->transCost.startup; total_cost += aggcosts->transCost.per_tuple * input_tuples; @@ -1821,12 +1996,10 @@ cost_windowagg(Path *path, PlannerInfo *root, */ foreach(lc, windowFuncs) { - WindowFunc *wfunc = (WindowFunc *) lfirst(lc); + WindowFunc *wfunc = lfirst_node(WindowFunc, lc); Cost wfunccost; QualCost argcosts; - Assert(IsA(wfunc, WindowFunc)); - wfunccost = get_func_cost(wfunc->winfnoid) * cpu_operator_cost; /* also add the input expressions' cost to per-input-row costs */ @@ -1914,15 +2087,13 @@ cost_group(Path *path, PlannerInfo *root, * 'jointype' is the type of join to be performed * 'outer_path' is the outer input to the join * 'inner_path' is the inner input to the join - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if jointype is SEMI or ANTI + * 'extra' contains miscellaneous information about the join */ void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, Path *outer_path, Path *inner_path, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors) + JoinPathExtraData *extra) { Cost startup_cost = 0; Cost run_cost = 0; @@ -1953,10 +2124,12 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, inner_run_cost = inner_path->total_cost - inner_path->startup_cost; inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost; - if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || + extra->inner_unique) { /* - * SEMI or ANTI join: executor will stop after first match. + * With a SEMI or ANTI join, or if the innerrel is known unique, the + * executor will stop after the first match. * * Getting decent estimates requires inspection of the join quals, * which we choose to postpone to final_cost_nestloop. 
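For a sense of what final_cost_nestloop does with those estimates (worked numbers only; the real code adds rescan-cost refinements): with 1000 outer rows, an outer_match_frac of 0.4, and a match_count of 3, it charges rint(1000 * 0.4) = 400 matched outer rows for scanning a fraction inner_scan_frac = 2.0 / (3 + 1) = 0.5 of the inner path apiece, while the 600 unmatched rows each pay for a complete inner scan. compute_semi_anti_join_factors supplies both inputs: outer_match_frac is the SEMI-style selectivity of the join quals, and match_count is the average number of matches per matched outer row, clamped to at least 1.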
@@ -1989,14 +2162,12 @@ initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, * * 'path' is already filled in except for the rows and cost fields * 'workspace' is the result from initial_cost_nestloop - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if path->jointype is SEMI or ANTI + * 'extra' contains miscellaneous information about the join */ void final_cost_nestloop(PlannerInfo *root, NestPath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors) + JoinPathExtraData *extra) { Path *outer_path = path->outerjoinpath; Path *inner_path = path->innerjoinpath; @@ -2020,6 +2191,15 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, else path->path.rows = path->path.parent->rows; + /* For partial paths, scale row estimate. */ + if (path->path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->path); + + path->path.rows = + clamp_row_est(path->path.rows / parallel_divisor); + } + /* * We could include disable_cost in the preliminary estimate, but that * would amount to optimizing for the case where the join method is @@ -2030,10 +2210,12 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, /* cost of inner-relation source data (we already dealt with outer rel) */ - if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI) + if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI || + extra->inner_unique) { /* - * SEMI or ANTI join: executor will stop after first match. + * With a SEMI or ANTI join, or if the innerrel is known unique, the + * executor will stop after the first match. */ Cost inner_run_cost = workspace->inner_run_cost; Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost; @@ -2049,8 +2231,8 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, * clamp inner_scan_frac to at most 1.0; but since match_count is at * least 1, no such clamp is needed now.) */ - outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac); - inner_scan_frac = 2.0 / (semifactors->match_count + 1.0); + outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac); + inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0); /* * Compute number of tuples processed (not number emitted!). First, @@ -2174,7 +2356,7 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path, * 'inner_path' is the inner input to the join * 'outersortkeys' is the list of sort keys for the outer path * 'innersortkeys' is the list of sort keys for the inner path - * 'sjinfo' is extra info about the join for selectivity estimation + * 'extra' contains miscellaneous information about the join * * Note: outersortkeys and innersortkeys should be NIL if no explicit * sort is needed because the respective source path is already ordered. @@ -2185,7 +2367,7 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, List *mergeclauses, Path *outer_path, Path *inner_path, List *outersortkeys, List *innersortkeys, - SpecialJoinInfo *sjinfo) + JoinPathExtraData *extra) { Cost startup_cost = 0; Cost run_cost = 0; @@ -2386,26 +2568,33 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, * final_cost_mergejoin * Final estimate of the cost and result size of a mergejoin path. * - * Unlike other costsize functions, this routine makes one actual decision: - * whether we should materialize the inner path. 
We do that either because - * the inner path can't support mark/restore, or because it's cheaper to - * use an interposed Material node to handle mark/restore. When the decision - * is cost-based it would be logically cleaner to build and cost two separate - * paths with and without that flag set; but that would require repeating most - * of the cost calculations, which are not all that cheap. Since the choice - * will not affect output pathkeys or startup cost, only total cost, there is - * no possibility of wanting to keep both paths. So it seems best to make - * the decision here and record it in the path's materialize_inner field. + * Unlike other costsize functions, this routine makes two actual decisions: + * whether the executor will need to do mark/restore, and whether we should + * materialize the inner path. It would be logically cleaner to build + * separate paths testing these alternatives, but that would require repeating + * most of the cost calculations, which are not all that cheap. Since the + * choice will not affect output pathkeys or startup cost, only total cost, + * there is no possibility of wanting to keep more than one path. So it seems + * best to make the decisions here and record them in the path's + * skip_mark_restore and materialize_inner fields. + * + * Mark/restore overhead is usually required, but can be skipped if we know + * that the executor need find only one match per outer tuple, and that the + * mergeclauses are sufficient to identify a match. + * + * We materialize the inner path if we need mark/restore and either the inner + * path can't support mark/restore, or it's cheaper to use an interposed + * Material node to handle mark/restore. * * 'path' is already filled in except for the rows and cost fields and - * materialize_inner + * skip_mark_restore and materialize_inner * 'workspace' is the result from initial_cost_mergejoin - * 'sjinfo' is extra info about the join for selectivity estimation + * 'extra' contains miscellaneous information about the join */ void final_cost_mergejoin(PlannerInfo *root, MergePath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo) + JoinPathExtraData *extra) { Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; @@ -2438,6 +2627,15 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, else path->jpath.path.rows = path->jpath.path.parent->rows; + /* For partial paths, scale row estimate. */ + if (path->jpath.path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->jpath.path); + + path->jpath.path.rows = + clamp_row_est(path->jpath.path.rows / parallel_divisor); + } + /* * We could include disable_cost in the preliminary estimate, but that * would amount to optimizing for the case where the join method is @@ -2456,6 +2654,21 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple; /* + * With a SEMI or ANTI join, or if the innerrel is known unique, the + * executor will stop scanning for matches after the first match. When + * all the joinclauses are merge clauses, this means we don't ever need to + * back up the merge, and so we can skip mark/restore overhead. 
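A concrete case (hypothetical query): in SELECT * FROM a JOIN b ON a.x = b.x with b.x known unique, the single equality is both the only join clause and a mergeclause, so once an outer tuple meets its key match the merge never needs to back up into b. That is why the test below checks that joinrestrictinfo and path_mergeclauses have the same length: an extra qual that is not a mergeclause could reject the first key match, and then one probe per outer tuple would no longer be guaranteed to suffice.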
+ */ + if ((path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_ANTI || + extra->inner_unique) && + (list_length(path->jpath.joinrestrictinfo) == + list_length(path->path_mergeclauses))) + path->skip_mark_restore = true; + else + path->skip_mark_restore = false; + + /* * Get approx # tuples passing the mergequals. We use approx_tuple_count * here because we need an estimate done with JOIN_INNER semantics. */ @@ -2485,9 +2698,9 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, * computations? * * The whole issue is moot if we are working from a unique-ified outer - * input. + * input, or if we know we don't need to mark/restore at all. */ - if (IsA(outer_path, UniquePath)) + if (IsA(outer_path, UniquePath) ||path->skip_mark_restore) rescannedtuples = 0; else { @@ -2527,10 +2740,16 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path, cpu_operator_cost * inner_path_rows * rescanratio; /* + * If we don't need mark/restore at all, we don't need materialization. + */ + if (path->skip_mark_restore) + path->materialize_inner = false; + + /* * Prefer materializing if it looks cheaper, unless the user has asked to * suppress materialization. */ - if (enable_material && mat_inner_cost < bare_inner_cost) + else if (enable_material && mat_inner_cost < bare_inner_cost) path->materialize_inner = true; /* @@ -2700,16 +2919,14 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey) * 'hashclauses' is the list of joinclauses to be used as hash clauses * 'outer_path' is the outer input to the join * 'inner_path' is the inner input to the join - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if jointype is SEMI or ANTI + * 'extra' contains miscellaneous information about the join */ void initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *hashclauses, Path *outer_path, Path *inner_path, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors) + JoinPathExtraData *extra) { Cost startup_cost = 0; Cost run_cost = 0; @@ -2794,14 +3011,12 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, * 'path' is already filled in except for the rows and cost fields and * num_batches * 'workspace' is the result from initial_cost_hashjoin - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if path->jointype is SEMI or ANTI + * 'extra' contains miscellaneous information about the join */ void final_cost_hashjoin(PlannerInfo *root, HashPath *path, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors) + JoinPathExtraData *extra) { Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; @@ -2826,6 +3041,15 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, else path->jpath.path.rows = path->jpath.path.parent->rows; + /* For partial paths, scale row estimate. 
*/ + if (path->jpath.path.parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(&path->jpath.path); + + path->jpath.path.rows = + clamp_row_est(path->jpath.path.rows / parallel_divisor); + } + /* * We could include disable_cost in the preliminary estimate, but that * would amount to optimizing for the case where the join method is @@ -2857,11 +3081,9 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, innerbucketsize = 1.0; foreach(hcl, hashclauses) { - RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl); + RestrictInfo *restrictinfo = lfirst_node(RestrictInfo, hcl); Selectivity thisbucketsize; - Assert(IsA(restrictinfo, RestrictInfo)); - /* * First we have to figure out which side of the hashjoin clause * is the inner side. @@ -2918,13 +3140,16 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, /* CPU costs */ - if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI) + if (path->jpath.jointype == JOIN_SEMI || + path->jpath.jointype == JOIN_ANTI || + extra->inner_unique) { double outer_matched_rows; Selectivity inner_scan_frac; /* - * SEMI or ANTI join: executor will stop after first match. + * With a SEMI or ANTI join, or if the innerrel is known unique, the + * executor will stop after the first match. * * For an outer-rel row that has at least one match, we can expect the * bucket scan to stop after a fraction 1/(match_count+1) of the @@ -2934,8 +3159,8 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path, * to clamp inner_scan_frac to at most 1.0; but since match_count is * at least 1, no such clamp is needed now.) */ - outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac); - inner_scan_frac = 2.0 / (semifactors->match_count + 1.0); + outer_matched_rows = rint(outer_path_rows * extra->semifactors.outer_match_frac); + inner_scan_frac = 2.0 / (extra->semifactors.match_count + 1.0); startup_cost += hash_qual_cost.startup; run_cost += hash_qual_cost.per_tuple * outer_matched_rows * @@ -3360,7 +3585,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) /* * Aggref and WindowFunc nodes are (and should be) treated like Vars, * ie, zero execution cost in the current model, because they behave - * essentially like Vars in execQual.c. We disregard the costs of + * essentially like Vars at execution. We disregard the costs of * their input expressions for the same reason. The actual execution * costs of the aggregate/window functions and their arguments have to * be factored into plan-node-specific costing of the Agg or WindowAgg @@ -3501,11 +3726,12 @@ get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, /* * compute_semi_anti_join_factors - * Estimate how much of the inner input a SEMI or ANTI join + * Estimate how much of the inner input a SEMI, ANTI, or inner_unique join * can be expected to scan. * * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning * inner rows as soon as it finds a match to the current outer row. + * The same happens if we have detected the inner rel is unique. * We should therefore adjust some of the cost components for this effect. * This function computes some estimates needed for these adjustments. 
* These estimates will be the same regardless of the particular paths used @@ -3515,7 +3741,7 @@ get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel, * Input parameters: * outerrel: outer relation under consideration * innerrel: inner relation under consideration - * jointype: must be JOIN_SEMI or JOIN_ANTI + * jointype: if not JOIN_SEMI or JOIN_ANTI, we assume it's inner_unique * sjinfo: SpecialJoinInfo relevant to this join * restrictlist: join quals * Output parameters: @@ -3537,23 +3763,20 @@ compute_semi_anti_join_factors(PlannerInfo *root, List *joinquals; ListCell *l; - /* Should only be called in these cases */ - Assert(jointype == JOIN_SEMI || jointype == JOIN_ANTI); - /* * In an ANTI join, we must ignore clauses that are "pushed down", since * those won't affect the match logic. In a SEMI join, we do not * distinguish joinquals from "pushed down" quals, so just use the whole - * restrictinfo list. + * restrictinfo list. For other outer join types, we should consider only + * non-pushed-down quals, so that this devolves to an IS_OUTER_JOIN check. */ - if (jointype == JOIN_ANTI) + if (IS_OUTER_JOIN(jointype)) { joinquals = NIL; foreach(l, restrictlist) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - Assert(IsA(rinfo, RestrictInfo)); if (!rinfo->is_pushed_down) joinquals = lappend(joinquals, rinfo); } @@ -3567,7 +3790,7 @@ compute_semi_anti_join_factors(PlannerInfo *root, jselec = clauselist_selectivity(root, joinquals, 0, - jointype, + (jointype == JOIN_ANTI) ? JOIN_ANTI : JOIN_SEMI, sjinfo); /* @@ -3594,7 +3817,7 @@ compute_semi_anti_join_factors(PlannerInfo *root, &norm_sjinfo); /* Avoid leaking a lot of ListCells */ - if (jointype == JOIN_ANTI) + if (IS_OUTER_JOIN(jointype)) list_free(joinquals); /* @@ -3984,9 +4207,8 @@ calc_joinrel_size_estimate(PlannerInfo *root, /* Grovel through the clauses to separate into two lists */ foreach(l, restrictlist) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - Assert(IsA(rinfo, RestrictInfo)); if (rinfo->is_pushed_down) pushedquals = lappend(pushedquals, rinfo); else @@ -4101,6 +4323,7 @@ get_foreign_key_join_selectivity(PlannerInfo *root, { ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); bool ref_is_outer; + bool use_smallest_selectivity = false; List *removedlist; ListCell *cell; ListCell *prev; @@ -4221,9 +4444,9 @@ get_foreign_key_join_selectivity(PlannerInfo *root, * be double-counting the null fraction, and (2) it's not very clear * how to combine null fractions for multiple referencing columns. * - * In the first branch of the logic below, null derating is done - * implicitly by relying on clause_selectivity(); in the other two - * paths, we do nothing for now about correcting for nulls. + * In the use_smallest_selectivity code below, null derating is done + * implicitly by relying on clause_selectivity(); in the other cases, + * we do nothing for now about correcting for nulls. * * XXX another point here is that if either side of an FK constraint * is an inheritance parent, we estimate as though the constraint @@ -4246,28 +4469,41 @@ get_foreign_key_join_selectivity(PlannerInfo *root, * the smallest per-column selectivity, instead. (This should * correspond to the FK column with the most nulls.) 
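Worked numbers for this fallback: if a two-column FK's per-column equality clauses have selectivities 0.01 and 0.001, multiplying them, as ordinary clause estimation would, claims one row in 100,000 and double-counts correlation that the FK guarantees; taking the minimum, 0.001, credits the join with what the most restrictive column (expected to be the one with the most nulls) lets through.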
*/ - Selectivity thisfksel = 1.0; - - foreach(cell, removedlist) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); - Selectivity csel; - - csel = clause_selectivity(root, (Node *) rinfo, - 0, jointype, sjinfo); - thisfksel = Min(thisfksel, csel); - } - fkselec *= thisfksel; + use_smallest_selectivity = true; } else if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) { /* * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the - * fraction of LHS rows that have matches. If the referenced - * table is on the inner side, that means the selectivity is 1.0 - * (modulo nulls, which we're ignoring for now). We already - * covered the other case, so no work here. + * fraction of LHS rows that have matches. The referenced table + * is on the inner side (we already handled the other case above), + * so the FK implies that every LHS row has a match *in the + * referenced table*. But any restriction or join clauses below + * here will reduce the number of matches. */ + if (bms_membership(inner_relids) == BMS_SINGLETON) + { + /* + * When the inner side of the semi/anti join is just the + * referenced table, we may take the FK selectivity as equal + * to the selectivity of the table's restriction clauses. + */ + RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid); + double ref_tuples = Max(ref_rel->tuples, 1.0); + + fkselec *= ref_rel->rows / ref_tuples; + } + else + { + /* + * When the inner side of the semi/anti join is itself a join, + * it's hard to guess what fraction of the referenced table + * will get through the join. But we still don't want to + * multiply per-column estimates together. Take the smallest + * per-column selectivity, instead. + */ + use_smallest_selectivity = true; + } } else { @@ -4281,6 +4517,26 @@ get_foreign_key_join_selectivity(PlannerInfo *root, fkselec *= 1.0 / ref_tuples; } + + /* + * Common code for cases where we should use the smallest selectivity + * that would be computed for any one of the FK's clauses. + */ + if (use_smallest_selectivity) + { + Selectivity thisfksel = 1.0; + + foreach(cell, removedlist) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell); + Selectivity csel; + + csel = clause_selectivity(root, (Node *) rinfo, + 0, jointype, sjinfo); + thisfksel = Min(thisfksel, csel); + } + fkselec *= thisfksel; + } } *restrictlist = worklist; @@ -4307,8 +4563,10 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel) /* Should only be applied to base relations that are subqueries */ Assert(rel->relid > 0); +#ifdef USE_ASSERT_CHECKING rte = planner_rt_fetch(rel->relid, root); Assert(rte->rtekind == RTE_SUBQUERY); +#endif /* * Copy raw number of output rows from subquery. All of its paths should @@ -4325,11 +4583,10 @@ set_subquery_size_estimates(PlannerInfo *root, RelOptInfo *rel) */ foreach(lc, subroot->parse->targetList) { - TargetEntry *te = (TargetEntry *) lfirst(lc); + TargetEntry *te = lfirst_node(TargetEntry, lc); Node *texpr = (Node *) te->expr; int32 item_width = 0; - Assert(IsA(te, TargetEntry)); /* junk columns aren't visible to upper query */ if (te->resjunk) continue; @@ -4410,6 +4667,33 @@ set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel) } /* + * set_tablefunc_size_estimates + * Set the size estimates for a base relation that is a table function call. + * + * The rel's targetlist and restrictinfo list must have been constructed + * already. + * + * We set the same fields as set_baserel_size_estimates.
+ */ +void +set_tablefunc_size_estimates(PlannerInfo *root, RelOptInfo *rel) +{ + RangeTblEntry *rte PG_USED_FOR_ASSERTS_ONLY; + + /* Should only be applied to base relations that are functions */ + Assert(rel->relid > 0); +#ifdef USE_ASSERT_CHECKING + rte = planner_rt_fetch(rel->relid, root); + Assert(rte->rtekind == RTE_TABLEFUNC); +#endif + + rel->tuples = 100; + + /* Now estimate number of output rows, etc */ + set_baserel_size_estimates(root, rel); +} + +/* * set_values_size_estimates * Set the size estimates for a base relation that is a values list. * @@ -4479,6 +4763,39 @@ set_cte_size_estimates(PlannerInfo *root, RelOptInfo *rel, double cte_rows) } /* + * set_namedtuplestore_size_estimates + * Set the size estimates for a base relation that is a tuplestore reference. + * + * The rel's targetlist and restrictinfo list must have been constructed + * already. + * + * We set the same fields as set_baserel_size_estimates. + */ +void +set_namedtuplestore_size_estimates(PlannerInfo *root, RelOptInfo *rel) +{ + RangeTblEntry *rte; + + /* Should only be applied to base relations that are tuplestore references */ + Assert(rel->relid > 0); + rte = planner_rt_fetch(rel->relid, root); + Assert(rte->rtekind == RTE_NAMEDTUPLESTORE); + + /* + * Use the estimate provided by the code which is generating the named + * tuplestore. In some cases, the actual number might be available; in + * others the same plan will be re-used, so a "typical" value might be + * estimated and used. + */ + rel->tuples = rte->enrtuples; + if (rel->tuples < 0) + rel->tuples = 1000; + + /* Now estimate number of output rows, etc */ + set_baserel_size_estimates(root, rel); +} + +/* * set_foreign_size_estimates * Set the size estimates for a base relation that is a foreign table. * @@ -4781,7 +5098,6 @@ page_size(double tuples, int width) return ceil(relation_byte_size(tuples, width) / BLCKSZ); } - #ifdef XCP void cost_remote_subplan(Path *path, @@ -4807,3 +5123,97 @@ cost_remote_subplan(Path *path, path->total_cost = startup_cost + run_cost; } #endif + +/* + * Estimate the fraction of the work that each worker will do given the + * number of workers budgeted for the path. + */ +static double +get_parallel_divisor(Path *path) +{ + double parallel_divisor = path->parallel_workers; + double leader_contribution; + + /* + * Early experience with parallel query suggests that when there is only + * one worker, the leader often makes a very substantial contribution to + * executing the parallel portion of the plan, but as more workers are + * added, it does less and less, because it's busy reading tuples from the + * workers and doing whatever non-parallel post-processing is needed. By + * the time we reach 4 workers, the leader no longer makes a meaningful + * contribution. Thus, for now, estimate that the leader spends 30% of + * its time servicing each worker, and the remainder executing the + * parallel plan. + */ + leader_contribution = 1.0 - (0.3 * path->parallel_workers); + if (leader_contribution > 0) + parallel_divisor += leader_contribution; + + return parallel_divisor; +} + +/* + * compute_bitmap_pages + * + * compute number of pages fetched from heap in bitmap heap scan. + */ +double +compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, + int loop_count, Cost *cost, double *tuple) +{ + Cost indexTotalCost; + Selectivity indexSelectivity; + double T; + double pages_fetched; + double tuples_fetched; + + /* + * Fetch total cost of obtaining the bitmap, as well as its total + * selectivity. 
+ */ + cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity); + + /* + * Estimate number of main-table pages fetched. + */ + tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples); + + T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; + + if (loop_count > 1) + { + /* + * For repeated bitmap scans, scale up the number of tuples fetched in + * the Mackert and Lohman formula by the number of scans, so that we + * estimate the number of pages fetched by all the scans. Then + * pro-rate for one scan. + */ + pages_fetched = index_pages_fetched(tuples_fetched * loop_count, + baserel->pages, + get_indexpath_pages(bitmapqual), + root); + pages_fetched /= loop_count; + } + else + { + /* + * For a single scan, the number of heap pages that need to be fetched + * is the same as the Mackert and Lohman formula for the case T <= b + * (ie, no re-reads needed). + */ + pages_fetched = + (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + } + + if (pages_fetched >= T) + pages_fetched = T; + else + pages_fetched = ceil(pages_fetched); + + if (cost) + *cost = indexTotalCost; + if (tuple) + *tuple = tuples_fetched; + + return pages_fetched; +} diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 0e50ad5f34..67bd760fb4 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -6,7 +6,7 @@ * See src/backend/optimizer/README for discussion of EquivalenceClasses. * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -16,6 +16,8 @@ */ #include "postgres.h" +#include <limits.h> + #include "access/stratnum.h" #include "catalog/pg_type.h" #include "nodes/makefuncs.h" @@ -78,9 +80,16 @@ static bool reconsider_full_join_clause(PlannerInfo *root, * care to mark an EquivalenceClass if it came from any such clauses. Also, * we have to check that both sides are either pseudo-constants or strict * functions of Vars, else they might not both go to NULL above the outer - * join. (This is the reason why we need a failure return. It's more + * join. (This is the main reason why we need a failure return. It's more * convenient to check this case here than at the call sites...) * + * We also reject proposed equivalence clauses if they contain leaky functions + * and have security_level above zero. The EC evaluation rules require us to + * apply certain tests at certain joining levels, and we can't tolerate + * delaying any test on security_level grounds. By rejecting candidate clauses + * that might require security delays, we ensure it's safe to apply an EC + * clause as soon as it's supposed to be applied. + * * On success return, we have also initialized the clause's left_ec/right_ec * fields to point to the EquivalenceClass representing it. This saves lookup * effort later. 
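An example of the gate this adds (hypothetical schema): if a security-barrier view contributes the level-zero qual tenant_id = 42 and the outer query supplies f(x) = y at security_level 1, where f() is not marked LEAKPROOF, process_equivalence now refuses the user clause as an EC source; it stays an ordinary RestrictInfo whose execution order_qual_clauses() can hold back behind the barrier qual. Clauses built from leakproof operators, such as the btree integer equality operators, remain eligible at any level.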
@@ -120,6 +129,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, Assert(restrictinfo->left_ec == NULL); Assert(restrictinfo->right_ec == NULL); + /* Reject if it is potentially postponable by security considerations */ + if (restrictinfo->security_level > 0 && !restrictinfo->leakproof) + return false; + /* Extract info from given clause */ Assert(is_opclause(clause)); opno = ((OpExpr *) clause)->opno; @@ -275,6 +288,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, { ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; + ec1->ec_min_security = Min(ec1->ec_min_security, + restrictinfo->security_level); + ec1->ec_max_security = Max(ec1->ec_max_security, + restrictinfo->security_level); /* mark the RI as associated with this eclass */ restrictinfo->left_ec = ec1; restrictinfo->right_ec = ec1; @@ -306,6 +323,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, ec1->ec_has_const |= ec2->ec_has_const; /* can't need to set has_volatile */ ec1->ec_below_outer_join |= ec2->ec_below_outer_join; + ec1->ec_min_security = Min(ec1->ec_min_security, + ec2->ec_min_security); + ec1->ec_max_security = Max(ec1->ec_max_security, + ec2->ec_max_security); ec2->ec_merged = ec1; root->eq_classes = list_delete_ptr(root->eq_classes, ec2); /* just to avoid debugging confusion w/ dangling pointers: */ @@ -315,6 +336,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, ec2->ec_relids = NULL; ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; + ec1->ec_min_security = Min(ec1->ec_min_security, + restrictinfo->security_level); + ec1->ec_max_security = Max(ec1->ec_max_security, + restrictinfo->security_level); /* mark the RI as associated with this eclass */ restrictinfo->left_ec = ec1; restrictinfo->right_ec = ec1; @@ -329,6 +354,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, false, item2_type); ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo); ec1->ec_below_outer_join |= below_outer_join; + ec1->ec_min_security = Min(ec1->ec_min_security, + restrictinfo->security_level); + ec1->ec_max_security = Max(ec1->ec_max_security, + restrictinfo->security_level); /* mark the RI as associated with this eclass */ restrictinfo->left_ec = ec1; restrictinfo->right_ec = ec1; @@ -343,6 +372,10 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, false, item1_type); ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo); ec2->ec_below_outer_join |= below_outer_join; + ec2->ec_min_security = Min(ec2->ec_min_security, + restrictinfo->security_level); + ec2->ec_max_security = Max(ec2->ec_max_security, + restrictinfo->security_level); /* mark the RI as associated with this eclass */ restrictinfo->left_ec = ec2; restrictinfo->right_ec = ec2; @@ -366,6 +399,8 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo, ec->ec_below_outer_join = below_outer_join; ec->ec_broken = false; ec->ec_sortref = 0; + ec->ec_min_security = restrictinfo->security_level; + ec->ec_max_security = restrictinfo->security_level; ec->ec_merged = NULL; em1 = add_eq_member(ec, item1, item1_relids, item1_nullable_relids, false, item1_type); @@ -639,6 +674,8 @@ get_eclass_for_sort_expr(PlannerInfo *root, newec->ec_below_outer_join = false; newec->ec_broken = false; newec->ec_sortref = sortref; + newec->ec_min_security = UINT_MAX; + newec->ec_max_security = 0; newec->ec_merged = NULL; if (newec->ec_has_volatile && 
sortref == 0) /* should not happen */ @@ -834,6 +871,7 @@ generate_base_implied_equalities_const(PlannerInfo *root, bms_copy(ec->ec_relids), bms_union(cur_em->em_nullable_relids, const_em->em_nullable_relids), + ec->ec_min_security, ec->ec_below_outer_join, cur_em->em_is_const); } @@ -890,6 +928,7 @@ generate_base_implied_equalities_no_const(PlannerInfo *root, bms_copy(ec->ec_relids), bms_union(prev_em->em_nullable_relids, cur_em->em_nullable_relids), + ec->ec_min_security, ec->ec_below_outer_join, false); } @@ -1021,10 +1060,12 @@ generate_join_implied_equalities_for_ecs(PlannerInfo *root, ListCell *lc; /* If inner rel is a child, extra setup work is needed */ - if (inner_rel->reloptkind == RELOPT_OTHER_MEMBER_REL) + if (IS_OTHER_REL(inner_rel)) { + Assert(!bms_is_empty(inner_rel->top_parent_relids)); + /* Fetch relid set for the topmost parent rel */ - nominal_inner_relids = find_childrel_top_parent(root, inner_rel)->relids; + nominal_inner_relids = inner_rel->top_parent_relids; /* ECs will be marked with the parent's relid, not the child's */ nominal_join_relids = bms_union(outer_relids, nominal_inner_relids); } @@ -1285,8 +1326,7 @@ generate_join_implied_equalities_broken(PlannerInfo *root, * mentioned in the ec_sources clauses, we have to be prepared to apply * multiple levels of Var translation. */ - if (inner_rel->reloptkind == RELOPT_OTHER_MEMBER_REL && - result != NIL) + if (IS_OTHER_REL(inner_rel) && result != NIL) result = (List *) adjust_appendrel_attrs_multilevel(root, (Node *) result, inner_rel); @@ -1313,7 +1353,13 @@ select_equality_operator(EquivalenceClass *ec, Oid lefttype, Oid righttype) opno = get_opfamily_member(opfamily, lefttype, righttype, BTEqualStrategyNumber); - if (OidIsValid(opno)) + if (!OidIsValid(opno)) + continue; + /* If no barrier quals in query, don't worry about leaky operators */ + if (ec->ec_max_security == 0) + return opno; + /* Otherwise, insist that selected operators be leakproof */ + if (get_func_leakproof(get_opcode(opno))) return opno; } return InvalidOid; @@ -1380,7 +1426,8 @@ create_join_clause(PlannerInfo *root, bms_union(leftem->em_relids, rightem->em_relids), bms_union(leftem->em_nullable_relids, - rightem->em_nullable_relids)); + rightem->em_nullable_relids), + ec->ec_min_security); /* Mark the clause as redundant, or not */ rinfo->parent_ec = parent_ec; @@ -1691,7 +1738,8 @@ reconsider_outer_join_clause(PlannerInfo *root, RestrictInfo *rinfo, innervar, cur_em->em_expr, bms_copy(inner_relids), - bms_copy(inner_nullable_relids)); + bms_copy(inner_nullable_relids), + cur_ec->ec_min_security); if (process_equivalence(root, newrinfo, true)) match = true; } @@ -1833,7 +1881,8 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) leftvar, cur_em->em_expr, bms_copy(left_relids), - bms_copy(left_nullable_relids)); + bms_copy(left_nullable_relids), + cur_ec->ec_min_security); if (process_equivalence(root, newrinfo, true)) matchleft = true; } @@ -1847,7 +1896,8 @@ reconsider_full_join_clause(PlannerInfo *root, RestrictInfo *rinfo) rightvar, cur_em->em_expr, bms_copy(right_relids), - bms_copy(right_nullable_relids)); + bms_copy(right_nullable_relids), + cur_ec->ec_min_security); if (process_equivalence(root, newrinfo, true)) matchright = true; } @@ -2131,6 +2181,9 @@ generate_implied_equalities_for_column(PlannerInfo *root, Relids parent_relids; ListCell *lc1; + /* Indexes are available only on base or "other" member relations. 
*/ + Assert(IS_SIMPLE_REL(rel)); + /* If it's a child rel, we'll need to know what its parent(s) are */ if (is_child_rel) parent_relids = find_childrel_parents(root, rel); @@ -2364,8 +2417,11 @@ eclass_useful_for_merging(PlannerInfo *root, */ /* If specified rel is a child, we must consider the topmost parent rel */ - if (rel->reloptkind == RELOPT_OTHER_MEMBER_REL) - relids = find_childrel_top_parent(root, rel)->relids; + if (IS_OTHER_REL(rel)) + { + Assert(!bms_is_empty(rel->top_parent_relids)); + relids = rel->top_parent_relids; + } else relids = rel->relids; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 2952bfb7c2..607a8f97bf 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -4,7 +4,7 @@ * Routines to determine which indexes are usable for scanning a * given relation, and create Paths accordingly. * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -337,8 +337,12 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) bitmapqual = choose_bitmap_and(root, rel, bitindexpaths); bpath = create_bitmap_heap_path(root, rel, bitmapqual, - rel->lateral_relids, 1.0); + rel->lateral_relids, 1.0, 0); add_path(rel, (Path *) bpath); + + /* create a partial bitmap heap path */ + if (rel->consider_parallel && rel->lateral_relids == NULL) + create_partial_bitmap_paths(root, rel, bitmapqual); } /* @@ -410,7 +414,7 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) required_outer = get_bitmap_tree_required_outer(bitmapqual); loop_count = get_loop_count(root, rel->relid, required_outer); bpath = create_bitmap_heap_path(root, rel, bitmapqual, - required_outer, loop_count); + required_outer, loop_count, 0); add_path(rel, (Path *) bpath); } } @@ -813,7 +817,7 @@ get_index_paths(PlannerInfo *root, RelOptInfo *rel, /* * build_index_paths * Given an index and a set of index clauses for it, construct zero - * or more IndexPaths. + * or more IndexPaths. It also constructs zero or more partial IndexPaths. * * We return a list of paths because (1) this routine checks some cases * that should cause us to not generate any IndexPath, and (2) in some @@ -1042,8 +1046,41 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, NoMovementScanDirection, index_only_scan, outer_relids, - loop_count); + loop_count, + false); result = lappend(result, ipath); + + /* + * If appropriate, consider parallel index scan. We don't allow + * parallel index scan for bitmap index scans. + */ + if (index->amcanparallel && + rel->consider_parallel && outer_relids == NULL && + scantype != ST_BITMAPSCAN) + { + ipath = create_index_path(root, index, + index_clauses, + clause_columns, + orderbyclauses, + orderbyclausecols, + useful_pathkeys, + index_is_ordered ? + ForwardScanDirection : + NoMovementScanDirection, + index_only_scan, + outer_relids, + loop_count, + true); + + /* + * if, after costing the path, we find that it's not worth using + * parallel workers, just free it. 
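The pfree branch just below ties back to the costsize.c change earlier in this commit: when create_index_path is called with partial_path = true, cost_index runs compute_parallel_worker over the estimated heap and index pages and returns early if it settles on zero workers, so a zero parallel_workers count here means the path is not worth keeping and is discarded rather than offered to add_partial_path.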
+ */ + if (ipath->path.parallel_workers > 0) + add_partial_path(rel, (Path *) ipath); + else + pfree(ipath); + } } /* @@ -1066,8 +1103,36 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel, BackwardScanDirection, index_only_scan, outer_relids, - loop_count); + loop_count, + false); result = lappend(result, ipath); + + /* If appropriate, consider parallel index scan */ + if (index->amcanparallel && + rel->consider_parallel && outer_relids == NULL && + scantype != ST_BITMAPSCAN) + { + ipath = create_index_path(root, index, + index_clauses, + clause_columns, + NIL, + NIL, + useful_pathkeys, + BackwardScanDirection, + index_only_scan, + outer_relids, + loop_count, + true); + + /* + * if, after costing the path, we find that it's not worth + * using parallel workers, just free it. + */ + if (ipath->path.parallel_workers > 0) + add_partial_path(rel, (Path *) ipath); + else + pfree(ipath); + } } } @@ -1212,12 +1277,11 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, foreach(lc, clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); List *pathlist; Path *bitmapqual; ListCell *j; - Assert(IsA(rinfo, RestrictInfo)); /* Ignore RestrictInfos that aren't ORs */ if (!restriction_is_or_clause(rinfo)) continue; @@ -1249,11 +1313,11 @@ generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel, } else { + RestrictInfo *rinfo = castNode(RestrictInfo, orarg); List *orargs; - Assert(IsA(orarg, RestrictInfo)); - Assert(!restriction_is_or_clause((RestrictInfo *) orarg)); - orargs = list_make1(orarg); + Assert(!restriction_is_or_clause(rinfo)); + orargs = list_make1(rinfo); indlist = build_paths_for_OR(root, rel, orargs, @@ -1557,6 +1621,11 @@ bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel, Path *ipath) bpath.path.pathkeys = NIL; bpath.bitmapqual = ipath; + /* + * Check the cost of temporary path without considering parallelism. + * Parallel bitmap heap path will be considered at later stage. + */ + bpath.path.parallel_workers = 0; cost_bitmap_heap_scan(&bpath.path, root, rel, bpath.path.param_info, ipath, @@ -1599,6 +1668,12 @@ bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths) bpath.path.pathkeys = NIL; bpath.bitmapqual = (Path *) &apath; + /* + * Check the cost of temporary path without considering parallelism. + * Parallel bitmap heap path will be considered at later stage. + */ + bpath.path.parallel_workers = 0; + /* Now we can do cost_bitmap_heap_scan */ cost_bitmap_heap_scan(&bpath.path, root, rel, bpath.path.param_info, @@ -2113,9 +2188,8 @@ match_clauses_to_index(IndexOptInfo *index, foreach(lc, clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); - Assert(IsA(rinfo, RestrictInfo)); match_clause_to_index(index, rinfo, clauseset); } } @@ -2143,6 +2217,23 @@ match_clause_to_index(IndexOptInfo *index, { int indexcol; + /* + * Never match pseudoconstants to indexes. (Normally a match could not + * happen anyway, since a pseudoconstant clause couldn't contain a Var, + * but what if someone builds an expression index on a constant? It's not + * totally unreasonable to do so with a partial index, either.) + */ + if (rinfo->pseudoconstant) + return; + + /* + * If clause can't be used as an indexqual because it must wait till after + * some lower-security-level restriction clause, reject it. 
+ */ + if (!restriction_is_securely_promotable(rinfo, index->rel)) + return; + + /* OK, check each index column for a match */ for (indexcol = 0; indexcol < index->ncolumns; indexcol++) { if (match_clause_to_indexcol(index, @@ -2237,15 +2328,6 @@ match_clause_to_indexcol(IndexOptInfo *index, Oid expr_coll; bool plain_op; - /* - * Never match pseudoconstants to indexes. (Normally this could not - * happen anyway, since a pseudoconstant clause couldn't contain a Var, - * but what if someone builds an expression index on a constant? It's not - * totally unreasonable to do so with a partial index, either.) - */ - if (rinfo->pseudoconstant) - return false; - /* First check for boolean-index cases. */ if (IsBooleanOpfamily(opfamily)) { @@ -2697,6 +2779,9 @@ check_index_predicates(PlannerInfo *root, RelOptInfo *rel) Relids otherrels; ListCell *lc; + /* Indexes are available only on base or "other" member relations. */ + Assert(IS_SIMPLE_REL(rel)); + /* * Initialize the indrestrictinfo lists to be identical to * baserestrictinfo, and check whether there are any partial indexes. If @@ -3025,6 +3110,52 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, return false; } +/* + * indexcol_is_bool_constant_for_query + * + * If an index column is constrained to have a constant value by the query's + * WHERE conditions, then it's irrelevant for sort-order considerations. + * Usually that means we have a restriction clause WHERE indexcol = constant, + * which gets turned into an EquivalenceClass containing a constant, which + * is recognized as redundant by build_index_pathkeys(). But if the index + * column is a boolean variable (or expression), then we are not going to + * see WHERE indexcol = constant, because expression preprocessing will have + * simplified that to "WHERE indexcol" or "WHERE NOT indexcol". So we are not + * going to have a matching EquivalenceClass (unless the query also contains + * "ORDER BY indexcol"). To allow such cases to work the same as they would + * for non-boolean values, this function is provided to detect whether the + * specified index column matches a boolean restriction clause. + */ +bool +indexcol_is_bool_constant_for_query(IndexOptInfo *index, int indexcol) +{ + ListCell *lc; + + /* If the index isn't boolean, we can't possibly get a match */ + if (!IsBooleanOpfamily(index->opfamily[indexcol])) + return false; + + /* Check each restriction clause for the index's rel */ + foreach(lc, index->rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* + * As in match_clause_to_indexcol, never match pseudoconstants to + * indexes. (It might be semantically okay to do so here, but the + * odds of getting a match are negligible, so don't waste the cycles.) 
+ */ + if (rinfo->pseudoconstant) + continue; + + /* See if we can match the clause's expression to the index column */ + if (match_boolean_index_clause((Node *) rinfo->clause, indexcol, index)) + return true; + } + + return false; +} + /**************************************************************************** * ---- ROUTINES TO CHECK OPERANDS ---- @@ -3876,9 +4007,9 @@ adjust_rowcompare_for_index(RowCompareExpr *clause, matching_cols); rc->inputcollids = list_truncate(list_copy(clause->inputcollids), matching_cols); - rc->largs = list_truncate((List *) copyObject(clause->largs), + rc->largs = list_truncate(copyObject(clause->largs), matching_cols); - rc->rargs = list_truncate((List *) copyObject(clause->rargs), + rc->rargs = list_truncate(copyObject(clause->rargs), matching_cols); return (Expr *) rc; } diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index cc7384f7e5..c130d2f17f 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -3,7 +3,7 @@ * joinpath.c * Routines to find all possible paths for processing a set of joins * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -21,6 +21,7 @@ #include "optimizer/cost.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "optimizer/planmain.h" /* Hook for plugins to get control in add_paths_to_joinrel() */ set_join_pathlist_hook_type set_join_pathlist_hook = NULL; @@ -28,6 +29,16 @@ set_join_pathlist_hook_type set_join_pathlist_hook = NULL; #define PATH_PARAM_BY_REL(path, rel) \ ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids)) +static void try_partial_mergejoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *pathkeys, + List *mergeclauses, + List *outersortkeys, + List *innersortkeys, + JoinType jointype, + JoinPathExtraData *extra); static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); @@ -40,6 +51,13 @@ static void consider_parallel_nestloop(PlannerInfo *root, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); +static void consider_parallel_mergejoin(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra, + Path *inner_cheapest_total); static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra); @@ -50,6 +68,16 @@ static List *select_mergejoin_clauses(PlannerInfo *root, List *restrictlist, JoinType jointype, bool *mergejoin_allowed); +static void generate_mergejoin_paths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *innerrel, + Path *outerpath, + JoinType jointype, + JoinPathExtraData *extra, + bool useallclauses, + Path *inner_cheapest_total, + List *merge_pathkeys, + bool is_partial); /* @@ -94,6 +122,49 @@ add_paths_to_joinrel(PlannerInfo *root, extra.param_source_rels = NULL; /* + * See if the inner relation is provably unique for this outer rel. + * + * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't + * matter since the executor can make the equivalent optimization anyway; + * we need not expend planner cycles on proofs. 
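(For instance, a semijoin arising from EXISTS already stops scanning the + * inner side at the first match, which is the same benefit an inner_unique + * proof would buy.)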
For JOIN_UNIQUE_INNER, we + * must be considering a semijoin whose inner side is not provably unique + * (else reduce_unique_semijoins would've simplified it), so there's no + * point in calling innerrel_is_unique. However, if the LHS covers all of + * the semijoin's min_lefthand, then it's appropriate to set inner_unique + * because the path produced by create_unique_path will be unique relative + * to the LHS. (If we have an LHS that's only part of the min_lefthand, + * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid + * letting that value escape this module. + */ + switch (jointype) + { + case JOIN_SEMI: + case JOIN_ANTI: + extra.inner_unique = false; /* well, unproven */ + break; + case JOIN_UNIQUE_INNER: + extra.inner_unique = bms_is_subset(sjinfo->min_lefthand, + outerrel->relids); + break; + case JOIN_UNIQUE_OUTER: + extra.inner_unique = innerrel_is_unique(root, + outerrel->relids, + innerrel, + JOIN_INNER, + restrictlist, + false); + break; + default: + extra.inner_unique = innerrel_is_unique(root, + outerrel->relids, + innerrel, + jointype, + restrictlist, + false); + break; + } + + /* * Find potential mergejoin clauses. We can skip this if we are not * interested in doing a mergejoin. However, mergejoin may be our only * way of implementing a full outer join, so override enable_mergejoin if @@ -109,10 +180,10 @@ add_paths_to_joinrel(PlannerInfo *root, &mergejoin_allowed); /* - * If it's SEMI or ANTI join, compute correction factors for cost - * estimation. These will be the same for all paths. + * If it's SEMI, ANTI, or inner_unique join, compute correction factors + * for cost estimation. These will be the same for all paths. */ - if (jointype == JOIN_SEMI || jointype == JOIN_ANTI) + if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique) compute_semi_anti_join_factors(root, outerrel, innerrel, jointype, sjinfo, restrictlist, &extra.semifactors); @@ -131,7 +202,7 @@ add_paths_to_joinrel(PlannerInfo *root, */ foreach(lc, root->join_info_list) { - SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc); /* * SJ is relevant to this join if we have some part of its RHS @@ -140,19 +211,19 @@ add_paths_to_joinrel(PlannerInfo *root, * join has already been proven legal.) If the SJ is relevant, it * presents constraints for joining to anything not in its RHS. */ - if (bms_overlap(joinrel->relids, sjinfo->min_righthand) && - !bms_overlap(joinrel->relids, sjinfo->min_lefthand)) + if (bms_overlap(joinrel->relids, sjinfo2->min_righthand) && + !bms_overlap(joinrel->relids, sjinfo2->min_lefthand)) extra.param_source_rels = bms_join(extra.param_source_rels, bms_difference(root->all_baserels, - sjinfo->min_righthand)); + sjinfo2->min_righthand)); /* full joins constrain both sides symmetrically */ - if (sjinfo->jointype == JOIN_FULL && - bms_overlap(joinrel->relids, sjinfo->min_lefthand) && - !bms_overlap(joinrel->relids, sjinfo->min_righthand)) + if (sjinfo2->jointype == JOIN_FULL && + bms_overlap(joinrel->relids, sjinfo2->min_lefthand) && + !bms_overlap(joinrel->relids, sjinfo2->min_righthand)) extra.param_source_rels = bms_join(extra.param_source_rels, bms_difference(root->all_baserels, - sjinfo->min_lefthand)); + sjinfo2->min_lefthand)); } /* @@ -309,8 +380,7 @@ try_nestloop_path(PlannerInfo *root, * methodology worthwhile. 
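 * (Note: initial_cost_nestloop now receives the whole JoinPathExtraData, * from which it can consult extra->inner_unique as well as the sjinfo and * semifactors that were previously passed as separate arguments.)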
*/ initial_cost_nestloop(root, &workspace, jointype, - outer_path, inner_path, - extra->sjinfo, &extra->semifactors); + outer_path, inner_path, extra); if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, @@ -321,8 +391,7 @@ try_nestloop_path(PlannerInfo *root, joinrel, jointype, &workspace, - extra->sjinfo, - &extra->semifactors, + extra, outer_path, inner_path, extra->restrictlist, @@ -372,8 +441,7 @@ try_partial_nestloop_path(PlannerInfo *root, * cost. Bail out right away if it looks terrible. */ initial_cost_nestloop(root, &workspace, jointype, - outer_path, inner_path, - extra->sjinfo, &extra->semifactors); + outer_path, inner_path, extra); if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) return; @@ -383,8 +451,7 @@ try_partial_nestloop_path(PlannerInfo *root, joinrel, jointype, &workspace, - extra->sjinfo, - &extra->semifactors, + extra, outer_path, inner_path, extra->restrictlist, @@ -407,11 +474,27 @@ try_mergejoin_path(PlannerInfo *root, List *outersortkeys, List *innersortkeys, JoinType jointype, - JoinPathExtraData *extra) + JoinPathExtraData *extra, + bool is_partial) { Relids required_outer; JoinCostWorkspace workspace; + if (is_partial) + { + try_partial_mergejoin_path(root, + joinrel, + outer_path, + inner_path, + pathkeys, + mergeclauses, + outersortkeys, + innersortkeys, + jointype, + extra); + return; + } + /* * Check to see if proposed path is still parameterized, and reject if the * parameterization wouldn't be sensible. @@ -443,7 +526,7 @@ try_mergejoin_path(PlannerInfo *root, initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, outer_path, inner_path, outersortkeys, innersortkeys, - extra->sjinfo); + extra); if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, @@ -454,7 +537,7 @@ try_mergejoin_path(PlannerInfo *root, joinrel, jointype, &workspace, - extra->sjinfo, + extra, outer_path, inner_path, extra->restrictlist, @@ -472,6 +555,76 @@ try_mergejoin_path(PlannerInfo *root, } /* + * try_partial_mergejoin_path + * Consider a partial merge join path; if it appears useful, push it into + * the joinrel's pathlist via add_partial_path(). + */ +static void +try_partial_mergejoin_path(PlannerInfo *root, + RelOptInfo *joinrel, + Path *outer_path, + Path *inner_path, + List *pathkeys, + List *mergeclauses, + List *outersortkeys, + List *innersortkeys, + JoinType jointype, + JoinPathExtraData *extra) +{ + JoinCostWorkspace workspace; + + /* + * See comments in try_partial_hashjoin_path(). + */ + Assert(bms_is_empty(joinrel->lateral_relids)); + if (inner_path->param_info != NULL) + { + Relids inner_paramrels = inner_path->param_info->ppi_req_outer; + + if (!bms_is_empty(inner_paramrels)) + return; + } + + /* + * If the given paths are already well enough ordered, we can skip doing + * an explicit sort. + */ + if (outersortkeys && + pathkeys_contained_in(outersortkeys, outer_path->pathkeys)) + outersortkeys = NIL; + if (innersortkeys && + pathkeys_contained_in(innersortkeys, inner_path->pathkeys)) + innersortkeys = NIL; + + /* + * See comments in try_partial_nestloop_path(). + */ + initial_cost_mergejoin(root, &workspace, jointype, mergeclauses, + outer_path, inner_path, + outersortkeys, innersortkeys, + extra); + + if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys)) + return; + + /* Might be good enough to be worth trying, so let's try it. 
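(Unlike add_path, add_partial_path compares candidates on total cost and + * pathkeys only; startup cost is ignored, since a partial path will be run + * to completion beneath a Gather or Gather Merge node.)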
*/ + add_partial_path(joinrel, (Path *) + create_mergejoin_path(root, + joinrel, + jointype, + &workspace, + extra, + outer_path, + inner_path, + extra->restrictlist, + pathkeys, + NULL, + mergeclauses, + outersortkeys, + innersortkeys)); +} + +/* * try_hashjoin_path * Consider a hash join path; if it appears useful, push it into * the joinrel's pathlist via add_path(). @@ -507,8 +660,7 @@ try_hashjoin_path(PlannerInfo *root, * never have any output pathkeys, per comments in create_hashjoin_path. */ initial_cost_hashjoin(root, &workspace, jointype, hashclauses, - outer_path, inner_path, - extra->sjinfo, &extra->semifactors); + outer_path, inner_path, extra); if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, @@ -519,8 +671,7 @@ try_hashjoin_path(PlannerInfo *root, joinrel, jointype, &workspace, - extra->sjinfo, - &extra->semifactors, + extra, outer_path, inner_path, extra->restrictlist, @@ -570,8 +721,7 @@ try_partial_hashjoin_path(PlannerInfo *root, * cost. Bail out right away if it looks terrible. */ initial_cost_hashjoin(root, &workspace, jointype, hashclauses, - outer_path, inner_path, - extra->sjinfo, &extra->semifactors); + outer_path, inner_path, extra); if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL)) return; @@ -581,8 +731,7 @@ try_partial_hashjoin_path(PlannerInfo *root, joinrel, jointype, &workspace, - extra->sjinfo, - &extra->semifactors, + extra, outer_path, inner_path, extra->restrictlist, @@ -640,8 +789,11 @@ sort_inner_and_outer(PlannerInfo *root, JoinType jointype, JoinPathExtraData *extra) { + JoinType save_jointype = jointype; Path *outer_path; Path *inner_path; + Path *cheapest_partial_outer = NULL; + Path *cheapest_safe_inner = NULL; List *all_pathkeys; ListCell *l; @@ -691,6 +843,30 @@ sort_inner_and_outer(PlannerInfo *root, } /* + * If the joinrel is parallel-safe, we may be able to consider a partial + * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the + * outer path will be partial, and therefore we won't be able to properly + * guarantee uniqueness. Similarly, we can't handle JOIN_FULL and + * JOIN_RIGHT, because they can produce false null extended rows. Also, + * the resulting path must not be parameterized. + */ + if (joinrel->consider_parallel && + save_jointype != JOIN_UNIQUE_OUTER && + save_jointype != JOIN_FULL && + save_jointype != JOIN_RIGHT && + outerrel->partial_pathlist != NIL && + bms_is_empty(joinrel->lateral_relids)) + { + cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist); + + if (inner_path->parallel_safe) + cheapest_safe_inner = inner_path; + else if (save_jointype != JOIN_UNIQUE_INNER) + cheapest_safe_inner = + get_cheapest_parallel_safe_total_inner(innerrel->pathlist); + } + + /* * Each possible ordering of the available mergejoin clauses will generate * a differently-sorted result path at essentially the same cost. We have * no basis for choosing one over another at this level of joining, but @@ -772,7 +948,265 @@ sort_inner_and_outer(PlannerInfo *root, outerkeys, innerkeys, jointype, - extra); + extra, + false); + + /* + * If we have partial outer and parallel safe inner path then try + * partial mergejoin path. + */ + if (cheapest_partial_outer && cheapest_safe_inner) + try_partial_mergejoin_path(root, + joinrel, + cheapest_partial_outer, + cheapest_safe_inner, + merge_pathkeys, + cur_mergeclauses, + outerkeys, + innerkeys, + jointype, + extra); + } +} + +/* + * generate_mergejoin_paths + * Creates possible mergejoin paths for input outerpath. 
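+ * (This code used to live inside match_unsorted_outer; it is factored out + * so that the same logic can serve both complete outer paths and, with + * is_partial set, partial outer paths for parallel merge joins.)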
+ * + * We generate mergejoins if mergejoin clauses are available. We have + * two ways to generate the inner path for a mergejoin: sort the cheapest + * inner path, or use an inner path that is already suitably ordered for the + * merge. If we have several mergeclauses, it could be that there is no inner + * path (or only a very expensive one) for the full list of mergeclauses, but + * better paths exist if we truncate the mergeclause list (thereby discarding + * some sort key requirements). So, we consider truncations of the + * mergeclause list as well as the full list. (Ideally we'd consider all + * subsets of the mergeclause list, but that seems way too expensive.) + */ +static void +generate_mergejoin_paths(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *innerrel, + Path *outerpath, + JoinType jointype, + JoinPathExtraData *extra, + bool useallclauses, + Path *inner_cheapest_total, + List *merge_pathkeys, + bool is_partial) +{ + List *mergeclauses; + List *innersortkeys; + List *trialsortkeys; + Path *cheapest_startup_inner; + Path *cheapest_total_inner; + JoinType save_jointype = jointype; + int num_sortkeys; + int sortkeycnt; + + if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER) + jointype = JOIN_INNER; + + /* Look for useful mergeclauses (if any) */ + mergeclauses = find_mergeclauses_for_pathkeys(root, + outerpath->pathkeys, + true, + extra->mergeclause_list); + + /* + * Done with this outer path if no chance for a mergejoin. + * + * Special corner case: for "x FULL JOIN y ON true", there will be no join + * clauses at all. Ordinarily we'd generate a clauseless nestloop path, + * but since mergejoin is our only join type that supports FULL JOIN + * without any join clauses, it's necessary to generate a clauseless + * mergejoin path instead. + */ + if (mergeclauses == NIL) + { + if (jointype == JOIN_FULL) + /* okay to try for mergejoin */ ; + else + return; + } + if (useallclauses && + list_length(mergeclauses) != list_length(extra->mergeclause_list)) + return; + + /* Compute the required ordering of the inner path */ + innersortkeys = make_inner_pathkeys_for_merge(root, + mergeclauses, + outerpath->pathkeys); + + /* + * Generate a mergejoin on the basis of sorting the cheapest inner. Since + * a sort will be needed, only cheapest total cost matters. (But + * try_mergejoin_path will do the right thing if inner_cheapest_total is + * already correctly sorted.) + */ + try_mergejoin_path(root, + joinrel, + outerpath, + inner_cheapest_total, + merge_pathkeys, + mergeclauses, + NIL, + innersortkeys, + jointype, + extra, + is_partial); + + /* Can't do anything else if inner path needs to be unique'd */ + if (save_jointype == JOIN_UNIQUE_INNER) + return; + + /* + * Look for presorted inner paths that satisfy the innersortkey list --- + * or any truncation thereof, if we are allowed to build a mergejoin using + * a subset of the merge clauses. Here, we consider both cheap startup + * cost and cheap total cost. + * + * Currently we do not consider parameterized inner paths here. This + * interacts with decisions elsewhere that also discriminate against + * mergejoins with parameterized inputs; see comments in + * src/backend/optimizer/README. + * + * As we shorten the sortkey list, we should consider only paths that are + * strictly cheaper than (in particular, not the same as) any path found + * in an earlier iteration. 
Otherwise we'd be intentionally using fewer + * merge keys than a given path allows (treating the rest as plain + * joinquals), which is unlikely to be a good idea. Also, eliminating + * paths here on the basis of compare_path_costs is a lot cheaper than + * building the mergejoin path only to throw it away. + * + * If inner_cheapest_total is well enough sorted to have not required a + * sort in the path made above, we shouldn't make a duplicate path with + * it, either. We handle that case with the same logic that handles the + * previous consideration, by initializing the variables that track + * cheapest-so-far properly. Note that we do NOT reject + * inner_cheapest_total if we find it matches some shorter set of + * pathkeys. That case corresponds to using fewer mergekeys to avoid + * sorting inner_cheapest_total, whereas we did sort it above, so the + * plans being considered are different. + */ + if (pathkeys_contained_in(innersortkeys, + inner_cheapest_total->pathkeys)) + { + /* inner_cheapest_total didn't require a sort */ + cheapest_startup_inner = inner_cheapest_total; + cheapest_total_inner = inner_cheapest_total; + } + else + { + /* it did require a sort, at least for the full set of keys */ + cheapest_startup_inner = NULL; + cheapest_total_inner = NULL; + } + num_sortkeys = list_length(innersortkeys); + if (num_sortkeys > 1 && !useallclauses) + trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */ + else + trialsortkeys = innersortkeys; /* won't really truncate */ + + for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) + { + Path *innerpath; + List *newclauses = NIL; + + /* + * Look for an inner path ordered well enough for the first + * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified + * destructively, which is why we made a copy... + */ + trialsortkeys = list_truncate(trialsortkeys, sortkeycnt); + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + NULL, + TOTAL_COST, + is_partial); + if (innerpath != NULL && + (cheapest_total_inner == NULL || + compare_path_costs(innerpath, cheapest_total_inner, + TOTAL_COST) < 0)) + { + /* Found a cheap (or even-cheaper) sorted path */ + /* Select the right mergeclauses, if we didn't already */ + if (sortkeycnt < num_sortkeys) + { + newclauses = + find_mergeclauses_for_pathkeys(root, + trialsortkeys, + false, + mergeclauses); + Assert(newclauses != NIL); + } + else + newclauses = mergeclauses; + try_mergejoin_path(root, + joinrel, + outerpath, + innerpath, + merge_pathkeys, + newclauses, + NIL, + NIL, + jointype, + extra, + is_partial); + cheapest_total_inner = innerpath; + } + /* Same on the basis of cheapest startup cost ... */ + innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, + trialsortkeys, + NULL, + STARTUP_COST, + is_partial); + if (innerpath != NULL && + (cheapest_startup_inner == NULL || + compare_path_costs(innerpath, cheapest_startup_inner, + STARTUP_COST) < 0)) + { + /* Found a cheap (or even-cheaper) sorted path */ + if (innerpath != cheapest_total_inner) + { + /* + * Avoid rebuilding clause list if we already made one; saves + * memory in big join trees... 
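+ * (newclauses depends only on sortkeycnt, so the list built for the + * cheapest-total candidate of this truncation can be reused as-is for the + * cheapest-startup candidate.)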
+ */ + if (newclauses == NIL) + { + if (sortkeycnt < num_sortkeys) + { + newclauses = + find_mergeclauses_for_pathkeys(root, + trialsortkeys, + false, + mergeclauses); + Assert(newclauses != NIL); + } + else + newclauses = mergeclauses; + } + try_mergejoin_path(root, + joinrel, + outerpath, + innerpath, + merge_pathkeys, + newclauses, + NIL, + NIL, + jointype, + extra, + is_partial); + } + cheapest_startup_inner = innerpath; + } + + /* + * Don't consider truncated sortkeys if we need all clauses. + */ + if (useallclauses) + break; } } @@ -790,15 +1224,8 @@ sort_inner_and_outer(PlannerInfo *root, * cheapest-total inner-indexscan path (if any), and one on the * cheapest-startup inner-indexscan path (if different). * - * We also consider mergejoins if mergejoin clauses are available. We have - * two ways to generate the inner path for a mergejoin: sort the cheapest - * inner path, or use an inner path that is already suitably ordered for the - * merge. If we have several mergeclauses, it could be that there is no inner - * path (or only a very expensive one) for the full list of mergeclauses, but - * better paths exist if we truncate the mergeclause list (thereby discarding - * some sort key requirements). So, we consider truncations of the - * mergeclause list as well as the full list. (Ideally we'd consider all - * subsets of the mergeclause list, but that seems way too expensive.) + * We also consider mergejoins if mergejoin clauses are available. See + * detailed comments in generate_mergejoin_paths. * * 'joinrel' is the join relation * 'outerrel' is the outer join relation @@ -894,13 +1321,6 @@ match_unsorted_outer(PlannerInfo *root, { Path *outerpath = (Path *) lfirst(lc1); List *merge_pathkeys; - List *mergeclauses; - List *innersortkeys; - List *trialsortkeys; - Path *cheapest_startup_inner; - Path *cheapest_total_inner; - int num_sortkeys; - int sortkeycnt; /* * We cannot use an outer path that is parameterized by the inner rel. @@ -986,216 +1406,94 @@ match_unsorted_outer(PlannerInfo *root, if (inner_cheapest_total == NULL) continue; - /* Look for useful mergeclauses (if any) */ - mergeclauses = find_mergeclauses_for_pathkeys(root, - outerpath->pathkeys, - true, - extra->mergeclause_list); + /* Generate merge join paths */ + generate_mergejoin_paths(root, joinrel, innerrel, outerpath, + save_jointype, extra, useallclauses, + inner_cheapest_total, merge_pathkeys, + false); + } + + /* + * Consider partial nestloop and mergejoin plan if outerrel has any + * partial path and the joinrel is parallel-safe. However, we can't + * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and + * therefore we won't be able to properly guarantee uniqueness. Nor can + * we handle extra_lateral_rels, since partial paths must not be + * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT, + * because they can produce false null extended rows. + */ + if (joinrel->consider_parallel && + save_jointype != JOIN_UNIQUE_OUTER && + save_jointype != JOIN_FULL && + save_jointype != JOIN_RIGHT && + outerrel->partial_pathlist != NIL && + bms_is_empty(joinrel->lateral_relids)) + { + if (nestjoinOK) + consider_parallel_nestloop(root, joinrel, outerrel, innerrel, + save_jointype, extra); /* - * Done with this outer path if no chance for a mergejoin. - * - * Special corner case: for "x FULL JOIN y ON true", there will be no - * join clauses at all. 
Ordinarily we'd generate a clauseless - * nestloop path, but since mergejoin is our only join type that - * supports FULL JOIN without any join clauses, it's necessary to - * generate a clauseless mergejoin path instead. + * If inner_cheapest_total is NULL or non parallel-safe then find the + * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we + * can't use any alternative inner path. */ - if (mergeclauses == NIL) + if (inner_cheapest_total == NULL || + !inner_cheapest_total->parallel_safe) { - if (jointype == JOIN_FULL) - /* okay to try for mergejoin */ ; - else - continue; + if (save_jointype == JOIN_UNIQUE_INNER) + return; + + inner_cheapest_total = get_cheapest_parallel_safe_total_inner( + innerrel->pathlist); } - if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list)) - continue; - /* Compute the required ordering of the inner path */ - innersortkeys = make_inner_pathkeys_for_merge(root, - mergeclauses, - outerpath->pathkeys); + if (inner_cheapest_total) + consider_parallel_mergejoin(root, joinrel, outerrel, innerrel, + save_jointype, extra, + inner_cheapest_total); + } +} - /* - * Generate a mergejoin on the basis of sorting the cheapest inner. - * Since a sort will be needed, only cheapest total cost matters. (But - * try_mergejoin_path will do the right thing if inner_cheapest_total - * is already correctly sorted.) - */ - try_mergejoin_path(root, - joinrel, - outerpath, - inner_cheapest_total, - merge_pathkeys, - mergeclauses, - NIL, - innersortkeys, - jointype, - extra); +/* + * consider_parallel_mergejoin + * Try to build partial paths for a joinrel by joining a partial path + * for the outer relation to a complete path for the inner relation. + * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'jointype' is the type of join to do + * 'extra' contains additional input values + * 'inner_cheapest_total' cheapest total path for innerrel + */ +static void +consider_parallel_mergejoin(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *outerrel, + RelOptInfo *innerrel, + JoinType jointype, + JoinPathExtraData *extra, + Path *inner_cheapest_total) +{ + ListCell *lc1; - /* Can't do anything else if inner path needs to be unique'd */ - if (save_jointype == JOIN_UNIQUE_INNER) - continue; + /* generate merge join path for each partial outer path */ + foreach(lc1, outerrel->partial_pathlist) + { + Path *outerpath = (Path *) lfirst(lc1); + List *merge_pathkeys; /* - * Look for presorted inner paths that satisfy the innersortkey list - * --- or any truncation thereof, if we are allowed to build a - * mergejoin using a subset of the merge clauses. Here, we consider - * both cheap startup cost and cheap total cost. - * - * Currently we do not consider parameterized inner paths here. This - * interacts with decisions elsewhere that also discriminate against - * mergejoins with parameterized inputs; see comments in - * src/backend/optimizer/README. - * - * As we shorten the sortkey list, we should consider only paths that - * are strictly cheaper than (in particular, not the same as) any path - * found in an earlier iteration. Otherwise we'd be intentionally - * using fewer merge keys than a given path allows (treating the rest - * as plain joinquals), which is unlikely to be a good idea. Also, - * eliminating paths here on the basis of compare_path_costs is a lot - * cheaper than building the mergejoin path only to throw it away. 
- * - * If inner_cheapest_total is well enough sorted to have not required - * a sort in the path made above, we shouldn't make a duplicate path - * with it, either. We handle that case with the same logic that - * handles the previous consideration, by initializing the variables - * that track cheapest-so-far properly. Note that we do NOT reject - * inner_cheapest_total if we find it matches some shorter set of - * pathkeys. That case corresponds to using fewer mergekeys to avoid - * sorting inner_cheapest_total, whereas we did sort it above, so the - * plans being considered are different. + * Figure out what useful ordering any paths we create will have. */ - if (pathkeys_contained_in(innersortkeys, - inner_cheapest_total->pathkeys)) - { - /* inner_cheapest_total didn't require a sort */ - cheapest_startup_inner = inner_cheapest_total; - cheapest_total_inner = inner_cheapest_total; - } - else - { - /* it did require a sort, at least for the full set of keys */ - cheapest_startup_inner = NULL; - cheapest_total_inner = NULL; - } - num_sortkeys = list_length(innersortkeys); - if (num_sortkeys > 1 && !useallclauses) - trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */ - else - trialsortkeys = innersortkeys; /* won't really truncate */ - - for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--) - { - Path *innerpath; - List *newclauses = NIL; - - /* - * Look for an inner path ordered well enough for the first - * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified - * destructively, which is why we made a copy... - */ - trialsortkeys = list_truncate(trialsortkeys, sortkeycnt); - innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, - trialsortkeys, - NULL, - TOTAL_COST); - if (innerpath != NULL && - (cheapest_total_inner == NULL || - compare_path_costs(innerpath, cheapest_total_inner, - TOTAL_COST) < 0)) - { - /* Found a cheap (or even-cheaper) sorted path */ - /* Select the right mergeclauses, if we didn't already */ - if (sortkeycnt < num_sortkeys) - { - newclauses = - find_mergeclauses_for_pathkeys(root, - trialsortkeys, - false, - mergeclauses); - Assert(newclauses != NIL); - } - else - newclauses = mergeclauses; - try_mergejoin_path(root, - joinrel, - outerpath, - innerpath, - merge_pathkeys, - newclauses, - NIL, - NIL, - jointype, - extra); - cheapest_total_inner = innerpath; - } - /* Same on the basis of cheapest startup cost ... */ - innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist, - trialsortkeys, - NULL, - STARTUP_COST); - if (innerpath != NULL && - (cheapest_startup_inner == NULL || - compare_path_costs(innerpath, cheapest_startup_inner, - STARTUP_COST) < 0)) - { - /* Found a cheap (or even-cheaper) sorted path */ - if (innerpath != cheapest_total_inner) - { - /* - * Avoid rebuilding clause list if we already made one; - * saves memory in big join trees... - */ - if (newclauses == NIL) - { - if (sortkeycnt < num_sortkeys) - { - newclauses = - find_mergeclauses_for_pathkeys(root, - trialsortkeys, - false, - mergeclauses); - Assert(newclauses != NIL); - } - else - newclauses = mergeclauses; - } - try_mergejoin_path(root, - joinrel, - outerpath, - innerpath, - merge_pathkeys, - newclauses, - NIL, - NIL, - jointype, - extra); - } - cheapest_startup_inner = innerpath; - } + merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, + outerpath->pathkeys); - /* - * Don't consider truncated sortkeys if we need all clauses. 
- */ - if (useallclauses) - break; - } + generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype, + extra, false, inner_cheapest_total, + merge_pathkeys, true); } - - /* - * If the joinrel is parallel-safe and the join type supports nested - * loops, we may be able to consider a partial nestloop plan. However, we - * can't handle JOIN_UNIQUE_OUTER, because the outer path will be partial, - * and therefore we won't be able to properly guarantee uniqueness. Nor - * can we handle extra_lateral_rels, since partial paths must not be - * parameterized. - */ - if (joinrel->consider_parallel && nestjoinOK && - save_jointype != JOIN_UNIQUE_OUTER && - bms_is_empty(joinrel->lateral_relids)) - consider_parallel_nestloop(root, joinrel, outerrel, innerrel, - save_jointype, extra); } /* @@ -1217,8 +1515,12 @@ consider_parallel_nestloop(PlannerInfo *root, JoinType jointype, JoinPathExtraData *extra) { + JoinType save_jointype = jointype; ListCell *lc1; + if (jointype == JOIN_UNIQUE_INNER) + jointype = JOIN_INNER; + foreach(lc1, outerrel->partial_pathlist) { Path *outerpath = (Path *) lfirst(lc1); @@ -1244,18 +1546,19 @@ consider_parallel_nestloop(PlannerInfo *root, continue; /* - * Like match_unsorted_outer, we only consider a single nestloop - * path when the jointype is JOIN_UNIQUE_INNER. But we have to - * scan cheapest_parameterized_paths to find the one we want to - * consider, because cheapest_total_path might not be - * parallel-safe. + * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's + * cheapest_total_path, and we have to unique-ify it. (We might + * be able to relax this to allow other safe, unparameterized + * inner paths, but right now create_unique_path is not on board + * with that.) */ - if (jointype == JOIN_UNIQUE_INNER) + if (save_jointype == JOIN_UNIQUE_INNER) { - if (!bms_is_empty(PATH_REQ_OUTER(innerpath))) + if (innerpath != innerrel->cheapest_total_path) continue; innerpath = (Path *) create_unique_path(root, innerrel, - innerpath, extra->sjinfo); + innerpath, + extra->sjinfo); Assert(innerpath); } @@ -1284,6 +1587,7 @@ hash_inner_and_outer(PlannerInfo *root, JoinType jointype, JoinPathExtraData *extra) { + JoinType save_jointype = jointype; bool isouterjoin = IS_OUTER_JOIN(jointype); List *hashclauses; ListCell *l; @@ -1450,9 +1754,9 @@ hash_inner_and_outer(PlannerInfo *root, * extended rows. Also, the resulting path must not be parameterized. */ if (joinrel->consider_parallel && - jointype != JOIN_UNIQUE_OUTER && - jointype != JOIN_FULL && - jointype != JOIN_RIGHT && + save_jointype != JOIN_UNIQUE_OUTER && + save_jointype != JOIN_FULL && + save_jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL && bms_is_empty(joinrel->lateral_relids)) { @@ -1465,27 +1769,15 @@ hash_inner_and_outer(PlannerInfo *root, /* * Normally, given that the joinrel is parallel-safe, the cheapest * total inner path will also be parallel-safe, but if not, we'll - * have to search cheapest_parameterized_paths for the cheapest - * unparameterized inner path. + * have to search for the cheapest safe, unparameterized inner + * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative + * inner path. 
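+ * + * (get_cheapest_parallel_safe_total_inner simply returns the first + * parallel-safe, unparameterized entry it finds in the given pathlist; + * since add_path keeps pathlists sorted by total cost, that entry is also + * the cheapest.)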
*/ if (cheapest_total_inner->parallel_safe) cheapest_safe_inner = cheapest_total_inner; - else - { - ListCell *lc; - - foreach(lc, innerrel->cheapest_parameterized_paths) - { - Path *innerpath = (Path *) lfirst(lc); - - if (innerpath->parallel_safe && - bms_is_empty(PATH_REQ_OUTER(innerpath))) - { - cheapest_safe_inner = innerpath; - break; - } - } - } + else if (save_jointype != JOIN_UNIQUE_INNER) + cheapest_safe_inner = + get_cheapest_parallel_safe_total_inner(innerrel->pathlist); if (cheapest_safe_inner != NULL) try_partial_hashjoin_path(root, joinrel, @@ -1572,7 +1864,7 @@ select_mergejoin_clauses(PlannerInfo *root, /* * Insist that each side have a non-redundant eclass. This * restriction is needed because various bits of the planner expect - * that each clause in a merge be associatable with some pathkey in a + * that each clause in a merge be associable with some pathkey in a * canonical pathkey list, but redundant eclasses can't appear in * canonical sort orderings. (XXX it might be worth relaxing this, * but not enough time to address it for 8.3.) diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 01d4fea78c..5a68de3cc8 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -3,7 +3,7 @@ * joinrels.c * Routines to determine which relations should be joined * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -32,6 +32,9 @@ static bool is_dummy_rel(RelOptInfo *rel); static void mark_dummy_rel(RelOptInfo *rel); static bool restriction_is_constant_false(List *restrictlist, bool only_pushed_down); +static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, + RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *sjinfo, List *restrictlist); /* @@ -724,6 +727,27 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) return joinrel; } + /* Add paths to the join relation. */ + populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo, + restrictlist); + + bms_free(joinrelids); + + return joinrel; +} + +/* + * populate_joinrel_with_paths + * Add paths to the given joinrel for given pair of joining relations. The + * SpecialJoinInfo provides details about the join and the restrictlist + * contains the join clauses and the other clauses applicable for given pair + * of the joining relations. + */ +static void +populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, + RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *sjinfo, List *restrictlist) +{ /* * Consider paths using each rel as both outer and inner. 
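(This function was split out of make_join_rel so that the path-generation * step can be invoked separately from construction of the joinrel itself.)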
Depending on * the join type, a provably empty outer or inner rel might mean the join @@ -868,10 +892,6 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype); break; } - - bms_free(joinrelids); - - return joinrel; } @@ -1197,7 +1217,7 @@ mark_dummy_rel(RelOptInfo *rel) rel->partial_pathlist = NIL; /* Set up the dummy path */ - add_path(rel, (Path *) create_append_path(rel, NIL, NULL, 0)); + add_path(rel, (Path *) create_append_path(rel, NIL, NULL, 0, NIL)); /* Set or update cheapest_total_path and related fields */ set_cheapest(rel); @@ -1230,9 +1250,8 @@ restriction_is_constant_false(List *restrictlist, bool only_pushed_down) */ foreach(lc, restrictlist) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); - Assert(IsA(rinfo, RestrictInfo)); if (only_pushed_down && !rinfo->is_pushed_down) continue; diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 4436ac111d..2c269062ec 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -7,7 +7,7 @@ * the nature and use of path keys. * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -337,11 +337,13 @@ pathkeys_contained_in(List *keys1, List *keys2) * 'pathkeys' represents a required ordering (in canonical form!) * 'required_outer' denotes allowable outer relations for parameterized paths * 'cost_criterion' is STARTUP_COST or TOTAL_COST + * 'require_parallel_safe' causes us to consider only parallel-safe paths */ Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, - CostSelector cost_criterion) + CostSelector cost_criterion, + bool require_parallel_safe) { Path *matched_path = NULL; ListCell *l; @@ -358,6 +360,9 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, compare_path_costs(matched_path, path, cost_criterion) <= 0) continue; + if (require_parallel_safe && !path->parallel_safe) + continue; + if (pathkeys_contained_in(pathkeys, path->pathkeys) && bms_is_subset(PATH_REQ_OUTER(path), required_outer)) matched_path = path; @@ -407,6 +412,28 @@ get_cheapest_fractional_path_for_pathkeys(List *paths, return matched_path; } + +/* + * get_cheapest_parallel_safe_total_inner + * Find the unparameterized parallel-safe path with the least total cost. + */ +Path * +get_cheapest_parallel_safe_total_inner(List *paths) +{ + ListCell *l; + + foreach(l, paths) + { + Path *innerpath = (Path *) lfirst(l); + + if (innerpath->parallel_safe && + bms_is_empty(PATH_REQ_OUTER(innerpath))) + return innerpath; + } + + return NULL; +} + /**************************************************************************** * NEW PATHKEY FORMATION ****************************************************************************/ @@ -480,17 +507,30 @@ build_index_pathkeys(PlannerInfo *root, index->rel->relids, false); - /* - * If the sort key isn't already present in any EquivalenceClass, then - * it's not an interesting sort order for this query. So we can stop - * now --- lower-order sort keys aren't useful either. 
- */ - if (!cpathkey) - break; - - /* Add to list unless redundant */ - if (!pathkey_is_redundant(cpathkey, retval)) - retval = lappend(retval, cpathkey); + if (cpathkey) + { + /* + * We found the sort key in an EquivalenceClass, so it's relevant + * for this query. Add it to list, unless it's redundant. + */ + if (!pathkey_is_redundant(cpathkey, retval)) + retval = lappend(retval, cpathkey); + } + else + { + /* + * Boolean index keys might be redundant even if they do not + * appear in an EquivalenceClass, because of our special treatment + * of boolean equality conditions --- see the comment for + * indexcol_is_bool_constant_for_query(). If that applies, we can + * continue to examine lower-order index columns. Otherwise, the + * sort key is not an interesting sort order for this query, so we + * should stop considering index columns; any lower-order sort + * keys won't be useful either. + */ + if (!indexcol_is_bool_constant_for_query(index, i)) + break; + } i++; } diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c index 530e1347e0..a2fe661075 100644 --- a/src/backend/optimizer/path/tidpath.c +++ b/src/backend/optimizer/path/tidpath.c @@ -25,7 +25,7 @@ * for that. * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -43,12 +43,13 @@ #include "optimizer/clauses.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" +#include "optimizer/restrictinfo.h" static bool IsTidEqualClause(OpExpr *node, int varno); static bool IsTidEqualAnyClause(ScalarArrayOpExpr *node, int varno); static List *TidQualFromExpr(Node *expr, int varno); -static List *TidQualFromRestrictinfo(List *restrictinfo, int varno); +static List *TidQualFromBaseRestrictinfo(RelOptInfo *rel); /* @@ -216,24 +217,26 @@ TidQualFromExpr(Node *expr, int varno) } /* - * Extract a set of CTID conditions from the given restrictinfo list - * - * This is essentially identical to the AND case of TidQualFromExpr, - * except for the format of the input. + * Extract a set of CTID conditions from the rel's baserestrictinfo list */ static List * -TidQualFromRestrictinfo(List *restrictinfo, int varno) +TidQualFromBaseRestrictinfo(RelOptInfo *rel) { List *rlst = NIL; ListCell *l; - foreach(l, restrictinfo) + foreach(l, rel->baserestrictinfo) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - if (!IsA(rinfo, RestrictInfo)) - continue; /* probably should never happen */ - rlst = TidQualFromExpr((Node *) rinfo->clause, varno); + /* + * If clause must wait till after some lower-security-level + * restriction clause, reject it. 
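+ * (For example, a user-supplied "ctid = constant" qual must not be used + * to drive the scan ahead of a lower-security-level barrier qual unless + * restriction_is_securely_promotable says that is safe.)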
+ */ + if (!restriction_is_securely_promotable(rinfo, rel)) + continue; + + rlst = TidQualFromExpr((Node *) rinfo->clause, rel->relid); if (rlst) break; } @@ -259,7 +262,7 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) */ required_outer = rel->lateral_relids; - tidquals = TidQualFromRestrictinfo(rel->baserestrictinfo, rel->relid); + tidquals = TidQualFromBaseRestrictinfo(rel); if (tidquals) add_path(rel, (Path *) create_tidscan_path(root, rel, tidquals, diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index e28a8dc533..34317fe778 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -11,7 +11,7 @@ * is that we have to work harder to clean up after ourselves when we modify * the query, since the derived data structures have to be updated too. * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -41,6 +41,11 @@ static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list); static Oid distinct_col_search(int colno, List *colnos, List *opids); +static bool is_innerrel_unique_for(PlannerInfo *root, + Relids outerrelids, + RelOptInfo *innerrel, + JoinType jointype, + List *restrictlist); /* @@ -491,6 +496,88 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) /* + * reduce_unique_semijoins + * Check for semijoins that can be simplified to plain inner joins + * because the inner relation is provably unique for the join clauses. + * + * Ideally this would happen during reduce_outer_joins, but we don't have + * enough information at that point. + * + * To perform the strength reduction when applicable, we need only delete + * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't + * bother fixing the join type attributed to it in the query jointree, + * since that won't be consulted again.) + */ +void +reduce_unique_semijoins(PlannerInfo *root) +{ + ListCell *lc; + ListCell *next; + + /* + * Scan the join_info_list to find semijoins. We can't use foreach + * because we may delete the current cell. + */ + for (lc = list_head(root->join_info_list); lc != NULL; lc = next) + { + SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); + int innerrelid; + RelOptInfo *innerrel; + Relids joinrelids; + List *restrictlist; + + next = lnext(lc); + + /* + * Must be a non-delaying semijoin to a single baserel, else we aren't + * going to be able to do anything with it. (It's probably not + * possible for delay_upper_joins to be set on a semijoin, but we + * might as well check.) + */ + if (sjinfo->jointype != JOIN_SEMI || + sjinfo->delay_upper_joins) + continue; + + if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid)) + continue; + + innerrel = find_base_rel(root, innerrelid); + + /* + * Before we trouble to run generate_join_implied_equalities, make a + * quick check to eliminate cases in which we will surely be unable to + * prove uniqueness of the innerrel. 
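+ * + * (The targeted case, assuming a unique index on b.id: a query such as + * "SELECT * FROM a WHERE a.x IN (SELECT id FROM b)" yields a semijoin + * whose inner side is provably unique for the join clause a.x = b.id, so + * it can be planned as a plain inner join.)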
+ */ + if (!rel_supports_distinctness(root, innerrel)) + continue; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + + /* + * Since we're only considering a single-rel RHS, any join clauses it + * has must be clauses linking it to the semijoin's min_lefthand. We + * can also consider EC-derived join clauses. + */ + restrictlist = + list_concat(generate_join_implied_equalities(root, + joinrelids, + sjinfo->min_lefthand, + innerrel), + innerrel->joininfo); + + /* Test whether the innerrel is unique for those clauses. */ + if (!innerrel_is_unique(root, sjinfo->min_lefthand, innerrel, + JOIN_SEMI, restrictlist, true)) + continue; + + /* OK, remove the SpecialJoinInfo from the list. */ + root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo); + } +} + + +/* * rel_supports_distinctness * Could the relation possibly be proven distinct on some set of columns? * @@ -596,7 +683,7 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) */ foreach(l, clause_list) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); Oid op; Var *var; @@ -608,8 +695,7 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) * caller's mergejoinability test should have selected only * OpExprs. */ - Assert(IsA(rinfo->clause, OpExpr)); - op = ((OpExpr *) rinfo->clause)->opno; + op = castNode(OpExpr, rinfo->clause)->opno; /* caller identified the inner side for us */ if (rinfo->outer_is_left) @@ -650,6 +736,11 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) bool query_supports_distinctness(Query *query) { + /* we don't cope with SRFs, see comment below */ + if (query->hasTargetSRFs) + return false; + + /* check for features we can prove distinctness with */ if (query->distinctClause != NIL || query->groupClause != NIL || query->groupingSets != NIL || @@ -695,7 +786,7 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) * specified columns, since those must be evaluated before de-duplication; * but it doesn't presently seem worth the complication to check that.) */ - if (expression_returns_set((Node *) query->targetList)) + if (query->hasTargetSRFs) return false; /* @@ -777,9 +868,8 @@ query_is_distinct_for(Query *query, List *colnos, List *opids) */ if (query->setOperations) { - SetOperationStmt *topop = (SetOperationStmt *) query->setOperations; + SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations); - Assert(IsA(topop, SetOperationStmt)); Assert(topop->op != SETOP_NONE); if (!topop->all) @@ -842,3 +932,184 @@ distinct_col_search(int colno, List *colnos, List *opids) } return InvalidOid; } + + +/* + * innerrel_is_unique + * Check if the innerrel provably contains at most one tuple matching any + * tuple from the outerrel, based on join clauses in the 'restrictlist'. + * + * We need an actual RelOptInfo for the innerrel, but it's sufficient to + * identify the outerrel by its Relids. This asymmetry supports use of this + * function before joinrels have been built. + * + * The proof must be made based only on clauses that will be "joinquals" + * rather than "otherquals" at execution. For an inner join there's no + * difference; but if the join is outer, we must ignore pushed-down quals, + * as those will become "otherquals". 
Note that this means the answer might + * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the + * answer without regard to that, callers must take care not to call this + * with jointypes that would be classified differently by IS_OUTER_JOIN(). + * + * The actual proof is undertaken by is_innerrel_unique_for(); this function + * is a frontend that is mainly concerned with caching the answers. + * In particular, the force_cache argument allows overriding the internal + * heuristic about whether to cache negative answers; it should be "true" + * if making an inquiry that is not part of the normal bottom-up join search + * sequence. + */ +bool +innerrel_is_unique(PlannerInfo *root, + Relids outerrelids, + RelOptInfo *innerrel, + JoinType jointype, + List *restrictlist, + bool force_cache) +{ + MemoryContext old_context; + ListCell *lc; + + /* Certainly can't prove uniqueness when there are no joinclauses */ + if (restrictlist == NIL) + return false; + + /* + * Make a quick check to eliminate cases in which we will surely be unable + * to prove uniqueness of the innerrel. + */ + if (!rel_supports_distinctness(root, innerrel)) + return false; + + /* + * Query the cache to see if we've managed to prove that innerrel is + * unique for any subset of this outerrel. We don't need an exact match, + * as extra outerrels can't make the innerrel any less unique (or more + * formally, the restrictlist for a join to a superset outerrel must be a + * superset of the conditions we successfully used before). + */ + foreach(lc, innerrel->unique_for_rels) + { + Relids unique_for_rels = (Relids) lfirst(lc); + + if (bms_is_subset(unique_for_rels, outerrelids)) + return true; /* Success! */ + } + + /* + * Conversely, we may have already determined that this outerrel, or some + * superset thereof, cannot prove this innerrel to be unique. + */ + foreach(lc, innerrel->non_unique_for_rels) + { + Relids unique_for_rels = (Relids) lfirst(lc); + + if (bms_is_subset(outerrelids, unique_for_rels)) + return false; + } + + /* No cached information, so try to make the proof. */ + if (is_innerrel_unique_for(root, outerrelids, innerrel, + jointype, restrictlist)) + { + /* + * Cache the positive result for future probes, being sure to keep it + * in the planner_cxt even if we are working in GEQO. + * + * Note: one might consider trying to isolate the minimal subset of + * the outerrels that proved the innerrel unique. But it's not worth + * the trouble, because the planner builds up joinrels incrementally + * and so we'll see the minimally sufficient outerrels before any + * supersets of them anyway. + */ + old_context = MemoryContextSwitchTo(root->planner_cxt); + innerrel->unique_for_rels = lappend(innerrel->unique_for_rels, + bms_copy(outerrelids)); + MemoryContextSwitchTo(old_context); + + return true; /* Success! */ + } + else + { + /* + * None of the join conditions for outerrel proved innerrel unique, so + * we can safely reject this outerrel or any subset of it in future + * checks. + * + * However, in normal planning mode, caching this knowledge is totally + * pointless; it won't be queried again, because we build up joinrels + * from smaller to larger. It is useful in GEQO mode, where the + * knowledge can be carried across successive planning attempts; and + * it's likely to be useful when using join-search plugins, too. Hence + * cache when join_search_private is non-NULL. (Yeah, that's a hack, + * but it seems reasonable.) 
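+ * + * (Example of the negative cache at work: if {A,C} failed to prove B + * unique, then a later probe with outerrel {A} alone must fail too, since + * its available join clauses can only be a subset; the bms_is_subset test + * above encodes exactly that.)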
+ * + * Also, allow callers to override that heuristic and force caching; + * that's useful for reduce_unique_semijoins, which calls here before + * the normal join search starts. + */ + if (force_cache || root->join_search_private) + { + old_context = MemoryContextSwitchTo(root->planner_cxt); + innerrel->non_unique_for_rels = + lappend(innerrel->non_unique_for_rels, + bms_copy(outerrelids)); + MemoryContextSwitchTo(old_context); + } + + return false; + } +} + +/* + * is_innerrel_unique_for + * Check if the innerrel provably contains at most one tuple matching any + * tuple from the outerrel, based on join clauses in the 'restrictlist'. + */ +static bool +is_innerrel_unique_for(PlannerInfo *root, + Relids outerrelids, + RelOptInfo *innerrel, + JoinType jointype, + List *restrictlist) +{ + List *clause_list = NIL; + ListCell *lc; + + /* + * Search for mergejoinable clauses that constrain the inner rel against + * the outer rel. If an operator is mergejoinable then it behaves like + * equality for some btree opclass, so it's what we want. The + * mergejoinability test also eliminates clauses containing volatile + * functions, which we couldn't depend on. + */ + foreach(lc, restrictlist) + { + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc); + + /* + * As noted above, if it's a pushed-down clause and we're at an outer + * join, we can't use it. + */ + if (restrictinfo->is_pushed_down && IS_OUTER_JOIN(jointype)) + continue; + + /* Ignore if it's not a mergejoinable clause */ + if (!restrictinfo->can_join || + restrictinfo->mergeopfamilies == NIL) + continue; /* not mergejoinable */ + + /* + * Check if clause has the form "outer op inner" or "inner op outer", + * and if so mark which side is inner. + */ + if (!clause_sides_match_join(restrictinfo, outerrelids, + innerrel->relids)) + continue; /* no good for these input relations */ + + /* OK, add to list */ + clause_list = lappend(clause_list, restrictinfo); + } + + /* Let rel_is_distinct_for() do the hard work */ + return rel_is_distinct_for(root, innerrel, clause_list); +} diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 637926ff3a..af89e9d288 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -6,7 +6,7 @@ * Path into a Plan. * * Portions Copyright (c) 2012-2014, TransLattice, Inc. 
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -110,13 +110,14 @@ static RemoteSubplan *create_remotescan_plan(PlannerInfo *root, RemoteSubPath *best_path); static char *get_internal_cursor(void); #endif +static ProjectSet *create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path); static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path, int flags); static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags); static Gather *create_gather_plan(PlannerInfo *root, GatherPath *best_path); static Plan *create_projection_plan(PlannerInfo *root, ProjectionPath *best_path); -static Plan *inject_projection_plan(Plan *subplan, List *tlist); +static Plan *inject_projection_plan(Plan *subplan, List *tlist, bool parallel_safe); static Sort *create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags); static Group *create_group_plan(PlannerInfo *root, GroupPath *best_path); static Unique *create_upper_unique_plan(PlannerInfo *root, UpperUniquePath *best_path, @@ -153,6 +154,7 @@ static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root, List *tlist, List *scan_clauses); static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, List **qual, List **indexqual, List **indexECs); +static void bitmap_subplan_mark_shared(Plan *plan); static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path, List *tlist, List *scan_clauses); static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root, @@ -162,8 +164,12 @@ static FunctionScan *create_functionscan_plan(PlannerInfo *root, Path *best_path List *tlist, List *scan_clauses); static ValuesScan *create_valuesscan_plan(PlannerInfo *root, Path *best_path, List *tlist, List *scan_clauses); +static TableFuncScan *create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses); static CteScan *create_ctescan_plan(PlannerInfo *root, Path *best_path, List *tlist, List *scan_clauses); +static NamedTuplestoreScan *create_namedtuplestorescan_plan(PlannerInfo *root, + Path *best_path, List *tlist, List *scan_clauses); static WorkTableScan *create_worktablescan_plan(PlannerInfo *root, Path *best_path, List *tlist, List *scan_clauses); static ForeignScan *create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, @@ -218,11 +224,15 @@ static FunctionScan *make_functionscan(List *qptlist, List *qpqual, Index scanrelid, List *functions, bool funcordinality); static ValuesScan *make_valuesscan(List *qptlist, List *qpqual, Index scanrelid, List *values_lists); +static TableFuncScan *make_tablefuncscan(List *qptlist, List *qpqual, + Index scanrelid, TableFunc *tablefunc); static CteScan *make_ctescan(List *qptlist, List *qpqual, Index scanrelid, int ctePlanId, int cteParam); +static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual, + Index scanrelid, char *enrname); static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual, Index scanrelid, int wtParam); -static Append *make_append(List *appendplans, List *tlist); +static Append *make_append(List *appendplans, List *tlist, List *partitioned_rels); static RecursiveUnion *make_recursive_union(PlannerInfo *root, List *tlist, Plan *lefttree, @@ -235,18 +245,16 @@ static BitmapOr *make_bitmap_or(List *bitmapplans); static NestLoop *make_nestloop(List *tlist, List *joinclauses, List 
*otherclauses, List *nestParams, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinType jointype, bool inner_unique); static HashJoin *make_hashjoin(List *tlist, List *joinclauses, List *otherclauses, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinType jointype, bool inner_unique); static Hash *make_hash(Plan *lefttree, Oid skewTable, AttrNumber skewColumn, - bool skewInherit, - Oid skewColType, - int32 skewColTypmod); + bool skewInherit); static MergeJoin *make_mergejoin(List *tlist, List *joinclauses, List *otherclauses, List *mergeclauses, @@ -255,7 +263,8 @@ static MergeJoin *make_mergejoin(List *tlist, int *mergestrategies, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype); + JoinType jointype, bool inner_unique, + bool skip_mark_restore); static Sort *make_sort(Plan *lefttree, int numCols, AttrNumber *sortColIdx, Oid *sortOperators, Oid *collations, bool *nullsFirst); @@ -294,12 +303,15 @@ static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, long numGroups); static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam); static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan); +static ProjectSet *make_project_set(List *tlist, Plan *subplan); static ModifyTable *make_modifytable(PlannerInfo *root, CmdType operation, bool canSetTag, - Index nominalRelation, + Index nominalRelation, List *partitioned_rels, List *resultRelations, List *subplans, List *withCheckOptionLists, List *returningLists, List *rowMarks, OnConflictExpr *onconflict, int epqParam); +static GatherMerge *create_gather_merge_plan(PlannerInfo *root, + GatherMergePath *best_path); #ifdef XCP static int add_sort_column(AttrNumber colIdx, Oid sortOp, Oid coll, @@ -395,9 +407,11 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) case T_TidScan: case T_SubqueryScan: case T_FunctionScan: + case T_TableFuncScan: case T_ValuesScan: case T_CteScan: case T_WorkTableScan: + case T_NamedTuplestoreScan: case T_ForeignScan: case T_CustomScan: plan = create_scan_plan(root, best_path, flags); @@ -440,6 +454,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) (ResultPath *) best_path); } break; + case T_ProjectSet: + plan = (Plan *) create_project_set_plan(root, + (ProjectSetPath *) best_path); + break; case T_Material: plan = (Plan *) create_material_plan(root, (MaterialPath *) best_path, @@ -511,6 +529,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags) (LimitPath *) best_path, flags, 0, 1); break; + case T_GatherMerge: + plan = (Plan *) create_gather_merge_plan(root, + (GatherMergePath *) best_path); + break; default: elog(ERROR, "unrecognized node type: %d", (int) best_path->pathtype); @@ -550,8 +572,7 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags) { case T_IndexScan: case T_IndexOnlyScan: - Assert(IsA(best_path, IndexPath)); - scan_clauses = ((IndexPath *) best_path)->indexinfo->indrestrictinfo; + scan_clauses = castNode(IndexPath, best_path)->indexinfo->indrestrictinfo; break; default: scan_clauses = rel->baserestrictinfo; @@ -678,6 +699,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags) scan_clauses); break; + case T_TableFuncScan: + plan = (Plan *) create_tablefuncscan_plan(root, + best_path, + tlist, + scan_clauses); + break; + case T_ValuesScan: plan = (Plan *) create_valuesscan_plan(root, best_path, @@ -692,6 +720,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags) scan_clauses); 
break; + case T_NamedTuplestoreScan: + plan = (Plan *) create_namedtuplestorescan_plan(root, + best_path, + tlist, + scan_clauses); + break; + case T_WorkTableScan: plan = (Plan *) create_worktablescan_plan(root, best_path, @@ -792,11 +827,12 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags) /* * We can do this for real relation scans, subquery scans, function scans, - * values scans, and CTE scans (but not for, eg, joins). + * tablefunc scans, values scans, and CTE scans (but not for, eg, joins). */ if (rel->rtekind != RTE_RELATION && rel->rtekind != RTE_SUBQUERY && rel->rtekind != RTE_FUNCTION && + rel->rtekind != RTE_TABLEFUNC && rel->rtekind != RTE_VALUES && rel->rtekind != RTE_CTE) return false; @@ -810,6 +846,15 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags) return false; /* + * Also, don't do it to a CustomPath; the premise that we're extracting + * columns from a simple physical tuple is unlikely to hold for those. + * (When it does make sense, the custom path creator can set up the path's + * pathtarget that way.) + */ + if (IsA(path, CustomPath)) + return false; + + /* * Can't do it if any system columns or whole-row Vars are requested. * (This could possibly be fixed but would take some fragile assumptions * in setrefs.c, I think.) @@ -930,6 +975,9 @@ create_gating_plan(PlannerInfo *root, Path *path, Plan *plan, */ copy_plan_costsize(gplan, plan); + /* Gating quals could be unsafe, so better use the Path's safety flag */ + gplan->parallel_safe = path->parallel_safe; + return gplan; } @@ -1049,7 +1097,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) * parent-rel Vars it'll be asked to emit. */ - plan = make_append(subplans, tlist); + plan = make_append(subplans, tlist, best_path->partitioned_rels); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1157,6 +1205,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) subplans = lappend(subplans, subplan); } + node->partitioned_rels = best_path->partitioned_rels; node->mergeplans = subplans; return (Plan *) node; @@ -1190,6 +1239,31 @@ create_result_plan(PlannerInfo *root, ResultPath *best_path) } /* + * create_project_set_plan + * Create a ProjectSet plan for 'best_path'. + * + * Returns a Plan node. + */ +static ProjectSet * +create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path) +{ + ProjectSet *plan; + Plan *subplan; + List *tlist; + + /* Since we intend to project, we don't need to constrain child tlist */ + subplan = create_plan_recurse(root, best_path->subpath, 0); + + tlist = build_path_tlist(root, &best_path->path); + + plan = make_project_set(tlist, subplan); + + copy_generic_path_info(&plan->plan, (Path *) best_path); + + return plan; +} + +/* * create_material_plan * Create a Material plan for 'best_path' and (recursively) plans * for its subpaths. 
@@ -1272,7 +1346,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) foreach(l, uniq_exprs) { - Node *uniqexpr = lfirst(l); + Expr *uniqexpr = lfirst(l); TargetEntry *tle; tle = tlist_member(uniqexpr, newtlist); @@ -1297,7 +1371,8 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) */ if (!is_projection_capable_plan(subplan) && !tlist_same_exprs(newtlist, subplan->targetlist)) - subplan = inject_projection_plan(subplan, newtlist); + subplan = inject_projection_plan(subplan, newtlist, + best_path->path.parallel_safe); else subplan->targetlist = newtlist; #ifdef XCP @@ -1323,7 +1398,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path, int flags) groupColPos = 0; foreach(l, uniq_exprs) { - Node *uniqexpr = lfirst(l); + Expr *uniqexpr = lfirst(l); TargetEntry *tle; tle = tlist_member(uniqexpr, newtlist); @@ -1451,7 +1526,7 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) gather_plan = make_gather(tlist, NIL, - best_path->path.parallel_workers, + best_path->num_workers, best_path->single_copy, subplan); @@ -1464,6 +1539,61 @@ create_gather_plan(PlannerInfo *root, GatherPath *best_path) } /* + * create_gather_merge_plan + * + * Create a Gather Merge plan for 'best_path' and (recursively) + * plans for its subpaths. + */ +static GatherMerge * +create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path) +{ + GatherMerge *gm_plan; + Plan *subplan; + List *pathkeys = best_path->path.pathkeys; + List *tlist = build_path_tlist(root, &best_path->path); + + /* As with Gather, it's best to project away columns in the workers. */ + subplan = create_plan_recurse(root, best_path->subpath, CP_EXACT_TLIST); + + /* Create a shell for a GatherMerge plan. */ + gm_plan = makeNode(GatherMerge); + gm_plan->plan.targetlist = tlist; + gm_plan->num_workers = best_path->num_workers; + copy_generic_path_info(&gm_plan->plan, &best_path->path); + + /* Gather Merge is pointless with no pathkeys; use Gather instead. */ + Assert(pathkeys != NIL); + + /* Compute sort column info, and adjust subplan's tlist as needed */ + subplan = prepare_sort_from_pathkeys(subplan, pathkeys, + best_path->subpath->parent->relids, + gm_plan->sortColIdx, + false, + &gm_plan->numCols, + &gm_plan->sortColIdx, + &gm_plan->sortOperators, + &gm_plan->collations, + &gm_plan->nullsFirst); + + + /* Now, insert a Sort node if subplan isn't sufficiently ordered */ + if (!pathkeys_contained_in(pathkeys, best_path->subpath->pathkeys)) + subplan = (Plan *) make_sort(subplan, gm_plan->numCols, + gm_plan->sortColIdx, + gm_plan->sortOperators, + gm_plan->collations, + gm_plan->nullsFirst); + + /* Now insert the subplan under GatherMerge. */ + gm_plan->plan.lefttree = subplan; + + /* use parallel mode for parallel plans. */ + root->glob->parallelModeNeeded = true; + + return gm_plan; +} + +/* * create_projection_plan * * Create a plan tree to do a projection step and (recursively) plans @@ -1509,7 +1639,8 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path) plan->total_cost = best_path->path.total_cost; plan->plan_rows = best_path->path.rows; plan->plan_width = best_path->path.pathtarget->width; - /* ... but be careful not to munge subplan's parallel-aware flag */ + plan->parallel_safe = best_path->path.parallel_safe; + /* ... 
but don't change subplan's parallel_aware flag */ } else { @@ -1529,9 +1660,12 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path) * This is used in a few places where we decide on-the-fly that we need a * projection step as part of the tree generated for some Path node. * We should try to get rid of this in favor of doing it more honestly. + * + * One reason it's ugly is we have to be told the right parallel_safe marking + * to apply (since the tlist might be unsafe even if the child plan is safe). */ static Plan * -inject_projection_plan(Plan *subplan, List *tlist) +inject_projection_plan(Plan *subplan, List *tlist, bool parallel_safe) { Plan *plan; @@ -1545,6 +1679,7 @@ inject_projection_plan(Plan *subplan, List *tlist) * consistent not more so. Hence, just copy the subplan's cost. */ copy_plan_costsize(plan, subplan); + plan->parallel_safe = parallel_safe; return plan; } @@ -1733,18 +1868,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) { Agg *plan; Plan *subplan; - List *rollup_groupclauses = best_path->rollup_groupclauses; - List *rollup_lists = best_path->rollup_lists; + List *rollups = best_path->rollups; AttrNumber *grouping_map; int maxref; List *chain; - ListCell *lc, - *lc2; + ListCell *lc; /* Shouldn't get here without grouping sets */ Assert(root->parse->groupingSets); - Assert(rollup_lists != NIL); - Assert(list_length(rollup_lists) == list_length(rollup_groupclauses)); + Assert(rollups != NIL); /* * Agg can project, so no need to be terribly picky about child tlist, but @@ -1796,72 +1928,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) * costs will be shown by EXPLAIN. */ chain = NIL; - if (list_length(rollup_groupclauses) > 1) + if (list_length(rollups) > 1) { - forboth(lc, rollup_groupclauses, lc2, rollup_lists) + ListCell *lc2 = lnext(list_head(rollups)); + bool is_first_sort = ((RollupData *) linitial(rollups))->is_hashed; + + for_each_cell(lc, lc2) { - List *groupClause = (List *) lfirst(lc); - List *gsets = (List *) lfirst(lc2); + RollupData *rollup = lfirst(lc); AttrNumber *new_grpColIdx; - Plan *sort_plan; + Plan *sort_plan = NULL; Plan *agg_plan; + AggStrategy strat; - /* We want to iterate over all but the last rollup list elements */ - if (lnext(lc) == NULL) - break; + new_grpColIdx = remap_groupColIdx(root, rollup->groupClause); - new_grpColIdx = remap_groupColIdx(root, groupClause); + if (!rollup->is_hashed && !is_first_sort) + { + sort_plan = (Plan *) + make_sort_from_groupcols(rollup->groupClause, + new_grpColIdx, + subplan); + } - sort_plan = (Plan *) - make_sort_from_groupcols(groupClause, - new_grpColIdx, - subplan); + if (!rollup->is_hashed) + is_first_sort = false; + + if (rollup->is_hashed) + strat = AGG_HASHED; + else if (list_length(linitial(rollup->gsets)) == 0) + strat = AGG_PLAIN; + else + strat = AGG_SORTED; agg_plan = (Plan *) make_agg(NIL, NIL, - AGG_SORTED, + strat, AGGSPLIT_SIMPLE, - list_length((List *) linitial(gsets)), + list_length((List *) linitial(rollup->gsets)), new_grpColIdx, - extract_grouping_ops(groupClause), - gsets, + extract_grouping_ops(rollup->groupClause), + rollup->gsets, NIL, - 0, /* numGroups not needed */ + rollup->numGroups, sort_plan); /* - * Nuke stuff we don't need to avoid bloating debug output. + * Remove stuff we don't need to avoid bloating debug output. 
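+ * (The chained Sort nodes are never run as standalone plans: the parent + * Agg drives the rollup chain and consults only their sort-key metadata, + * so the targetlist and lefttree copies would be dead weight in EXPLAIN.)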
*/ - sort_plan->targetlist = NIL; - sort_plan->lefttree = NULL; + if (sort_plan) + { + sort_plan->targetlist = NIL; + sort_plan->lefttree = NULL; + } chain = lappend(chain, agg_plan); } } /* - * Now make the final Agg node + * Now make the real Agg node */ { - List *groupClause = (List *) llast(rollup_groupclauses); - List *gsets = (List *) llast(rollup_lists); + RollupData *rollup = linitial(rollups); AttrNumber *top_grpColIdx; int numGroupCols; - top_grpColIdx = remap_groupColIdx(root, groupClause); + top_grpColIdx = remap_groupColIdx(root, rollup->groupClause); - numGroupCols = list_length((List *) linitial(gsets)); + numGroupCols = list_length((List *) linitial(rollup->gsets)); plan = make_agg(build_path_tlist(root, &best_path->path), best_path->qual, - (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN, + best_path->aggstrategy, AGGSPLIT_SIMPLE, numGroupCols, top_grpColIdx, - extract_grouping_ops(groupClause), - gsets, + extract_grouping_ops(rollup->groupClause), + rollup->gsets, chain, - 0, /* numGroups not needed */ + rollup->numGroups, subplan); /* Copy cost data from Path to Plan */ @@ -1911,6 +2057,7 @@ create_minmaxagg_plan(PlannerInfo *root, MinMaxAggPath *best_path) plan->plan_rows = 1; plan->plan_width = mminfo->path->pathtarget->width; plan->parallel_aware = false; + plan->parallel_safe = mminfo->path->parallel_safe; /* * XL: Add a remote subplan, splitting the LIMIT into a remote and @@ -2296,6 +2443,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) best_path->operation, best_path->canSetTag, best_path->nominalRelation, + best_path->partitioned_rels, best_path->resultRelations, subplans, best_path->withCheckOptionLists, @@ -2703,9 +2851,8 @@ create_indexscan_plan(PlannerInfo *root, qpqual = NIL; foreach(l, scan_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - Assert(IsA(rinfo, RestrictInfo)); if (rinfo->pseudoconstant) continue; /* we may drop pseudoconstants here */ if (list_member_ptr(indexquals, rinfo)) @@ -2832,6 +2979,9 @@ create_bitmap_scan_plan(PlannerInfo *root, &bitmapqualorig, &indexquals, &indexECs); + if (best_path->path.parallel_aware) + bitmap_subplan_mark_shared(bitmapqualplan); + /* * The qpqual list must contain all restrictions not automatically handled * by the index, other than pseudoconstant clauses which will be handled @@ -2861,10 +3011,9 @@ create_bitmap_scan_plan(PlannerInfo *root, qpqual = NIL; foreach(l, scan_clauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); Node *clause = (Node *) rinfo->clause; - Assert(IsA(rinfo, RestrictInfo)); if (rinfo->pseudoconstant) continue; /* we may drop pseudoconstants here */ if (list_member(indexquals, clause)) @@ -2981,6 +3130,7 @@ create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, clamp_row_est(apath->bitmapselectivity * apath->path.parent->tuples); plan->plan_width = 0; /* meaningless */ plan->parallel_aware = false; + plan->parallel_safe = apath->path.parallel_safe; *qual = subquals; *indexqual = subindexquals; *indexECs = subindexECs; @@ -3044,6 +3194,7 @@ create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, clamp_row_est(opath->bitmapselectivity * opath->path.parent->tuples); plan->plan_width = 0; /* meaningless */ plan->parallel_aware = false; + plan->parallel_safe = opath->path.parallel_safe; } /* @@ -3073,9 +3224,9 @@ create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, ListCell *l; /* Use the regular indexscan plan build machinery... 
*/ - iscan = (IndexScan *) create_indexscan_plan(root, ipath, - NIL, NIL, false); - Assert(IsA(iscan, IndexScan)); + iscan = castNode(IndexScan, + create_indexscan_plan(root, ipath, + NIL, NIL, false)); /* then convert to a bitmap indexscan */ plan = (Plan *) make_bitmap_indexscan(iscan->scan.scanrelid, iscan->indexid, @@ -3088,6 +3239,7 @@ create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, clamp_row_est(ipath->indexselectivity * ipath->path.parent->tuples); plan->plan_width = 0; /* meaningless */ plan->parallel_aware = false; + plan->parallel_safe = ipath->path.parallel_safe; *qual = get_actual_clauses(ipath->indexclauses); *indexqual = get_actual_clauses(ipath->indexquals); foreach(l, ipath->indexinfo->indpred) @@ -3271,6 +3423,49 @@ create_functionscan_plan(PlannerInfo *root, Path *best_path, } /* + * create_tablefuncscan_plan + * Returns a tablefuncscan plan for the base relation scanned by 'best_path' + * with restriction clauses 'scan_clauses' and targetlist 'tlist'. + */ +static TableFuncScan * +create_tablefuncscan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses) +{ + TableFuncScan *scan_plan; + Index scan_relid = best_path->parent->relid; + RangeTblEntry *rte; + TableFunc *tablefunc; + + /* it should be a function base rel... */ + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_TABLEFUNC); + tablefunc = rte->tablefunc; + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + /* The function expressions could contain nestloop params, too */ + tablefunc = (TableFunc *) replace_nestloop_params(root, (Node *) tablefunc); + } + + scan_plan = make_tablefuncscan(tlist, scan_clauses, scan_relid, + tablefunc); + + copy_generic_path_info(&scan_plan->scan.plan, best_path); + + return scan_plan; +} + +/* * create_valuesscan_plan * Returns a valuesscan plan for the base relation scanned by 'best_path' * with restriction clauses 'scan_clauses' and targetlist 'tlist'. @@ -3408,6 +3603,45 @@ create_ctescan_plan(PlannerInfo *root, Path *best_path, } /* + * create_namedtuplestorescan_plan + * Returns a tuplestorescan plan for the base relation scanned by + * 'best_path' with restriction clauses 'scan_clauses' and targetlist + * 'tlist'. 
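+ * + * The named tuplestore is looked up by enrname in the query environment + * at executor startup; the initial use case is scanning the transition + * tables of AFTER triggers.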
+ */ +static NamedTuplestoreScan * +create_namedtuplestorescan_plan(PlannerInfo *root, Path *best_path, + List *tlist, List *scan_clauses) +{ + NamedTuplestoreScan *scan_plan; + Index scan_relid = best_path->parent->relid; + RangeTblEntry *rte; + + Assert(scan_relid > 0); + rte = planner_rt_fetch(scan_relid, root); + Assert(rte->rtekind == RTE_NAMEDTUPLESTORE); + + /* Sort clauses into best execution order */ + scan_clauses = order_qual_clauses(root, scan_clauses); + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + scan_clauses = extract_actual_clauses(scan_clauses, false); + + /* Replace any outer-relation variables with nestloop params */ + if (best_path->param_info) + { + scan_clauses = (List *) + replace_nestloop_params(root, (Node *) scan_clauses); + } + + scan_plan = make_namedtuplestorescan(tlist, scan_clauses, scan_relid, + rte->enrname); + + copy_generic_path_info(&scan_plan->scan.plan, best_path); + + return scan_plan; +} + +/* * create_worktablescan_plan * Returns a worktablescan plan for the base relation scanned by 'best_path' * with restriction clauses 'scan_clauses' and targetlist 'tlist'. @@ -3527,8 +3761,15 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path, /* Copy foreign server OID; likewise, no need to make FDW do this */ scan_plan->fs_server = rel->serverid; - /* Likewise, copy the relids that are represented by this foreign scan */ - scan_plan->fs_relids = best_path->path.parent->relids; + /* + * Likewise, copy the relids that are represented by this foreign scan. An + * upper rel doesn't have relids set, but it covers all the base relations + * participating in the underlying scan, so use root's all_baserels. + */ + if (IS_UPPER_REL(rel)) + scan_plan->fs_relids = root->all_baserels; + else + scan_plan->fs_relids = best_path->path.parent->relids; /* * If this is a foreign join, and to make it valid to push down we had to @@ -3637,13 +3878,13 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path, * Invoke custom plan provider to create the Plan node represented by the * CustomPath. 
*/ - cplan = (CustomScan *) best_path->methods->PlanCustomPath(root, - rel, - best_path, - tlist, - scan_clauses, - custom_plans); - Assert(IsA(cplan, CustomScan)); + cplan = castNode(CustomScan, + best_path->methods->PlanCustomPath(root, + rel, + best_path, + tlist, + scan_clauses, + custom_plans)); /* * Copy cost data from Path to Plan; no need to make custom-plan providers @@ -3799,7 +4040,8 @@ create_nestloop_plan(PlannerInfo *root, nestParams, outer_plan, inner_plan, - best_path->jointype); + best_path->jointype, + best_path->inner_unique); copy_generic_path_info(&join_plan->join.plan, &best_path->path); @@ -3950,7 +4192,7 @@ create_mergejoin_plan(PlannerInfo *root, i = 0; foreach(lc, best_path->path_mergeclauses) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); EquivalenceClass *oeclass; EquivalenceClass *ieclass; PathKey *opathkey; @@ -3960,7 +4202,6 @@ create_mergejoin_plan(PlannerInfo *root, ListCell *l2; /* fetch outer/inner eclass from mergeclause */ - Assert(IsA(rinfo, RestrictInfo)); if (rinfo->outer_is_left) { oeclass = rinfo->left_ec; @@ -4102,7 +4343,9 @@ create_mergejoin_plan(PlannerInfo *root, mergenullsfirst, outer_plan, inner_plan, - best_path->jpath.jointype); + best_path->jpath.jointype, + best_path->jpath.inner_unique, + best_path->skip_mark_restore); /* Costs of sort and material steps are included in path cost already */ copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); @@ -4125,8 +4368,6 @@ create_hashjoin_plan(PlannerInfo *root, Oid skewTable = InvalidOid; AttrNumber skewColumn = InvalidAttrNumber; bool skewInherit = false; - Oid skewColType = InvalidOid; - int32 skewColTypmod = -1; /* * HashJoin can project, so we don't have to demand exact tlists from the @@ -4213,8 +4454,6 @@ create_hashjoin_plan(PlannerInfo *root, skewTable = rte->relid; skewColumn = var->varattno; skewInherit = rte->inh; - skewColType = var->vartype; - skewColTypmod = var->vartypmod; } } } @@ -4225,9 +4464,7 @@ create_hashjoin_plan(PlannerInfo *root, hash_plan = make_hash(inner_plan, skewTable, skewColumn, - skewInherit, - skewColType, - skewColTypmod); + skewInherit); /* * Set Hash node's startup & total costs equal to total cost of input @@ -4242,7 +4479,8 @@ create_hashjoin_plan(PlannerInfo *root, hashclauses, outer_plan, (Plan *) hash_plan, - best_path->jpath.jointype); + best_path->jpath.jointype, + best_path->jpath.inner_unique); copy_generic_path_info(&join_plan->join.plan, &best_path->jpath.path); @@ -4455,7 +4693,7 @@ process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params) /* No, so add it */ nlp = makeNode(NestLoopParam); nlp->paramno = pitem->paramId; - nlp->paramval = copyObject(phv); + nlp->paramval = (Var *) copyObject(phv); root->curOuterParams = lappend(root->curOuterParams, nlp); } } @@ -4495,12 +4733,10 @@ fix_indexqual_references(PlannerInfo *root, IndexPath *index_path) forboth(lcc, index_path->indexquals, lci, index_path->indexqualcols) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lcc); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lcc); int indexcol = lfirst_int(lci); Node *clause; - Assert(IsA(rinfo, RestrictInfo)); - /* * Replace any outer-relation variables with nestloop params. * @@ -4734,7 +4970,7 @@ fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol) } } - /* Ooops... */ + /* Oops... 
*/ elog(ERROR, "index key does not match expected index column"); return NULL; /* keep compiler quiet */ } @@ -4798,21 +5034,32 @@ get_switched_clauses(List *clauses, Relids outerrelids) * plan node, sort the list into the order we want to check the quals * in at runtime. * + * When security barrier quals are used in the query, we may have quals with + * different security levels in the list. Quals of lower security_level + * must go before quals of higher security_level, except that we can grant + * exceptions to move up quals that are leakproof. When security level + * doesn't force the decision, we prefer to order clauses by estimated + * execution cost, cheapest first. + * * Ideally the order should be driven by a combination of execution cost and * selectivity, but it's not immediately clear how to account for both, * and given the uncertainty of the estimates the reliability of the decisions - * would be doubtful anyway. So we just order by estimated per-tuple cost, - * being careful not to change the order when (as is often the case) the - * estimates are identical. + * would be doubtful anyway. So we just order by security level then + * estimated per-tuple cost, being careful not to change the order when + * (as is often the case) the estimates are identical. * * Although this will work on either bare clauses or RestrictInfos, it's * much faster to apply it to RestrictInfos, since it can re-use cost - * information that is cached in RestrictInfos. + * information that is cached in RestrictInfos. XXX in the bare-clause + * case, we are also not able to apply security considerations. That is + * all right for the moment, because the bare-clause case doesn't occur + * anywhere that barrier quals could be present, but it would be better to + * get rid of it. * * Note: some callers pass lists that contain entries that will later be * removed; this is the easiest way to let this routine see RestrictInfos - * instead of bare clauses. It's OK because we only sort by cost, but - * a cost/selectivity combination would likely do the wrong thing. + * instead of bare clauses. This is another reason why trying to consider + * selectivity in the ordering would likely do the wrong thing. */ static List * order_qual_clauses(PlannerInfo *root, List *clauses) @@ -4821,6 +5068,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) { Node *clause; Cost cost; + Index security_level; } QualItem; int nitems = list_length(clauses); QualItem *items; @@ -4846,6 +5094,27 @@ order_qual_clauses(PlannerInfo *root, List *clauses) cost_qual_eval_node(&qcost, clause, root); items[i].clause = clause; items[i].cost = qcost.per_tuple; + if (IsA(clause, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) clause; + + /* + * If a clause is leakproof, it doesn't have to be constrained by + * its nominal security level. If it's also reasonably cheap + * (here defined as 10X cpu_operator_cost), pretend it has + * security_level 0, which will allow it to go in front of + * more-expensive quals of lower security levels. Of course, that + * will also force it to go in front of cheaper quals of its own + * security level, which is not so great, but we can alleviate + * that risk by applying the cost limit cutoff. 
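+ * + * In other words, the effective sort key computed below is: if + * (rinfo->leakproof && cost < 10 * cpu_operator_cost) then 0, else + * rinfo->security_level. At the default cpu_operator_cost of 0.0025 the + * cutoff is 0.025 per tuple, so a leakproof qual costing 0.01 sorts as + * level 0, while one costing 0.05 keeps its nominal level.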
+ */ + if (rinfo->leakproof && items[i].cost < 10 * cpu_operator_cost) + items[i].security_level = 0; + else + items[i].security_level = rinfo->security_level; + } + else + items[i].security_level = 0; i++; } @@ -4862,9 +5131,13 @@ order_qual_clauses(PlannerInfo *root, List *clauses) /* insert newitem into the already-sorted subarray */ for (j = i; j > 0; j--) { - if (newitem.cost >= items[j - 1].cost) + QualItem *olditem = &items[j - 1]; + + if (newitem.security_level > olditem->security_level || + (newitem.security_level == olditem->security_level && + newitem.cost >= olditem->cost)) break; - items[j] = items[j - 1]; + items[j] = *olditem; } items[j] = newitem; } @@ -4880,7 +5153,7 @@ order_qual_clauses(PlannerInfo *root, List *clauses) /* * Copy cost and size info from a Path node to the Plan node created from it. * The executor usually won't use this info, but it's needed by EXPLAIN. - * Also copy the parallel-aware flag, which the executor *will* use. + * Also copy the parallel-related flags, which the executor *will* use. */ static void copy_generic_path_info(Plan *dest, Path *src) @@ -4890,6 +5163,7 @@ copy_generic_path_info(Plan *dest, Path *src) dest->plan_rows = src->rows; dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; + dest->parallel_safe = src->parallel_safe; } /* @@ -4905,6 +5179,8 @@ copy_plan_costsize(Plan *dest, Plan *src) dest->plan_width = src->plan_width; /* Assume the inserted node is not parallel-aware. */ dest->parallel_aware = false; + /* Assume the inserted node is parallel-safe, if child plan is. */ + dest->parallel_safe = src->parallel_safe; } /* @@ -4934,8 +5210,27 @@ label_sort_with_costsize(PlannerInfo *root, Sort *plan, double limit_tuples) plan->plan.plan_rows = lefttree->plan_rows; plan->plan.plan_width = lefttree->plan_width; plan->plan.parallel_aware = false; + plan->plan.parallel_safe = lefttree->parallel_safe; } +/* + * bitmap_subplan_mark_shared + * Set isshared flag in bitmap subplan so that it will be created in + * shared memory. 
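+ * + * For a BitmapAnd, only the first subplan needs to be shared: the + * executor builds the first bitmap and intersects the later ones into + * it, so the later bitmaps can remain backend-local.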
+ */ +static void +bitmap_subplan_mark_shared(Plan *plan) +{ + if (IsA(plan, BitmapAnd)) + bitmap_subplan_mark_shared( + linitial(((BitmapAnd *) plan)->bitmapplans)); + else if (IsA(plan, BitmapOr)) + ((BitmapOr *) plan)->isshared = true; + else if (IsA(plan, BitmapIndexScan)) + ((BitmapIndexScan *) plan)->isshared = true; + else + elog(ERROR, "unrecognized node type: %d", nodeTag(plan)); +} /***************************************************************************** * @@ -5145,6 +5440,25 @@ make_functionscan(List *qptlist, return node; } +static TableFuncScan * +make_tablefuncscan(List *qptlist, + List *qpqual, + Index scanrelid, + TableFunc *tablefunc) +{ + TableFuncScan *node = makeNode(TableFuncScan); + Plan *plan = &node->scan.plan; + + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->tablefunc = tablefunc; + + return node; +} + static ValuesScan * make_valuesscan(List *qptlist, List *qpqual, @@ -5185,6 +5499,26 @@ make_ctescan(List *qptlist, return node; } +static NamedTuplestoreScan * +make_namedtuplestorescan(List *qptlist, + List *qpqual, + Index scanrelid, + char *enrname) +{ + NamedTuplestoreScan *node = makeNode(NamedTuplestoreScan); + Plan *plan = &node->scan.plan; + + /* cost should be inserted by caller */ + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->enrname = enrname; + + return node; +} + static WorkTableScan * make_worktablescan(List *qptlist, List *qpqual, @@ -5433,7 +5767,7 @@ make_remotesubplan(PlannerInfo *root, if (em->em_is_const) continue; - tle = tlist_member((Node *) em->em_expr, tlist); + tle = tlist_member(em->em_expr, tlist); if (tle) { pk_datatype = em->em_datatype; @@ -5447,7 +5781,7 @@ make_remotesubplan(PlannerInfo *root, * We prefer an exact match, though, so we do the basic search * first. 
*/ - tle = tlist_member_ignore_relabel((Node *) em->em_expr, tlist); + tle = tlist_member_ignore_relabel(em->em_expr, tlist); if (tle) { pk_datatype = em->em_datatype; @@ -5595,7 +5929,7 @@ make_foreignscan(List *qptlist, } static Append * -make_append(List *appendplans, List *tlist) +make_append(List *appendplans, List *tlist, List *partitioned_rels) { Append *node = makeNode(Append); Plan *plan = &node->plan; @@ -5604,6 +5938,7 @@ make_append(List *appendplans, List *tlist) plan->qual = NIL; plan->lefttree = NULL; plan->righttree = NULL; + node->partitioned_rels = partitioned_rels; node->appendplans = appendplans; return node; @@ -5699,7 +6034,8 @@ make_nestloop(List *tlist, List *nestParams, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinType jointype, + bool inner_unique) { NestLoop *node = makeNode(NestLoop); Plan *plan = &node->join.plan; @@ -5709,6 +6045,7 @@ make_nestloop(List *tlist, plan->lefttree = lefttree; plan->righttree = righttree; node->join.jointype = jointype; + node->join.inner_unique = inner_unique; node->join.joinqual = joinclauses; node->nestParams = nestParams; @@ -5722,7 +6059,8 @@ make_hashjoin(List *tlist, List *hashclauses, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinType jointype, + bool inner_unique) { HashJoin *node = makeNode(HashJoin); Plan *plan = &node->join.plan; @@ -5733,6 +6071,7 @@ make_hashjoin(List *tlist, plan->righttree = righttree; node->hashclauses = hashclauses; node->join.jointype = jointype; + node->join.inner_unique = inner_unique; node->join.joinqual = joinclauses; return node; @@ -5742,9 +6081,7 @@ static Hash * make_hash(Plan *lefttree, Oid skewTable, AttrNumber skewColumn, - bool skewInherit, - Oid skewColType, - int32 skewColTypmod) + bool skewInherit) { Hash *node = makeNode(Hash); Plan *plan = &node->plan; @@ -5757,8 +6094,6 @@ make_hash(Plan *lefttree, node->skewTable = skewTable; node->skewColumn = skewColumn; node->skewInherit = skewInherit; - node->skewColType = skewColType; - node->skewColTypmod = skewColTypmod; return node; } @@ -5774,7 +6109,9 @@ make_mergejoin(List *tlist, bool *mergenullsfirst, Plan *lefttree, Plan *righttree, - JoinType jointype) + JoinType jointype, + bool inner_unique, + bool skip_mark_restore) { MergeJoin *node = makeNode(MergeJoin); Plan *plan = &node->join.plan; @@ -5783,12 +6120,14 @@ make_mergejoin(List *tlist, plan->qual = otherclauses; plan->lefttree = lefttree; plan->righttree = righttree; + node->skip_mark_restore = skip_mark_restore; node->mergeclauses = mergeclauses; node->mergeFamilies = mergefamilies; node->mergeCollations = mergecollations; node->mergeStrategies = mergestrategies; node->mergeNullsFirst = mergenullsfirst; node->join.jointype = jointype; + node->join.inner_unique = inner_unique; node->join.joinqual = joinclauses; return node; @@ -5875,9 +6214,9 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, Oid coll, bool nulls_first, * prepare_sort_from_pathkeys * Prepare to sort according to given pathkeys * - * This is used to set up for both Sort and MergeAppend nodes. It calculates - * the executor's representation of the sort key information, and adjusts the - * plan targetlist if needed to add resjunk sort columns. + * This is used to set up for Sort, MergeAppend, and Gather Merge nodes. It + * calculates the executor's representation of the sort key information, and + * adjusts the plan targetlist if needed to add resjunk sort columns. 
* * Input parameters: * 'lefttree' is the plan node which yields input tuples @@ -5901,7 +6240,7 @@ add_sort_column(AttrNumber colIdx, Oid sortOp, Oid coll, bool nulls_first, * * If the pathkeys include expressions that aren't simple Vars, we will * usually need to add resjunk items to the input plan's targetlist to - * compute these expressions, since the Sort/MergeAppend node itself won't + * compute these expressions, since a Sort or MergeAppend node itself won't * do any such calculations. If the input plan type isn't one that can do * projections, this means adding a Result node just to do the projection. * However, the caller can pass adjust_tlist_in_place = TRUE to force the @@ -6083,7 +6422,8 @@ prepare_sort_from_pathkeys(Plan *lefttree, List *pathkeys, { /* copy needed so we don't modify input's tlist below */ tlist = copyObject(tlist); - lefttree = inject_projection_plan(lefttree, tlist); + lefttree = inject_projection_plan(lefttree, tlist, + lefttree->parallel_safe); } /* Don't bother testing is_projection_capable_plan again */ @@ -6349,6 +6689,16 @@ materialize_finished_plan(Plan *subplan) matplan = (Plan *) make_material(subplan); + /* + * XXX horrid kluge: if there are any initPlans attached to the subplan, + * move them up to the Material node, which is now effectively the top + * plan node in its query level. This prevents failure in + * SS_finalize_plan(), which see for comments. We don't bother adjusting + * the subplan's cost estimate for this. + */ + matplan->initPlan = subplan->initPlan; + subplan->initPlan = NIL; + /* Set cost data */ cost_material(&matpath, subplan->startup_cost, @@ -6360,6 +6710,7 @@ materialize_finished_plan(Plan *subplan) matplan->plan_rows = subplan->plan_rows; matplan->plan_width = subplan->plan_width; matplan->parallel_aware = false; + matplan->parallel_safe = subplan->parallel_safe; return matplan; } @@ -6384,6 +6735,7 @@ make_agg(List *tlist, List *qual, node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; node->numGroups = numGroups; + node->aggParams = NULL; /* SS_finalize_plan() will fill this */ node->groupingSets = groupingSets; node->chain = chain; @@ -6834,13 +7186,32 @@ make_result(List *tlist, } /* + * make_project_set + * Build a ProjectSet plan node + */ +static ProjectSet * +make_project_set(List *tlist, + Plan *subplan) +{ + ProjectSet *node = makeNode(ProjectSet); + Plan *plan = &node->plan; + + plan->targetlist = tlist; + plan->qual = NIL; + plan->lefttree = subplan; + plan->righttree = NULL; + + return node; +} + +/* * make_modifytable * Build a ModifyTable plan node */ static ModifyTable * make_modifytable(PlannerInfo *root, CmdType operation, bool canSetTag, - Index nominalRelation, + Index nominalRelation, List *partitioned_rels, List *resultRelations, List *subplans, List *withCheckOptionLists, List *returningLists, List *rowMarks, OnConflictExpr *onconflict, int epqParam) @@ -6866,8 +7237,10 @@ make_modifytable(PlannerInfo *root, node->operation = operation; node->canSetTag = canSetTag; node->nominalRelation = nominalRelation; + node->partitioned_rels = partitioned_rels; node->resultRelations = resultRelations; node->resultRelIndex = -1; /* will be set correctly in setrefs.c */ + node->rootResultRelIndex = -1; /* will be set correctly in setrefs.c */ node->plans = subplans; if (!onconflict) { @@ -6999,6 +7372,15 @@ is_projection_capable_path(Path *path) * projection to its dummy path. 
*/ return IS_DUMMY_PATH(path); + case T_ProjectSet: + + /* + * Although ProjectSet certainly projects, say "no" because we + * don't want the planner to randomly replace its tlist with + * something else; the SRFs have to stay at top level. This might + * get relaxed later. + */ + return false; default: break; } @@ -7036,6 +7418,15 @@ is_projection_capable_plan(Plan *plan) return ((RemoteSubplan *) plan)->sort == NULL && is_projection_capable_plan(plan->lefttree); #endif + case T_ProjectSet: + + /* + * Although ProjectSet certainly projects, say "no" because we + * don't want the planner to randomly replace its tlist with + * something else; the SRFs have to stay at top level. This might + * get relaxed later. + */ + return false; default: break; } diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 84ce6b3125..ebd442ad4d 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -3,7 +3,7 @@ * initsplan.c * Target list, qualification, joininfo initialization routines * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -51,6 +51,9 @@ static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, Relids *qualscope, Relids *inner_join_rels, List **postponed_qual_list); +static void process_security_barrier_quals(PlannerInfo *root, + int rti, Relids qualscope, + bool below_outer_join); static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root, Relids left_rels, Relids right_rels, Relids inner_join_rels, @@ -60,6 +63,7 @@ static void distribute_qual_to_rels(PlannerInfo *root, Node *clause, bool is_deduced, bool below_outer_join, JoinType jointype, + Index security_level, Relids qualscope, Relids ojscope, Relids outerjoin_nonnullable, @@ -105,7 +109,7 @@ add_base_rels_to_query(PlannerInfo *root, Node *jtnode) { int varno = ((RangeTblRef *) jtnode)->rtindex; - (void) build_simple_rel(root, varno, RELOPT_BASEREL); + (void) build_simple_rel(root, varno, NULL); } else if (IsA(jtnode, FromExpr)) { @@ -331,6 +335,8 @@ extract_lateral_references(PlannerInfo *root, RelOptInfo *brel, Index rtindex) vars = pull_vars_of_level((Node *) rte->subquery, 1); else if (rte->rtekind == RTE_FUNCTION) vars = pull_vars_of_level((Node *) rte->functions, 0); + else if (rte->rtekind == RTE_TABLEFUNC) + vars = pull_vars_of_level((Node *) rte->tablefunc, 0); else if (rte->rtekind == RTE_VALUES) vars = pull_vars_of_level((Node *) rte->values_lists, 0); else @@ -745,8 +751,14 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, { int varno = ((RangeTblRef *) jtnode)->rtindex; - /* No quals to deal with, just return correct result */ + /* qualscope is just the one RTE */ *qualscope = bms_make_singleton(varno); + /* Deal with any securityQuals attached to the RTE */ + if (root->qual_security_level > 0) + process_security_barrier_quals(root, + varno, + *qualscope, + below_outer_join); /* A single baserel does not create an inner join */ *inner_join_rels = NULL; joinlist = list_make1(jtnode); @@ -810,6 +822,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, if (bms_is_subset(pq->relids, *qualscope)) distribute_qual_to_rels(root, pq->qual, false, below_outer_join, JOIN_INNER, + root->qual_security_level, *qualscope, NULL, NULL, NULL, NULL); else @@ -825,6 +838,7 @@ 
deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, distribute_qual_to_rels(root, qual, false, below_outer_join, JOIN_INNER, + root->qual_security_level, *qualscope, NULL, NULL, NULL, postponed_qual_list); } @@ -1002,6 +1016,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, distribute_qual_to_rels(root, qual, false, below_outer_join, j->jointype, + root->qual_security_level, *qualscope, ojscope, nonnullable_rels, NULL, postponed_qual_list); @@ -1059,6 +1074,67 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join, } /* + * process_security_barrier_quals + * Transfer security-barrier quals into relation's baserestrictinfo list. + * + * The rewriter put any relevant security-barrier conditions into the RTE's + * securityQuals field, but it's now time to copy them into the rel's + * baserestrictinfo. + * + * In inheritance cases, we only consider quals attached to the parent rel + * here; they will be valid for all children too, so it's okay to consider + * them for purposes like equivalence class creation. Quals attached to + * individual child rels will be dealt with during path creation. + */ +static void +process_security_barrier_quals(PlannerInfo *root, + int rti, Relids qualscope, + bool below_outer_join) +{ + RangeTblEntry *rte = root->simple_rte_array[rti]; + Index security_level = 0; + ListCell *lc; + + /* + * Each element of the securityQuals list has been preprocessed into an + * implicitly-ANDed list of clauses. All the clauses in a given sublist + * should get the same security level, but successive sublists get higher + * levels. + */ + foreach(lc, rte->securityQuals) + { + List *qualset = (List *) lfirst(lc); + ListCell *lc2; + + foreach(lc2, qualset) + { + Node *qual = (Node *) lfirst(lc2); + + /* + * We cheat to the extent of passing ojscope = qualscope rather + * than its more logical value of NULL. The only effect this has + * is to force a Var-free qual to be evaluated at the rel rather + * than being pushed up to top of tree, which we don't want. 
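+ * + * For example, given securityQuals = ((q1, q2), (q3)), q1 and q2 are + * distributed at security_level 0 and q3 at level 1, while ordinary + * query quals are distributed at root->qual_security_level (at least 2 + * here).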
+ */ + distribute_qual_to_rels(root, qual, + false, + below_outer_join, + JOIN_INNER, + security_level, + qualscope, + qualscope, + NULL, + NULL, + NULL); + } + security_level++; + } + + /* Assert that qual_security_level is higher than anything we just used */ + Assert(security_level <= root->qual_security_level); +} + +/* * make_outerjoininfo * Build a SpecialJoinInfo for the current outer join * @@ -1516,6 +1592,7 @@ compute_semijoin_info(SpecialJoinInfo *sjinfo, List *clause) * 'below_outer_join': TRUE if the qual is from a JOIN/ON that is below the * nullable side of a higher-level outer join * 'jointype': type of join the qual is from (JOIN_INNER for a WHERE clause) + * 'security_level': security_level to assign to the qual * 'qualscope': set of baserels the qual's syntactic scope covers * 'ojscope': NULL if not an outer-join qual, else the minimum set of baserels * needed to form this join @@ -1545,6 +1622,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, bool is_deduced, bool below_outer_join, JoinType jointype, + Index security_level, Relids qualscope, Relids ojscope, Relids outerjoin_nonnullable, @@ -1794,6 +1872,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause, is_pushed_down, outerjoin_delayed, pseudoconstant, + security_level, relids, outerjoin_nonnullable, nullable_relids); @@ -2142,6 +2221,9 @@ distribute_restrictinfo_to_rels(PlannerInfo *root, /* Add clause to rel's restriction list */ rel->baserestrictinfo = lappend(rel->baserestrictinfo, restrictinfo); + /* Update security level info */ + rel->baserestrict_min_security = Min(rel->baserestrict_min_security, + restrictinfo->security_level); break; case BMS_MULTIPLE: @@ -2189,6 +2271,8 @@ distribute_restrictinfo_to_rels(PlannerInfo *root, * caller because this function is used after deconstruct_jointree, so we * don't have knowledge of where the clause items came from.) * + * "security_level" is the security level to assign to the new restrictinfo. + * * "both_const" indicates whether both items are known pseudo-constant; * in this case it is worth applying eval_const_expressions() in case we * can produce constant TRUE or constant FALSE. 
(Otherwise it's not, @@ -2209,6 +2293,7 @@ process_implied_equality(PlannerInfo *root, Expr *item2, Relids qualscope, Relids nullable_relids, + Index security_level, bool below_outer_join, bool both_const) { @@ -2221,8 +2306,8 @@ process_implied_equality(PlannerInfo *root, clause = make_opclause(opno, BOOLOID, /* opresulttype */ false, /* opretset */ - (Expr *) copyObject(item1), - (Expr *) copyObject(item2), + copyObject(item1), + copyObject(item2), InvalidOid, collation); @@ -2247,6 +2332,7 @@ process_implied_equality(PlannerInfo *root, */ distribute_qual_to_rels(root, (Node *) clause, true, below_outer_join, JOIN_INNER, + security_level, qualscope, NULL, NULL, nullable_relids, NULL); } @@ -2270,7 +2356,8 @@ build_implied_join_equality(Oid opno, Expr *item1, Expr *item2, Relids qualscope, - Relids nullable_relids) + Relids nullable_relids, + Index security_level) { RestrictInfo *restrictinfo; Expr *clause; @@ -2282,8 +2369,8 @@ build_implied_join_equality(Oid opno, clause = make_opclause(opno, BOOLOID, /* opresulttype */ false, /* opretset */ - (Expr *) copyObject(item1), - (Expr *) copyObject(item2), + copyObject(item1), + copyObject(item2), InvalidOid, collation); @@ -2294,6 +2381,7 @@ build_implied_join_equality(Oid opno, true, /* is_pushed_down */ false, /* outerjoin_delayed */ false, /* pseudoconstant */ + security_level, /* security_level */ qualscope, /* required_relids */ NULL, /* outer_relids */ nullable_relids); /* nullable_relids */ diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index f7d6dace59..c9331d272a 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -18,7 +18,7 @@ * * * Portions Copyright (c) 2012-2014, TransLattice, Inc. - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -105,6 +105,14 @@ preprocess_minmax_aggregates(PlannerInfo *root, List *tlist) return; /* + * Reject if query contains any CTEs; there's no way to build an indexscan + * on one so we couldn't succeed here. (If the CTEs are unreferenced, + * that's not true, but it doesn't seem worth expending cycles to check.) + */ + if (parse->cteList) + return; + + /* * We also restrict the query to reference exactly one table, since join * conditions can't be handled reasonably. (We could perhaps handle a * query containing cartesian-product joins, but it hardly seems worth the @@ -361,13 +369,12 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, subroot->plan_params = NIL; subroot->outer_params = NULL; subroot->init_plans = NIL; - subroot->cte_plan_ids = NIL; - subroot->parse = parse = (Query *) copyObject(root->parse); + subroot->parse = parse = copyObject(root->parse); IncrementVarSublevelsUp((Node *) parse, 1, 1); /* append_rel_list might contain outer Vars? 
*/ - subroot->append_rel_list = (List *) copyObject(root->append_rel_list); + subroot->append_rel_list = copyObject(root->append_rel_list); IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1); /* There shouldn't be any OJ info to translate, as yet */ Assert(subroot->join_info_list == NIL); diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index 27234ffa22..74de3b818f 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -9,7 +9,7 @@ * shorn of features like subselects, inheritance, aggregates, grouping, * and so on. (Those are the things planner.c deals with.) * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -71,14 +71,13 @@ query_planner(PlannerInfo *root, List *tlist, /* * If query allows parallelism in general, check whether the quals are - * parallel-restricted. There's currently no real benefit to setting - * this flag correctly because we can't yet reference subplans from - * parallel workers. But that might change someday, so set this - * correctly anyway. + * parallel-restricted. (We need not check final_rel->reltarget + * because it's empty at this point. Anything parallel-restricted in + * the query tlist will be dealt with later.) */ if (root->glob->parallelModeOK) final_rel->consider_parallel = - !has_parallel_hazard(parse->jointree->quals, false); + is_parallel_safe(root, parse->jointree->quals); /* The only path for it is a trivial Result path */ add_path(final_rel, (Path *) @@ -194,9 +193,15 @@ query_planner(PlannerInfo *root, List *tlist, joinlist = remove_useless_joins(root, joinlist); /* + * Also, reduce any semijoins with unique inner rels to plain inner joins. + * Likewise, this can't be done until now for lack of needed info. + */ + reduce_unique_semijoins(root); + + /* * Now distribute "placeholders" to base rels as needed. This has to be * done after join removal because removal could change whether a - * placeholder is evaluatable at a base rel. + * placeholder is evaluable at a base rel. */ add_placeholders_to_base_rels(root); @@ -243,8 +248,7 @@ query_planner(PlannerInfo *root, List *tlist, Assert(brel->relid == rti); /* sanity check on array */ - if (brel->reloptkind == RELOPT_BASEREL || - brel->reloptkind == RELOPT_OTHER_MEMBER_REL) + if (IS_SIMPLE_REL(brel)) total_pages += (double) brel->pages; } root->total_table_pages = total_pages; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 89031d265e..b49a91a3b0 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4,7 +4,7 @@ * The query optimizer external interface. * * Portions Copyright (c) 2012-2014, TransLattice, Inc. 
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -24,12 +24,14 @@ #include "access/sysattr.h" #include "access/xact.h" #include "catalog/pg_constraint_fn.h" +#include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "executor/executor.h" #include "executor/nodeAgg.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "lib/bipartite_match.h" +#include "lib/knapsack.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #ifdef OPTIMIZER_DEBUG @@ -74,17 +76,19 @@ create_upper_paths_hook_type create_upper_paths_hook = NULL; /* Expression kind codes for preprocess_expression */ -#define EXPRKIND_QUAL 0 -#define EXPRKIND_TARGET 1 -#define EXPRKIND_RTFUNC 2 -#define EXPRKIND_RTFUNC_LATERAL 3 -#define EXPRKIND_VALUES 4 -#define EXPRKIND_VALUES_LATERAL 5 -#define EXPRKIND_LIMIT 6 -#define EXPRKIND_APPINFO 7 -#define EXPRKIND_PHV 8 -#define EXPRKIND_TABLESAMPLE 9 -#define EXPRKIND_ARBITER_ELEM 10 +#define EXPRKIND_QUAL 0 +#define EXPRKIND_TARGET 1 +#define EXPRKIND_RTFUNC 2 +#define EXPRKIND_RTFUNC_LATERAL 3 +#define EXPRKIND_VALUES 4 +#define EXPRKIND_VALUES_LATERAL 5 +#define EXPRKIND_LIMIT 6 +#define EXPRKIND_APPINFO 7 +#define EXPRKIND_PHV 8 +#define EXPRKIND_TABLESAMPLE 9 +#define EXPRKIND_ARBITER_ELEM 10 +#define EXPRKIND_TABLEFUNC 11 +#define EXPRKIND_TABLEFUNC_LATERAL 12 /* Passthrough data for standard_qp_callback */ typedef struct @@ -94,12 +98,31 @@ typedef struct List *groupClause; /* overrides parse->groupClause */ } standard_qp_extra; +/* + * Data specific to grouping sets + */ + +typedef struct +{ + List *rollups; + List *hash_sets_idx; + double dNumHashGroups; + bool any_hashable; + Bitmapset *unsortable_refs; + Bitmapset *unhashable_refs; + List *unsortable_sets; + int *tleref_to_colnum_map; +} grouping_sets_data; + /* Local functions */ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); static void inheritance_planner(PlannerInfo *root); static void grouping_planner(PlannerInfo *root, bool inheritance_update, double tuple_fraction); +static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root); +static List *remap_to_groupclause_idx(List *groupClause, List *gsets, + int *tleref_to_colnum_map); static void preprocess_rowmarks(PlannerInfo *root); static double preprocess_limit(PlannerInfo *root, double tuple_fraction, @@ -112,8 +135,7 @@ static List *reorder_grouping_sets(List *groupingSets, List *sortclause); static void standard_qp_callback(PlannerInfo *root, void *extra); static double get_number_of_groups(PlannerInfo *root, double path_rows, - List *rollup_lists, - List *rollup_groupclauses); + grouping_sets_data *gd); static Size estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs, double dNumGroups); @@ -121,8 +143,16 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, const AggClauseCosts *agg_costs, - List *rollup_lists, - List *rollup_groupclauses); + grouping_sets_data *gd); +static void consider_groupingsets_paths(PlannerInfo *root, + RelOptInfo *grouped_rel, + Path *path, + bool is_sorted, + bool can_hash, + PathTarget *target, + grouping_sets_data *gd, + const AggClauseCosts *agg_costs, + double dNumGroups); static RelOptInfo *create_window_paths(PlannerInfo *root, RelOptInfo *input_rel, 
PathTarget *input_target, @@ -168,6 +198,9 @@ static Path *adjust_path_distribution(PlannerInfo *root, Query *parse, Path *path); static bool can_push_down_grouping(PlannerInfo *root, Query *parse, Path *path); static bool can_push_down_window(PlannerInfo *root, Path *path); +static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel, + List *targets, List *targets_contain_srfs); + /***************************************************************************** * @@ -221,12 +254,6 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) IsA(parse->utilityStmt, RemoteQuery)) return pgxc_direct_planner(parse, cursorOptions, boundParams); #endif - - /* Cursor options may come from caller or from DECLARE CURSOR stmt */ - if (parse->utilityStmt && - IsA(parse->utilityStmt, DeclareCursorStmt)) - cursorOptions |= ((DeclareCursorStmt *) parse->utilityStmt)->options; - /* * Set up global state for this planner invocation. This data is needed * across all levels of sub-Query that might exist in the given command, @@ -242,6 +269,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) glob->finalrtable = NIL; glob->finalrowmarks = NIL; glob->resultRelations = NIL; + glob->nonleafResultRelations = NIL; + glob->rootResultRelations = NIL; glob->relationOids = NIL; glob->invalItems = NIL; glob->nParamExec = 0; @@ -271,12 +300,25 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) * time and execution time, so don't generate a parallel plan if we're in * serializable mode. */ - glob->parallelModeOK = (cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 && - IsUnderPostmaster && dynamic_shared_memory_type != DSM_IMPL_NONE && - parse->commandType == CMD_SELECT && !parse->hasModifyingCTE && - parse->utilityStmt == NULL && max_parallel_workers_per_gather > 0 && - !IsParallelWorker() && !IsolationIsSerializable() && - !has_parallel_hazard((Node *) parse, true); + if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 && + IsUnderPostmaster && + dynamic_shared_memory_type != DSM_IMPL_NONE && + parse->commandType == CMD_SELECT && + !parse->hasModifyingCTE && + max_parallel_workers_per_gather > 0 && + !IsParallelWorker() && + !IsolationIsSerializable()) + { + /* all the cheap tests pass, so scan the query tree */ + glob->maxParallelHazard = max_parallel_hazard(parse); + glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE); + } + else + { + /* skip the query tree scan, just assume it's unsafe */ + glob->maxParallelHazard = PROPARALLEL_UNSAFE; + glob->parallelModeOK = false; + } /* * glob->parallelModeNeeded should tell us whether it's necessary to @@ -348,33 +390,14 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) if (cursorOptions & CURSOR_OPT_SCROLL) { if (!ExecSupportsBackwardScan(top_plan)) - { - Plan *sub_plan = top_plan; - - top_plan = materialize_finished_plan(sub_plan); - - /* - * XXX horrid kluge: if there are any initPlans attached to the - * formerly-top plan node, move them up to the Material node. This - * prevents failure in SS_finalize_plan, which see for comments. - * We don't bother adjusting the sub_plan's cost estimate for - * this. - */ - top_plan->initPlan = sub_plan->initPlan; - sub_plan->initPlan = NIL; - } + top_plan = materialize_finished_plan(top_plan); } /* * Optionally add a Gather node for testing purposes, provided this is - * actually a safe thing to do. 
(Note: we assume adding a Material node - * above did not change the parallel safety of the plan, so we can still - * rely on best_path->parallel_safe. However, that flag doesn't account - * for initPlans, which render the plan parallel-unsafe.) + * actually a safe thing to do. */ - if (force_parallel_mode != FORCE_PARALLEL_OFF && - best_path->parallel_safe && - top_plan->initPlan == NIL) + if (force_parallel_mode != FORCE_PARALLEL_OFF && top_plan->parallel_safe) { Gather *gather = makeNode(Gather); @@ -397,6 +420,7 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) gather->plan.plan_rows = top_plan->plan_rows; gather->plan.plan_width = top_plan->plan_width; gather->plan.parallel_aware = false; + gather->plan.parallel_safe = false; /* use parallel mode for parallel plans. */ root->glob->parallelModeNeeded = true; @@ -427,6 +451,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) Assert(glob->finalrtable == NIL); Assert(glob->finalrowmarks == NIL); Assert(glob->resultRelations == NIL); + Assert(glob->nonleafResultRelations == NIL); + Assert(glob->rootResultRelations == NIL); top_plan = set_plan_references(root, top_plan); /* ... and the subplans (both regular subplans and initplans) */ Assert(list_length(glob->subplans) == list_length(glob->subroots)); @@ -452,7 +478,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) result->planTree = top_plan; result->rtable = glob->finalrtable; result->resultRelations = glob->resultRelations; - result->utilityStmt = parse->utilityStmt; + result->nonleafResultRelations = glob->nonleafResultRelations; + result->rootResultRelations = glob->rootResultRelations; result->subplans = glob->subplans; result->rewindPlanIDs = glob->rewindPlanIDs; result->rowMarks = glob->finalrowmarks; @@ -464,6 +491,10 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) result->distributionNodes = NULL; #endif result->nParamExec = glob->nParamExec; + /* utilityStmt should be null, but we might as well copy it */ + result->utilityStmt = parse->utilityStmt; + result->stmt_location = parse->stmt_location; + result->stmt_len = parse->stmt_len; return result; } @@ -533,6 +564,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->multiexpr_params = NIL; root->eq_classes = NIL; root->append_rel_list = NIL; + root->pcinfo_list = NIL; root->rowMarks = NIL; memset(root->upper_rels, 0, sizeof(root->upper_rels)); memset(root->upper_targets, 0, sizeof(root->upper_targets)); @@ -541,6 +573,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->recursiveOk = true; root->minmax_aggs = NIL; + root->qual_security_level = 0; root->hasInheritedTarget = false; root->hasRecursion = hasRecursion; if (hasRecursion) @@ -649,6 +682,10 @@ subquery_planner(PlannerGlobal *glob, Query *parse, preprocess_expression(root, (Node *) parse->targetList, EXPRKIND_TARGET); + /* Constant-folding might have removed all set-returning functions */ + if (parse->hasTargetSRFs) + parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList); + newWithCheckOptions = NIL; foreach(l, parse->withCheckOptions) { @@ -716,6 +753,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, { RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); int kind; + ListCell *lcsq; if (rte->rtekind == RTE_RELATION) { @@ -742,7 +780,15 @@ subquery_planner(PlannerGlobal *glob, Query *parse, { /* Preprocess the function expression(s) fully */ kind = rte->lateral ? 
EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC; - rte->functions = (List *) preprocess_expression(root, (Node *) rte->functions, kind); + rte->functions = (List *) + preprocess_expression(root, (Node *) rte->functions, kind); + } + else if (rte->rtekind == RTE_TABLEFUNC) + { + /* Preprocess the function expression(s) fully */ + kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC; + rte->tablefunc = (TableFunc *) + preprocess_expression(root, (Node *) rte->tablefunc, kind); } else if (rte->rtekind == RTE_VALUES) { @@ -751,6 +797,19 @@ subquery_planner(PlannerGlobal *glob, Query *parse, rte->values_lists = (List *) preprocess_expression(root, (Node *) rte->values_lists, kind); } + + /* + * Process each element of the securityQuals list as if it were a + * separate qual expression (as indeed it is). We need to do it this + * way to get proper canonicalization of AND/OR structure. Note that + * this converts each element into an implicit-AND sublist. + */ + foreach(lcsq, rte->securityQuals) + { + lfirst(lcsq) = preprocess_expression(root, + (Node *) lfirst(lcsq), + EXPRKIND_QUAL); + } } /* @@ -841,10 +900,10 @@ subquery_planner(PlannerGlobal *glob, Query *parse, SS_identify_outer_params(root); /* - * If any initPlans were created in this query level, increment the - * surviving Paths' costs to account for them. They won't actually get - * attached to the plan tree till create_plan() runs, but we want to be - * sure their costs are included now. + * If any initPlans were created in this query level, adjust the surviving + * Paths' costs and parallel-safety flags to account for them. The + * initPlans won't actually get attached to the plan tree till + * create_plan() runs, but we must include their effects now. */ final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); SS_charge_for_initplans(root, final_rel); @@ -953,7 +1012,8 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind) if (root->hasJoinRTEs && !(kind == EXPRKIND_RTFUNC || kind == EXPRKIND_VALUES || - kind == EXPRKIND_TABLESAMPLE)) + kind == EXPRKIND_TABLESAMPLE || + kind == EXPRKIND_TABLEFUNC)) expr = flatten_join_alias_vars(root, expr); /* @@ -1090,7 +1150,6 @@ inheritance_planner(PlannerInfo *root) { Query *parse = root->parse; int parentRTindex = parse->resultRelation; - Bitmapset *resultRTindexes; Bitmapset *subqueryRTindexes; Bitmapset *modifiableARIindexes; int nominalRelation = -1; @@ -1106,6 +1165,8 @@ inheritance_planner(PlannerInfo *root) RelOptInfo *final_rel; ListCell *lc; Index rti; + RangeTblEntry *parent_rte; + List *partitioned_rels = NIL; Assert(parse->commandType != CMD_INSERT); @@ -1124,26 +1185,7 @@ inheritance_planner(PlannerInfo *root) * at least O(N^3) work expended here; and (2) would greatly complicate * management of the rowMarks list. * - * Note that any RTEs with security barrier quals will be turned into - * subqueries during planning, and so we must create copies of them too, - * except where they are target relations, which will each only be used in - * a single plan. - * - * To begin with, we'll need a bitmapset of the target relation relids. 
- */ - resultRTindexes = bms_make_singleton(parentRTindex); - foreach(lc, root->append_rel_list) - { - AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc); - - if (appinfo->parent_relid == parentRTindex) - resultRTindexes = bms_add_member(resultRTindexes, - appinfo->child_relid); - } - - /* - * Now, generate a bitmapset of the relids of the subquery RTEs, including - * security-barrier RTEs that will become subqueries, as just explained. + * To begin with, generate a bitmapset of the relids of the subquery RTEs. */ subqueryRTindexes = NULL; rti = 1; @@ -1151,9 +1193,7 @@ inheritance_planner(PlannerInfo *root) { RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); - if (rte->rtekind == RTE_SUBQUERY || - (rte->securityQuals != NIL && - !bms_is_member(rti, resultRTindexes))) + if (rte->rtekind == RTE_SUBQUERY) subqueryRTindexes = bms_add_member(subqueryRTindexes, rti); rti++; } @@ -1185,12 +1225,25 @@ inheritance_planner(PlannerInfo *root) } /* + * If the parent RTE is a partitioned table, we should use that as the + * nominal relation, because the RTEs added for partitioned tables + * (including the root parent) as child members of the inheritance set do + * not appear anywhere else in the plan. The situation is exactly the + * opposite in the case of non-partitioned inheritance parent as described + * below. + */ + parent_rte = rt_fetch(parentRTindex, root->parse->rtable); + if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE) + nominalRelation = parentRTindex; + + /* * And now we can get on with generating a plan for each child table. */ foreach(lc, root->append_rel_list) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc); PlannerInfo *subroot; + RangeTblEntry *child_rte; RelOptInfo *sub_final_rel; Path *subpath; @@ -1217,12 +1270,21 @@ inheritance_planner(PlannerInfo *root) appinfo); /* + * If there are securityQuals attached to the parent, move them to the + * child rel (they've already been transformed properly for that). + */ + parent_rte = rt_fetch(parentRTindex, subroot->parse->rtable); + child_rte = rt_fetch(appinfo->child_relid, subroot->parse->rtable); + child_rte->securityQuals = parent_rte->securityQuals; + parent_rte->securityQuals = NIL; + + /* * The rowMarks list might contain references to subquery RTEs, so * make a copy that we can apply ChangeVarNodes to. (Fortunately, the * executor doesn't need to see the modified copies --- we can just * pass it the original rowMarks list.) */ - subroot->rowMarks = (List *) copyObject(root->rowMarks); + subroot->rowMarks = copyObject(root->rowMarks); /* * The append_rel_list likewise might contain references to subquery @@ -1244,7 +1306,7 @@ inheritance_planner(PlannerInfo *root) AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2); if (bms_is_member(appinfo2->child_relid, modifiableARIindexes)) - appinfo2 = (AppendRelInfo *) copyObject(appinfo2); + appinfo2 = copyObject(appinfo2); subroot->append_rel_list = lappend(subroot->append_rel_list, appinfo2); @@ -1263,11 +1325,11 @@ inheritance_planner(PlannerInfo *root) /* * If this isn't the first child Query, generate duplicates of all - * subquery (or subquery-to-be) RTEs, and adjust Var numbering to - * reference the duplicates. To simplify the loop logic, we scan the - * original rtable not the copy just made by adjust_appendrel_attrs; - * that should be OK since subquery RTEs couldn't contain any - * references to the target rel. + * subquery RTEs, and adjust Var numbering to reference the + * duplicates. 
To simplify the loop logic, we scan the original rtable + not the copy just made by adjust_appendrel_attrs; that should be OK + since subquery RTEs couldn't contain any references to the target + rel. */ if (final_rtable != NIL && subqueryRTindexes != NULL) { @@ -1284,9 +1346,9 @@ /* * The RTE can't contain any references to its own RT - * index, except in the security barrier quals, so we can - * save a few cycles by applying ChangeVarNodes before we - * append the RTE to the rangetable. + * index, except in its securityQuals, so we can save a + * few cycles by applying ChangeVarNodes to the rest of + * the rangetable before we append the RTE to it. */ newrti = list_length(subroot->parse->rtable) + 1; ChangeVarNodes((Node *) subroot->parse, rti, newrti, 0); @@ -1325,21 +1387,25 @@ grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ ); /* - * Planning may have modified the query result relation (if there were - * security barrier quals on the result RTE). - */ - appinfo->child_relid = subroot->parse->resultRelation; - - /* - * We'll use the first child relation (even if it's excluded) as the - * nominal target relation of the ModifyTable node. Because of the - * way expand_inherited_rtentry works, this should always be the RTE - * representing the parent table in its role as a simple member of the - * inheritance set. (It would be logically cleaner to use the - * inheritance parent RTE as the nominal target; but since that RTE - * will not be otherwise referenced in the plan, doing so would give - * rise to confusing use of multiple aliases in EXPLAIN output for - * what the user will think is the "same" table.) + * Set the nominal target relation of the ModifyTable node if not + * already done. We use the inheritance parent RTE as the nominal + * target relation if it's a partitioned table (see just above this + * loop). In the non-partitioned parent case, we'll use the first + * child relation (even if it's excluded) as the nominal target + * relation. Because of the way expand_inherited_rtentry works, the + * latter should be the RTE representing the parent table in its role + * as a simple member of the inheritance set. + * + * It would be logically cleaner to *always* use the inheritance + * parent RTE as the nominal relation; but that RTE is not otherwise + * referenced in the plan in the non-partitioned inheritance case. + * Instead the duplicate child RTE created by expand_inherited_rtentry + * is used elsewhere in the plan, so using the original parent RTE + * would give rise to confusing use of multiple aliases in EXPLAIN + * output for what the user will think is the "same" table. OTOH, + * it's not a problem in the partitioned inheritance case, because the + * duplicate child RTE added for the parent does not appear anywhere + * else in the plan tree. */ if (nominalRelation < 0) nominalRelation = appinfo->child_relid; @@ -1402,41 +1468,9 @@ if (final_rtable == NIL) final_rtable = subroot->parse->rtable; else - { - List *tmp_rtable = NIL; - ListCell *cell1, - *cell2; - - /* - * Check to see if any of the original RTEs were turned into - * subqueries during planning. Currently, this should only ever - * happen due to securityQuals being involved which push a - * relation down under a subquery, to ensure that the security - * barrier quals are evaluated first. - * - * When this happens, we want to use the new subqueries in the - * final rtable. 
- */ - forboth(cell1, final_rtable, cell2, subroot->parse->rtable) - { - RangeTblEntry *rte1 = (RangeTblEntry *) lfirst(cell1); - RangeTblEntry *rte2 = (RangeTblEntry *) lfirst(cell2); - - if (rte1->rtekind == RTE_RELATION && - rte2->rtekind == RTE_SUBQUERY) - { - /* Should only be when there are securityQuals today */ - Assert(rte1->securityQuals != NIL); - tmp_rtable = lappend(tmp_rtable, rte2); - } - else - tmp_rtable = lappend(tmp_rtable, rte1); - } - - final_rtable = list_concat(tmp_rtable, + final_rtable = list_concat(final_rtable, list_copy_tail(subroot->parse->rtable, list_length(final_rtable))); - } /* * We need to collect all the RelOptInfos from all child plans into @@ -1479,6 +1513,13 @@ inheritance_planner(PlannerInfo *root) Assert(!parse->onConflict); } + if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE) + { + partitioned_rels = get_partitioned_child_rels(root, parentRTindex); + /* The root partitioned table is included as a child rel */ + Assert(list_length(partitioned_rels) >= 1); + } + /* Result path must go into outer query's FINAL upperrel */ final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL); @@ -1532,6 +1573,7 @@ inheritance_planner(PlannerInfo *root) parse->commandType, parse->canSetTag, nominalRelation, + partitioned_rels, resultRelations, subpaths, subroots, @@ -1580,8 +1622,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, int64 count_est = 0; double limit_tuples = -1.0; bool have_postponed_srfs = false; - double tlist_rows; PathTarget *final_target; + List *final_targets; + List *final_targets_contain_srfs; RelOptInfo *current_rel; RelOptInfo *final_rel; ListCell *lc; @@ -1644,6 +1687,10 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* Also extract the PathTarget form of the setop result tlist */ final_target = current_rel->cheapest_total_path->pathtarget; + /* The setop result tlist couldn't contain any SRFs */ + Assert(!parse->hasTargetSRFs); + final_targets = final_targets_contain_srfs = NIL; + /* * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have * checked already, but let's make sure). 
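
The nominal-relation choice made in the hunks above is compact but easy to misread, so here is a freestanding sketch of just that decision. The enum, function name, and RT-index parameters are simplified stand-ins for illustration, not the real planner API:

#include <stdio.h>

typedef enum { REL_PLAIN_INHERITANCE, REL_PARTITIONED } ParentKind;

/*
 * For a partitioned parent the parent's own RT index is the nominal
 * target, because the child RTEs added for partitioned tables appear
 * nowhere else in the plan; for plain inheritance the first child
 * (the parent in its role as a simple inheritance member) is used.
 */
static int
choose_nominal_relation(ParentKind kind, int parent_rti, int first_child_rti)
{
    return (kind == REL_PARTITIONED) ? parent_rti : first_child_rti;
}

int
main(void)
{
    printf("%d\n", choose_nominal_relation(REL_PARTITIONED, 1, 3));       /* 1 */
    printf("%d\n", choose_nominal_relation(REL_PLAIN_INHERITANCE, 1, 3)); /* 3 */
    return 0;
}
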
@@ -1669,14 +1716,19 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, { /* No set operations, do regular planning */ PathTarget *sort_input_target; + List *sort_input_targets; + List *sort_input_targets_contain_srfs; PathTarget *grouping_target; + List *grouping_targets; + List *grouping_targets_contain_srfs; PathTarget *scanjoin_target; + List *scanjoin_targets; + List *scanjoin_targets_contain_srfs; bool have_grouping; AggClauseCosts agg_costs; WindowFuncLists *wflists = NULL; List *activeWindows = NIL; - List *rollup_lists = NIL; - List *rollup_groupclauses = NIL; + grouping_sets_data *gset_data = NULL; standard_qp_extra qp_extra; /* A recursive query should always have setOperations */ @@ -1685,84 +1737,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* Preprocess grouping sets and GROUP BY clause, if any */ if (parse->groupingSets) { - int *tleref_to_colnum_map; - List *sets; - int maxref; - ListCell *lc; - ListCell *lc2; - ListCell *lc_set; - - parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1); - - /* Identify max SortGroupRef in groupClause, for array sizing */ - maxref = 0; - foreach(lc, parse->groupClause) - { - SortGroupClause *gc = lfirst(lc); - - if (gc->tleSortGroupRef > maxref) - maxref = gc->tleSortGroupRef; - } - - /* Allocate workspace array for remapping */ - tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int)); - - /* Examine the rollup sets */ - sets = extract_rollup_sets(parse->groupingSets); - - foreach(lc_set, sets) - { - List *current_sets = (List *) lfirst(lc_set); - List *groupclause; - int ref; - - /* - * Reorder the current list of grouping sets into correct - * prefix order. If only one aggregation pass is needed, try - * to make the list match the ORDER BY clause; if more than - * one pass is needed, we don't bother with that. - */ - current_sets = reorder_grouping_sets(current_sets, - (list_length(sets) == 1 - ? parse->sortClause - : NIL)); - - /* - * Order the groupClause appropriately. If the first grouping - * set is empty, this can match regular GROUP BY - * preprocessing, otherwise we have to force the groupClause - * to match that grouping set's order. - */ - groupclause = preprocess_groupclause(root, - linitial(current_sets)); - - /* - * Now that we've pinned down an order for the groupClause for - * this list of grouping sets, we need to remap the entries in - * the grouping sets from sortgrouprefs to plain indices - * (0-based) into the groupClause for this collection of - * grouping sets. - */ - ref = 0; - foreach(lc, groupclause) - { - SortGroupClause *gc = lfirst(lc); - - tleref_to_colnum_map[gc->tleSortGroupRef] = ref++; - } - - foreach(lc, current_sets) - { - foreach(lc2, (List *) lfirst(lc)) - { - lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)]; - } - } - - /* Save the reordered sets and corresponding groupclauses */ - rollup_lists = lcons(current_sets, rollup_lists); - rollup_groupclauses = lcons(groupclause, rollup_groupclauses); - } + gset_data = preprocess_grouping_sets(root); } else { @@ -1781,12 +1756,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, parse->rtable); /* - * Expand any rangetable entries that have security barrier quals. - * This may add new security barrier subquery RTEs to the rangetable. - */ - expand_security_quals(root, tlist); - - /* * We are now done hacking up the query's targetlist. 
Most of the * remaining planning work will be done with the PathTarget * representation of tlists, but save aside the full representation so @@ -1846,16 +1815,14 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, * Figure out whether there's a hard limit on the number of rows that * query_planner's result subplan needs to return. Even if we know a * hard limit overall, it doesn't apply if the query has any - * grouping/aggregation operations. (XXX it also doesn't apply if the - * tlist contains any SRFs; but checking for that here seems more - * costly than it's worth, since root->limit_tuples is only used for - * cost estimates, and only in a small number of cases.) + * grouping/aggregation operations, or SRFs in the tlist. */ if (parse->groupClause || parse->groupingSets || parse->distinctClause || parse->hasAggs || parse->hasWindowFuncs || + parse->hasTargetSRFs || root->hasHavingQual) root->limit_tuples = -1.0; else @@ -1864,8 +1831,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* Set up data needed by standard_qp_callback */ qp_extra.tlist = tlist; qp_extra.activeWindows = activeWindows; - qp_extra.groupClause = - parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause; + qp_extra.groupClause = (gset_data + ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL) + : parse->groupClause); /* * Generate the best unsorted and presorted paths for the scan/join @@ -1923,8 +1891,50 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, scanjoin_target = grouping_target; /* - * Forcibly apply scan/join target to all the Paths for the scan/join - * rel. + * If there are any SRFs in the targetlist, we must separate each of + * these PathTargets into SRF-computing and SRF-free targets. Replace + * each of the named targets with a SRF-free version, and remember the + * list of additional projection steps we need to add afterwards. + */ + if (parse->hasTargetSRFs) + { + /* final_target doesn't recompute any SRFs in sort_input_target */ + split_pathtarget_at_srfs(root, final_target, sort_input_target, + &final_targets, + &final_targets_contain_srfs); + final_target = (PathTarget *) linitial(final_targets); + Assert(!linitial_int(final_targets_contain_srfs)); + /* likewise for sort_input_target vs. grouping_target */ + split_pathtarget_at_srfs(root, sort_input_target, grouping_target, + &sort_input_targets, + &sort_input_targets_contain_srfs); + sort_input_target = (PathTarget *) linitial(sort_input_targets); + Assert(!linitial_int(sort_input_targets_contain_srfs)); + /* likewise for grouping_target vs. 
scanjoin_target */ + split_pathtarget_at_srfs(root, grouping_target, scanjoin_target, + &grouping_targets, + &grouping_targets_contain_srfs); + grouping_target = (PathTarget *) linitial(grouping_targets); + Assert(!linitial_int(grouping_targets_contain_srfs)); + /* scanjoin_target will not have any SRFs precomputed for it */ + split_pathtarget_at_srfs(root, scanjoin_target, NULL, + &scanjoin_targets, + &scanjoin_targets_contain_srfs); + scanjoin_target = (PathTarget *) linitial(scanjoin_targets); + Assert(!linitial_int(scanjoin_targets_contain_srfs)); + } + else + { + /* initialize lists, just to keep compiler quiet */ + final_targets = final_targets_contain_srfs = NIL; + sort_input_targets = sort_input_targets_contain_srfs = NIL; + grouping_targets = grouping_targets_contain_srfs = NIL; + scanjoin_targets = scanjoin_targets_contain_srfs = NIL; + } + + /* + * Forcibly apply SRF-free scan/join target to all the Paths for the + * scan/join rel. * * In principle we should re-run set_cheapest() here to identify the * cheapest path, but it seems unlikely that adding the same tlist @@ -1961,7 +1971,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, * computed by partial paths. */ if (current_rel->partial_pathlist && - !has_parallel_hazard((Node *) scanjoin_target->exprs, false)) + is_parallel_safe(root, (Node *) scanjoin_target->exprs)) { /* Apply the scan/join target to each partial path */ foreach(lc, current_rel->partial_pathlist) @@ -1995,6 +2005,12 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, current_rel->partial_pathlist = NIL; } + /* Now fix things up if scan/join target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + scanjoin_targets, + scanjoin_targets_contain_srfs); + /* * Save the various upper-rel PathTargets we just computed into * root->upper_targets[]. The core code doesn't use this, but it @@ -2017,8 +2033,12 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, current_rel, grouping_target, &agg_costs, - rollup_lists, - rollup_groupclauses); + gset_data); + /* Fix things up if grouping_target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + grouping_targets, + grouping_targets_contain_srfs); } /* @@ -2034,6 +2054,11 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, tlist, wflists, activeWindows); + /* Fix things up if sort_input_target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + sort_input_targets, + sort_input_targets_contain_srfs); } /* @@ -2045,7 +2070,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, current_rel = create_distinct_paths(root, current_rel); } - } /* end of if (setOperations) */ /* @@ -2062,36 +2086,11 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, final_target, have_postponed_srfs ? -1.0 : limit_tuples); - } - - /* - * If there are set-returning functions in the tlist, scale up the output - * rowcounts of all surviving Paths to account for that. Note that if any - * SRFs appear in sorting or grouping columns, we'll have underestimated - * the numbers of rows passing through earlier steps; but that's such a - * weird usage that it doesn't seem worth greatly complicating matters to - * account for it. - */ - tlist_rows = tlist_returns_set_rows(tlist); - if (tlist_rows > 1) - { - foreach(lc, current_rel->pathlist) - { - Path *path = (Path *) lfirst(lc); - - /* - * We assume that execution costs of the tlist as such were - * already accounted for. 
However, it still seems appropriate to - * charge something more for the executor's general costs of - * processing the added tuples. The cost is probably less than - * cpu_tuple_cost, though, so we arbitrarily use half of that. - */ - path->total_cost += path->rows * (tlist_rows - 1) * - cpu_tuple_cost / 2; - - path->rows *= tlist_rows; - } - /* No need to run set_cheapest; we're keeping all paths anyway. */ + /* Fix things up if final_target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + final_targets, + final_targets_contain_srfs); } /* @@ -2107,8 +2106,8 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, * query. */ if (current_rel->consider_parallel && - !has_parallel_hazard(parse->limitOffset, false) && - !has_parallel_hazard(parse->limitCount, false)) + is_parallel_safe(root, parse->limitOffset) && + is_parallel_safe(root, parse->limitCount)) final_rel->consider_parallel = true; /* @@ -2234,6 +2233,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, parse->commandType, parse->canSetTag, parse->resultRelation, + NIL, list_make1_int(parse->resultRelation), list_make1(path), list_make1(root), @@ -2268,6 +2268,221 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* Note: currently, we leave it to callers to do set_cheapest() */ } +/* + * Do preprocessing for groupingSets clause and related data. This handles the + * preliminary steps of expanding the grouping sets, organizing them into lists + * of rollups, and preparing annotations which will later be filled in with + * size estimates. + */ +static grouping_sets_data * +preprocess_grouping_sets(PlannerInfo *root) +{ + Query *parse = root->parse; + List *sets; + int maxref = 0; + ListCell *lc; + ListCell *lc_set; + grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data)); + + parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1); + + gd->any_hashable = false; + gd->unhashable_refs = NULL; + gd->unsortable_refs = NULL; + gd->unsortable_sets = NIL; + + if (parse->groupClause) + { + ListCell *lc; + + foreach(lc, parse->groupClause) + { + SortGroupClause *gc = lfirst(lc); + Index ref = gc->tleSortGroupRef; + + if (ref > maxref) + maxref = ref; + + if (!gc->hashable) + gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref); + + if (!OidIsValid(gc->sortop)) + gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref); + } + } + + /* Allocate workspace array for remapping */ + gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int)); + + /* + * If we have any unsortable sets, we must extract them before trying to + * prepare rollups. Unsortable sets don't go through + * reorder_grouping_sets, so we must apply the GroupingSetData annotation + * here. + */ + if (!bms_is_empty(gd->unsortable_refs)) + { + List *sortable_sets = NIL; + + foreach(lc, parse->groupingSets) + { + List *gset = lfirst(lc); + + if (bms_overlap_list(gd->unsortable_refs, gset)) + { + GroupingSetData *gs = makeNode(GroupingSetData); + + gs->set = gset; + gd->unsortable_sets = lappend(gd->unsortable_sets, gs); + + /* + * We must enforce here that an unsortable set is hashable; + * later code assumes this. Parse analysis only checks that + * every individual column is either hashable or sortable. + * + * Note that passing this test doesn't guarantee we can + * generate a plan; there might be other showstoppers. 
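
The classification being built here (sortable sets go on to extract_rollup_sets, unsortable-but-hashable sets are set aside in gd->unsortable_sets, anything else is an error) can be pictured with plain bitmasks of sortgroup refs standing in for the real Bitmapsets. This is a toy model under that assumption, not the actual List/Bitmapset code:

#include <stdio.h>
#include <stdint.h>

typedef enum { SET_SORTABLE, SET_HASH_ONLY, SET_UNPLANNABLE } SetClass;

static SetClass
classify_set(uint32_t set_refs, uint32_t unsortable, uint32_t unhashable)
{
    if ((set_refs & unsortable) == 0)
        return SET_SORTABLE;    /* goes through extract_rollup_sets */
    if ((set_refs & unhashable) == 0)
        return SET_HASH_ONLY;   /* kept aside as an unsortable set */
    return SET_UNPLANNABLE;     /* ereport: "could not implement GROUP BY" */
}

int
main(void)
{
    /* hypothetical refs 1..3; ref 2 unsortable, ref 3 unhashable */
    uint32_t unsortable = 1u << 2;
    uint32_t unhashable = 1u << 3;

    printf("%d\n", classify_set(1u << 1, unsortable, unhashable));              /* 0 */
    printf("%d\n", classify_set(1u << 2, unsortable, unhashable));              /* 1 */
    printf("%d\n", classify_set((1u << 2) | (1u << 3), unsortable, unhashable)); /* 2 */
    return 0;
}
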
+ */ + if (bms_overlap_list(gd->unhashable_refs, gset)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement GROUP BY"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + } + else + sortable_sets = lappend(sortable_sets, gset); + } + + if (sortable_sets) + sets = extract_rollup_sets(sortable_sets); + else + sets = NIL; + } + else + sets = extract_rollup_sets(parse->groupingSets); + + foreach(lc_set, sets) + { + List *current_sets = (List *) lfirst(lc_set); + RollupData *rollup = makeNode(RollupData); + GroupingSetData *gs; + + /* + * Reorder the current list of grouping sets into correct prefix + * order. If only one aggregation pass is needed, try to make the + * list match the ORDER BY clause; if more than one pass is needed, we + * don't bother with that. + * + * Note that this reorders the sets from smallest-member-first to + * largest-member-first, and applies the GroupingSetData annotations, + * though the data will be filled in later. + */ + current_sets = reorder_grouping_sets(current_sets, + (list_length(sets) == 1 + ? parse->sortClause + : NIL)); + + /* + * Get the initial (and therefore largest) grouping set. + */ + gs = linitial(current_sets); + + /* + * Order the groupClause appropriately. If the first grouping set is + * empty, then the groupClause must also be empty; otherwise we have + * to force the groupClause to match that grouping set's order. + * + * (The first grouping set can be empty even though parse->groupClause + * is not empty only if all non-empty grouping sets are unsortable. + * The groupClauses for hashed grouping sets are built later on.) + */ + if (gs->set) + rollup->groupClause = preprocess_groupclause(root, gs->set); + else + rollup->groupClause = NIL; + + /* + * Is it hashable? We pretend empty sets are hashable even though we + * actually force them not to be hashed later. But don't bother if + * there's nothing but empty sets (since in that case we can't hash + * anything). + */ + if (gs->set && + !bms_overlap_list(gd->unhashable_refs, gs->set)) + { + rollup->hashable = true; + gd->any_hashable = true; + } + + /* + * Now that we've pinned down an order for the groupClause for this + * list of grouping sets, we need to remap the entries in the grouping + * sets from sortgrouprefs to plain indices (0-based) into the + * groupClause for this collection of grouping sets. We keep the + * original form for later use, though. + */ + rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, + current_sets, + gd->tleref_to_colnum_map); + rollup->gsets_data = current_sets; + + gd->rollups = lappend(gd->rollups, rollup); + } + + if (gd->unsortable_sets) + { + /* + * We have not yet pinned down a groupclause for this, but we will + * need index-based lists for estimation purposes. Construct + * hash_sets_idx based on the entire original groupclause for now. + */ + gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause, + gd->unsortable_sets, + gd->tleref_to_colnum_map); + gd->any_hashable = true; + } + + return gd; +} + +/* + * Given a groupclause and a list of GroupingSetData, return equivalent sets + * (without annotation) mapped to indexes into the given groupclause. 
+ */ +static List * +remap_to_groupclause_idx(List *groupClause, + List *gsets, + int *tleref_to_colnum_map) +{ + int ref = 0; + List *result = NIL; + ListCell *lc; + + foreach(lc, groupClause) + { + SortGroupClause *gc = lfirst(lc); + + tleref_to_colnum_map[gc->tleSortGroupRef] = ref++; + } + + foreach(lc, gsets) + { + List *set = NIL; + ListCell *lc2; + GroupingSetData *gs = lfirst(lc); + + foreach(lc2, gs->set) + { + set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]); + } + + result = lappend(result, set); + } + + return result; +} + + /* * Detect whether a plan node is a "dummy" plan created when a relation @@ -2301,52 +2516,6 @@ is_dummy_plan(Plan *plan) } /* - * Create a bitmapset of the RT indexes of live base relations - * - * Helper for preprocess_rowmarks ... at this point in the proceedings, - * the only good way to distinguish baserels from appendrel children - * is to see what is in the join tree. - */ -static Bitmapset * -get_base_rel_indexes(Node *jtnode) -{ - Bitmapset *result; - - if (jtnode == NULL) - return NULL; - if (IsA(jtnode, RangeTblRef)) - { - int varno = ((RangeTblRef *) jtnode)->rtindex; - - result = bms_make_singleton(varno); - } - else if (IsA(jtnode, FromExpr)) - { - FromExpr *f = (FromExpr *) jtnode; - ListCell *l; - - result = NULL; - foreach(l, f->fromlist) - result = bms_join(result, - get_base_rel_indexes(lfirst(l))); - } - else if (IsA(jtnode, JoinExpr)) - { - JoinExpr *j = (JoinExpr *) jtnode; - - result = bms_join(get_base_rel_indexes(j->larg), - get_base_rel_indexes(j->rarg)); - } - else - { - elog(ERROR, "unrecognized node type: %d", - (int) nodeTag(jtnode)); - result = NULL; /* keep compiler quiet */ - } - return result; -} - -/* * preprocess_rowmarks - set up PlanRowMarks if needed */ static void @@ -2371,7 +2540,8 @@ preprocess_rowmarks(PlannerInfo *root) if (parse->jointree) { - Bitmapset *baserels = get_base_rel_indexes((Node *) parse->jointree); + Bitmapset *baserels = get_relids_in_jointree((Node *) + parse->jointree, false); int x, num_rels = 0; bool dist_found = false; @@ -2410,7 +2580,7 @@ preprocess_rowmarks(PlannerInfo *root) * make a bitmapset of all base rels and then remove the items we don't * need or have FOR [KEY] UPDATE/SHARE marks for. */ - rels = get_base_rel_indexes((Node *) parse->jointree); + rels = get_relids_in_jointree((Node *) parse->jointree, false); if (parse->resultRelation) rels = bms_del_member(rels, parse->resultRelation); @@ -2513,17 +2683,8 @@ select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength) /* * We don't need a tuple lock, only the ability to re-fetch - * the row. Regular tables support ROW_MARK_REFERENCE, but if - * this RTE has security barrier quals, it will be turned into - * a subquery during planning, so use ROW_MARK_COPY. - * - * This is only necessary for LCS_NONE, since real tuple locks - * on an RTE with security barrier quals are supported by - * pushing the lock down into the subquery --- see - * expand_security_qual. + * the row. */ - if (rte->securityQuals != NIL) - return ROW_MARK_COPY; return ROW_MARK_REFERENCE; break; case LCS_FORKEYSHARE: @@ -3264,7 +3425,7 @@ extract_rollup_sets(List *groupingSets) /* * Reorder the elements of a list of grouping sets such that they have correct - * prefix relationships. + * prefix relationships. Also inserts the GroupingSetData annotations. * * The input must be ordered with smallest sets first; the result is returned * with largest sets first. 
Note that the result shares no list substructure @@ -3287,6 +3448,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause) { List *candidate = lfirst(lc); List *new_elems = list_difference_int(candidate, previous); + GroupingSetData *gs = makeNode(GroupingSetData); if (list_length(new_elems) > 0) { @@ -3314,7 +3476,8 @@ reorder_grouping_sets(List *groupingsets, List *sortclause) } } - result = lcons(list_copy(previous), result); + gs->set = list_copy(previous); + result = lcons(gs, result); list_free(new_elems); } @@ -3409,15 +3572,16 @@ standard_qp_callback(PlannerInfo *root, void *extra) * Estimate number of groups produced by grouping clauses (1 if not grouping) * * path_rows: number of output rows from scan/join step - * rollup_lists: list of grouping sets, or NIL if not doing grouping sets - * rollup_groupclauses: list of grouping clauses for grouping sets, - * or NIL if not doing grouping sets + * gsets: grouping set data, or NULL if not doing grouping sets + * + * If doing grouping sets, we also annotate the gsets data with the estimates + * for each set and each individual rollup list, with a view to later + * determining whether some combination of them could be hashed instead. */ static double get_number_of_groups(PlannerInfo *root, double path_rows, - List *rollup_lists, - List *rollup_groupclauses) + grouping_sets_data *gd) { Query *parse = root->parse; double dNumGroups; @@ -3429,28 +3593,62 @@ get_number_of_groups(PlannerInfo *root, if (parse->groupingSets) { /* Add up the estimates for each grouping set */ - ListCell *lc, - *lc2; + ListCell *lc; + ListCell *lc2; + + Assert(gd); /* keep Coverity happy */ dNumGroups = 0; - forboth(lc, rollup_groupclauses, lc2, rollup_lists) + + foreach(lc, gd->rollups) { - List *groupClause = (List *) lfirst(lc); - List *gsets = (List *) lfirst(lc2); - ListCell *lc3; + RollupData *rollup = lfirst(lc); + ListCell *lc; - groupExprs = get_sortgrouplist_exprs(groupClause, + groupExprs = get_sortgrouplist_exprs(rollup->groupClause, parse->targetList); - foreach(lc3, gsets) + rollup->numGroups = 0.0; + + forboth(lc, rollup->gsets, lc2, rollup->gsets_data) { - List *gset = (List *) lfirst(lc3); + List *gset = (List *) lfirst(lc); + GroupingSetData *gs = lfirst(lc2); + double numGroups = estimate_num_groups(root, + groupExprs, + path_rows, + &gset); + + gs->numGroups = numGroups; + rollup->numGroups += numGroups; + } - dNumGroups += estimate_num_groups(root, - groupExprs, - path_rows, - &gset); + dNumGroups += rollup->numGroups; + } + + if (gd->hash_sets_idx) + { + ListCell *lc; + + gd->dNumHashGroups = 0; + + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); + + forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets) + { + List *gset = (List *) lfirst(lc); + GroupingSetData *gs = lfirst(lc2); + double numGroups = estimate_num_groups(root, + groupExprs, + path_rows, + &gset); + + gs->numGroups = numGroups; + gd->dNumHashGroups += numGroups; } + + dNumGroups += gd->dNumHashGroups; } } else @@ -3486,6 +3684,11 @@ get_number_of_groups(PlannerInfo *root, * estimate_hashagg_tablesize * estimate the number of bytes that a hash aggregate hashtable will * require based on the agg_costs, path width and dNumGroups. + * + * XXX this may be over-estimating the size now that hashagg knows to omit + * unneeded columns from the hashtable. Also for mixed-mode grouping sets, + * grouping columns not in the hashed set are counted here even though hashagg + * won't store them. Is this a problem? 
*/ static Size estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs, @@ -3502,6 +3705,12 @@ estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs, /* plus the per-hash-entry overhead */ hashentrysize += hash_agg_entry_size(agg_costs->numAggs); + /* + * Note that this disregards the effect of fill-factor and growth policy + * of the hash-table. That's probably ok, given that the default + * fill-factor is relatively high. It'd be hard to meaningfully factor in + * "double-in-size" growth policies here. + */ return hashentrysize * dNumGroups; } @@ -3530,8 +3739,7 @@ create_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, const AggClauseCosts *agg_costs, - List *rollup_lists, - List *rollup_groupclauses) + grouping_sets_data *gd) { Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; @@ -3558,8 +3766,8 @@ create_grouping_paths(PlannerInfo *root, * target list and HAVING quals are parallel-safe. */ if (input_rel->consider_parallel && - !has_parallel_hazard((Node *) target->exprs, false) && - !has_parallel_hazard((Node *) parse->havingQual, false)) + is_parallel_safe(root, (Node *) target->exprs) && + is_parallel_safe(root, (Node *) parse->havingQual)) grouped_rel->consider_parallel = true; /* @@ -3615,7 +3823,8 @@ create_grouping_paths(PlannerInfo *root, create_append_path(grouped_rel, paths, NULL, - 0); + 0, + NIL); path->pathtarget = target; } else @@ -3640,8 +3849,7 @@ create_grouping_paths(PlannerInfo *root, */ dNumGroups = get_number_of_groups(root, cheapest_path->rows, - rollup_lists, - rollup_groupclauses); + gd); /* * Determine whether it's possible to perform sort-based implementations * of grouping. (Note that if groupClause is empty, * grouping_is_sortable() is trivially true, and all the * pathkeys_contained_in() tests will succeed too, so that we'll consider * every surviving input path.) + * + * If we have grouping sets, we might be able to sort some but not all of + * them; in this case, we need can_sort to be true as long as we must + * consider any sorted-input plan. */ - can_sort = grouping_is_sortable(parse->groupClause); + can_sort = (gd && gd->rollups != NIL) + || grouping_is_sortable(parse->groupClause); /* * Determine whether we should consider hash-based implementations of * grouping. * - * Hashed aggregation only applies if we're grouping. We currently can't - * hash if there are grouping sets, though. + * Hashed aggregation only applies if we're grouping. If we have grouping + * sets, some groups might be hashable but others not; in this case we set + * can_hash true as long as there is nothing globally preventing us from + * hashing (and we should therefore consider plans with hashes). * * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY * aggregates. (Doing so would imply storing *all* the input values in @@ -3670,9 +3885,8 @@ create_grouping_paths(PlannerInfo *root, * other gating conditions, so we want to do it last. */ can_hash = (parse->groupClause != NIL && - parse->groupingSets == NIL && agg_costs->numOrderedAggs == 0 && - grouping_is_hashable(parse->groupClause)); + (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))); /* * If grouped_rel->consider_parallel is true, then paths that we generate @@ -3770,8 +3984,7 @@ create_grouping_paths(PlannerInfo *root, /* Estimate number of partial groups. 
*/ dNumPartialGroups = get_number_of_groups(root, cheapest_partial_path->rows, - NIL, - NIL); + gd); /* * Collect statistics about aggregates for estimating costs of @@ -3949,20 +4162,9 @@ create_grouping_paths(PlannerInfo *root, /* Now decide what to stick atop it */ if (parse->groupingSets) { - /* - * We have grouping sets, possibly with aggregation. Make - * a GroupingSetsPath. - */ - add_path(grouped_rel, (Path *) - create_groupingsets_path(root, - grouped_rel, - path, - target, - (List *) parse->havingQual, - rollup_lists, - rollup_groupclauses, - agg_costs, - dNumGroups)); + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, target, + gd, agg_costs, dNumGroups); } else if (parse->hasAggs) { @@ -4007,8 +4209,7 @@ create_grouping_paths(PlannerInfo *root, /* * Now generate a complete GroupAgg Path atop of the cheapest partial - * path. We need only bother with the cheapest path here, as the - * output of Gather is never sorted. + * path. We can do this using either Gather or Gather Merge. */ if (grouped_rel->partial_pathlist) { @@ -4023,11 +4224,11 @@ create_grouping_paths(PlannerInfo *root, &total_groups); /* - * Gather is always unsorted, so we'll need to sort, unless - * there's no GROUP BY clause, in which case there will only be a - * single group. + * Since Gather's output is always unsorted, we'll need to sort, + * unless there's no GROUP BY clause or a degenerate (constant) + * one, in which case there will only be a single group. */ - if (parse->groupClause) + if (root->group_pathkeys) path = (Path *) create_sort_path(root, grouped_rel, path, @@ -4070,54 +4271,130 @@ create_grouping_paths(PlannerInfo *root, parse->groupClause, (List *) parse->havingQual, dNumGroups)); + + /* + * The point of using Gather Merge rather than Gather is that it + * can preserve the ordering of the input path, so there's no + * reason to try it unless (1) it's possible to produce more than + * one output row and (2) we want the output path to be ordered. + */ + if (parse->groupClause != NIL && root->group_pathkeys != NIL) + { + foreach(lc, grouped_rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *gmpath; + double total_groups; + + /* + * It's useful to consider paths that are already properly + * ordered for Gather Merge, because those don't need a + * sort. It's also useful to consider the cheapest path, + * because sorting it in parallel and then doing Gather + * Merge may be better than doing an unordered Gather + * followed by a sort. But there's no point in + * considering non-cheapest paths that aren't already + * sorted correctly. + */ + if (path != subpath && + !pathkeys_contained_in(root->group_pathkeys, + subpath->pathkeys)) + continue; + + total_groups = subpath->rows * subpath->parallel_workers; + + gmpath = (Path *) + create_gather_merge_path(root, + grouped_rel, + subpath, + partial_grouping_target, + root->group_pathkeys, + NULL, + &total_groups); + + if (parse->hasAggs) + add_path(grouped_rel, (Path *) + create_agg_path(root, + grouped_rel, + gmpath, + target, + parse->groupClause ? 
AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, + parse->groupClause, + (List *) parse->havingQual, + &agg_final_costs, + dNumGroups)); + else + add_path(grouped_rel, (Path *) + create_group_path(root, + grouped_rel, + gmpath, + target, + parse->groupClause, + (List *) parse->havingQual, + dNumGroups)); + } + } } } if (can_hash) { - hashaggtablesize = estimate_hashagg_tablesize(cheapest_path, - agg_costs, - dNumGroups); - - /* - * Provided that the estimated size of the hashtable does not exceed - * work_mem, we'll generate a HashAgg Path, although if we were unable - * to sort above, then we'd better generate a Path, so that we at - * least have one. - */ - if (hashaggtablesize < work_mem * 1024L || - grouped_rel->pathlist == NIL) + if (parse->groupingSets) { - /* Don't mess with the cheapest path directly. */ - Path *path = cheapest_path; - /* - * If the grouping can't be fully pushed down, we'll push down the - * first phase of the aggregate, and redistribute only the partial - * results. - * - * If if can be pushed down, disable construction of complex - * distributed paths. + * Try for a hash-only groupingsets path over unsorted input. */ - if (! can_push_down_grouping(root, parse, path)) - path = create_remotesubplan_path(root, path, NULL); - else - try_distributed_aggregation = false; + consider_groupingsets_paths(root, grouped_rel, + cheapest_path, false, true, target, + gd, agg_costs, dNumGroups); + } + else + { + hashaggtablesize = estimate_hashagg_tablesize(cheapest_path, + agg_costs, + dNumGroups); /* - * We just need an Agg over the cheapest-total input path, since - * input order won't matter. + * Provided that the estimated size of the hashtable does not + * exceed work_mem, we'll generate a HashAgg Path, although if we + * were unable to sort above, then we'd better generate a Path, so + * that we at least have one. */ - add_path(grouped_rel, (Path *) - create_agg_path(root, grouped_rel, - path, - target, - AGG_HASHED, - AGGSPLIT_SIMPLE, - parse->groupClause, - (List *) parse->havingQual, - agg_costs, - dNumGroups)); + if (hashaggtablesize < work_mem * 1024L || + grouped_rel->pathlist == NIL) + { + /* Don't mess with the cheapest path directly. */ + Path *path = cheapest_path; + + /* + * If the grouping can't be fully pushed down, we'll push down the + * first phase of the aggregate, and redistribute only the partial + * results. + * + * If it can be pushed down, disable construction of complex + * distributed paths. + */ + if (! can_push_down_grouping(root, parse, path)) + path = create_remotesubplan_path(root, path, NULL); + else + try_distributed_aggregation = false; + + /* + * We just need an Agg over the cheapest-total input path, + * since input order won't matter. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, grouped_rel, + path, + target, + AGG_HASHED, + AGGSPLIT_SIMPLE, + parse->groupClause, + (List *) parse->havingQual, + agg_costs, + dNumGroups)); + } } /* @@ -4203,8 +4480,7 @@ create_grouping_paths(PlannerInfo *root, /* Estimate number of partial groups. 
*/ dNumPartialGroups = get_number_of_groups(root, cheapest_path->rows, - NIL, - NIL); + gd); /* * Collect statistics about aggregates for estimating costs of @@ -4242,7 +4518,7 @@ create_grouping_paths(PlannerInfo *root, bool is_sorted; is_sorted = pathkeys_contained_in(root->group_pathkeys, - path->pathkeys); + path->pathkeys); /* * XL: Can it happen that the cheapest path can't be pushed down, @@ -4640,10 +4916,357 @@ create_grouping_paths(PlannerInfo *root, /* Now choose the best path(s) */ set_cheapest(grouped_rel); + /* + * We've been using the partial pathlist for the grouped relation to hold + * partially aggregated paths, but that's actually a little bit bogus + * because it's unsafe for later planning stages -- like ordered_rel --- + * to get the idea that they can use these partial paths as if they didn't + * need a FinalizeAggregate step. Zap the partial pathlist at this stage + * so we don't get confused. + */ + grouped_rel->partial_pathlist = NIL; return grouped_rel; } + +/* + * For a given input path, consider the possible ways of doing grouping sets on + * it, by combinations of hashing and sorting. This can be called multiple + * times, so it's important that it not scribble on input. No result is + * returned, but any generated paths are added to grouped_rel. + */ +static void +consider_groupingsets_paths(PlannerInfo *root, + RelOptInfo *grouped_rel, + Path *path, + bool is_sorted, + bool can_hash, + PathTarget *target, + grouping_sets_data *gd, + const AggClauseCosts *agg_costs, + double dNumGroups) +{ + Query *parse = root->parse; + + /* + * If we're not being offered sorted input, then only consider plans that + * can be done entirely by hashing. + * + * We can hash everything if it looks like it'll fit in work_mem. But if + * the input is actually sorted despite not being advertised as such, we + * prefer to make use of that in order to use less memory. + * + * If none of the grouping sets are sortable, then ignore the work_mem + * limit and generate a path anyway, since otherwise we'll just fail. + */ + if (!is_sorted) + { + List *new_rollups = NIL; + RollupData *unhashed_rollup = NULL; + List *sets_data; + List *empty_sets_data = NIL; + List *empty_sets = NIL; + ListCell *lc; + ListCell *l_start = list_head(gd->rollups); + AggStrategy strat = AGG_HASHED; + Size hashsize; + double exclude_groups = 0.0; + + Assert(can_hash); + + if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys)) + { + unhashed_rollup = lfirst(l_start); + exclude_groups = unhashed_rollup->numGroups; + l_start = lnext(l_start); + } + + hashsize = estimate_hashagg_tablesize(path, + agg_costs, + dNumGroups - exclude_groups); + + /* + * gd->rollups is empty if we have only unsortable columns to work + * with. Override work_mem in that case; otherwise, we'll rely on the + * sorted-input case to generate usable mixed paths. + */ + if (hashsize > work_mem * 1024L && gd->rollups) + return; /* nope, won't fit */ + + /* + * We need to burst the existing rollups list into individual grouping + * sets and recompute a groupClause for each set. + */ + sets_data = list_copy(gd->unsortable_sets); + + for_each_cell(lc, l_start) + { + RollupData *rollup = lfirst(lc); + + /* + * If we find an unhashable rollup that's not been skipped by the + * "actually sorted" check above, we can't cope; we'd need sorted + * input (with a different sort order) but we can't get that here. + * So bail out; we'll get a valid path from the is_sorted case + * instead. 
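
Before the function continues, it may help to see the decision space of consider_groupingsets_paths in miniature. The sketch below collapses the checks into four booleans and is deliberately coarse: the real function can add both a mixed and a purely sorted path from a single sorted-input call, and it waives the work_mem test when only unsortable sets exist:

#include <stdio.h>
#include <stdbool.h>

typedef enum { GS_NONE, GS_HASH_ONLY, GS_SORTED, GS_ALSO_MIXED } GsOutcome;

static GsOutcome
sketch_outcome(bool is_sorted, bool can_hash, bool all_sets_hashable,
               bool fits_in_work_mem)
{
    if (!is_sorted)
    {
        /* unsorted input: hashing must cover every nonempty set */
        if (can_hash && all_sets_hashable && fits_in_work_mem)
            return GS_HASH_ONLY;    /* AGG_HASHED, or AGG_MIXED for empty sets */
        return GS_NONE;             /* rely on the later sorted-input call */
    }
    /* sorted input: a sorted plan is possible; maybe a mixed one too */
    return can_hash ? GS_ALSO_MIXED : GS_SORTED;
}

int
main(void)
{
    printf("%d\n", sketch_outcome(false, true, true, true));   /* GS_HASH_ONLY */
    printf("%d\n", sketch_outcome(true, false, false, false)); /* GS_SORTED */
    return 0;
}
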
+ * + * The mere presence of empty grouping sets doesn't make a rollup + * unhashable (see preprocess_grouping_sets), we handle those + * specially below. + */ + if (!rollup->hashable) + return; + else + sets_data = list_concat(sets_data, list_copy(rollup->gsets_data)); + } + foreach(lc, sets_data) + { + GroupingSetData *gs = lfirst(lc); + List *gset = gs->set; + RollupData *rollup; + + if (gset == NIL) + { + /* Empty grouping sets can't be hashed. */ + empty_sets_data = lappend(empty_sets_data, gs); + empty_sets = lappend(empty_sets, NIL); + } + else + { + rollup = makeNode(RollupData); + + rollup->groupClause = preprocess_groupclause(root, gset); + rollup->gsets_data = list_make1(gs); + rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, + rollup->gsets_data, + gd->tleref_to_colnum_map); + rollup->numGroups = gs->numGroups; + rollup->hashable = true; + rollup->is_hashed = true; + new_rollups = lappend(new_rollups, rollup); + } + } + + /* + * If we didn't find anything nonempty to hash, then bail. We'll + * generate a path from the is_sorted case. + */ + if (new_rollups == NIL) + return; + + /* + * If there were empty grouping sets they should have been in the + * first rollup. + */ + Assert(!unhashed_rollup || !empty_sets); + + if (unhashed_rollup) + { + new_rollups = lappend(new_rollups, unhashed_rollup); + strat = AGG_MIXED; + } + else if (empty_sets) + { + RollupData *rollup = makeNode(RollupData); + + rollup->groupClause = NIL; + rollup->gsets_data = empty_sets_data; + rollup->gsets = empty_sets; + rollup->numGroups = list_length(empty_sets); + rollup->hashable = false; + rollup->is_hashed = false; + new_rollups = lappend(new_rollups, rollup); + strat = AGG_MIXED; + } + + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + target, + (List *) parse->havingQual, + strat, + new_rollups, + agg_costs, + dNumGroups)); + return; + } + + /* + * If we have sorted input but nothing we can do with it, bail. + */ + if (list_length(gd->rollups) == 0) + return; + + /* + * Given sorted input, we try and make two paths: one sorted and one mixed + * sort/hash. (We need to try both because hashagg might be disabled, or + * some columns might not be sortable.) + * + * can_hash is passed in as false if some obstacle elsewhere (such as + * ordered aggs) means that we shouldn't consider hashing at all. + */ + if (can_hash && gd->any_hashable) + { + List *rollups = NIL; + List *hash_sets = list_copy(gd->unsortable_sets); + double availspace = (work_mem * 1024.0); + ListCell *lc; + + /* + * Account first for space needed for groups we can't sort at all. + */ + availspace -= (double) estimate_hashagg_tablesize(path, + agg_costs, + gd->dNumHashGroups); + + if (availspace > 0 && list_length(gd->rollups) > 1) + { + double scale; + int num_rollups = list_length(gd->rollups); + int k_capacity; + int *k_weights = palloc(num_rollups * sizeof(int)); + Bitmapset *hash_items = NULL; + int i; + + /* + * We treat this as a knapsack problem: the knapsack capacity + * represents work_mem, the item weights are the estimated memory + * usage of the hashtables needed to implement a single rollup, + * and we really ought to use the cost saving as the item value; + * however, currently the costs assigned to sort nodes don't + * reflect the comparison costs well, and so we treat all items as + * of equal value (each rollup we hash instead saves us one sort). + * + * To use the discrete knapsack, we need to scale the values to a + * reasonably small bounded range. 
We choose to allow a 5% error + * margin; we have no more than 4096 rollups in the worst possible + * case, which with a 5% error margin will require a bit over 42MB + * of workspace. (Anyone wanting to plan queries that complex had + * better have the memory for it. In more reasonable cases, with + * no more than a couple of dozen rollups, the memory usage will + * be negligible.) + * + * k_capacity is naturally bounded, but we clamp the values for + * scale and weight (below) to avoid overflows or underflows (or + * uselessly trying to use a scale factor less than 1 byte). + */ + scale = Max(availspace / (20.0 * num_rollups), 1.0); + k_capacity = (int) floor(availspace / scale); + + /* + * We leave the first rollup out of consideration since it's the + * one that matches the input sort order. We assign indexes "i" + * to only those entries considered for hashing; the second loop, + * below, must use the same condition. + */ + i = 0; + for_each_cell(lc, lnext(list_head(gd->rollups))) + { + RollupData *rollup = lfirst(lc); + + if (rollup->hashable) + { + double sz = estimate_hashagg_tablesize(path, + agg_costs, + rollup->numGroups); + + /* + * If sz is enormous, but work_mem (and hence scale) is + * small, avoid integer overflow here. + */ + k_weights[i] = (int) Min(floor(sz / scale), + k_capacity + 1.0); + ++i; + } + } + + /* + * Apply knapsack algorithm; compute the set of items which + * maximizes the value stored (in this case the number of sorts + * saved) while keeping the total size (approximately) within + * capacity. + */ + if (i > 0) + hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL); + + if (!bms_is_empty(hash_items)) + { + rollups = list_make1(linitial(gd->rollups)); + + i = 0; + for_each_cell(lc, lnext(list_head(gd->rollups))) + { + RollupData *rollup = lfirst(lc); + + if (rollup->hashable) + { + if (bms_is_member(i, hash_items)) + hash_sets = list_concat(hash_sets, + list_copy(rollup->gsets_data)); + else + rollups = lappend(rollups, rollup); + ++i; + } + else + rollups = lappend(rollups, rollup); + } + } + } + + if (!rollups && hash_sets) + rollups = list_copy(gd->rollups); + + foreach(lc, hash_sets) + { + GroupingSetData *gs = lfirst(lc); + RollupData *rollup = makeNode(RollupData); + + Assert(gs->set != NIL); + + rollup->groupClause = preprocess_groupclause(root, gs->set); + rollup->gsets_data = list_make1(gs); + rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, + rollup->gsets_data, + gd->tleref_to_colnum_map); + rollup->numGroups = gs->numGroups; + rollup->hashable = true; + rollup->is_hashed = true; + rollups = lcons(rollup, rollups); + } + + if (rollups) + { + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + target, + (List *) parse->havingQual, + AGG_MIXED, + rollups, + agg_costs, + dNumGroups)); + } + } + + /* + * Now try the simple sorted case. + */ + if (!gd->unsortable_sets) + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + target, + (List *) parse->havingQual, + AGG_SORTED, + gd->rollups, + agg_costs, + dNumGroups)); +} + /* * create_window_paths * @@ -4679,8 +5302,8 @@ create_window_paths(PlannerInfo *root, * target list and active windows for non-parallel-safe constructs. 
*/ if (input_rel->consider_parallel && - !has_parallel_hazard((Node *) output_target->exprs, false) && - !has_parallel_hazard((Node *) activeWindows, false)) + is_parallel_safe(root, (Node *) output_target->exprs) && + is_parallel_safe(root, (Node *) activeWindows)) window_rel->consider_parallel = true; /* @@ -4807,9 +5430,8 @@ create_one_window_path(PlannerInfo *root, window_target = copy_pathtarget(window_target); foreach(lc2, wflists->windowFuncs[wc->winref]) { - WindowFunc *wfunc = (WindowFunc *) lfirst(lc2); + WindowFunc *wfunc = lfirst_node(WindowFunc, lc2); - Assert(IsA(wfunc, WindowFunc)); add_column_to_pathtarget(window_target, (Expr *) wfunc, 0); window_target->width += get_typavgwidth(wfunc->wintype, -1); } @@ -5096,7 +5718,7 @@ create_ordered_paths(PlannerInfo *root, * target list is parallel-safe. */ if (input_rel->consider_parallel && - !has_parallel_hazard((Node *) target->exprs, false)) + is_parallel_safe(root, (Node *) target->exprs)) ordered_rel->consider_parallel = true; /* @@ -5136,6 +5758,56 @@ create_ordered_paths(PlannerInfo *root, } /* + * generate_gather_paths() will have already generated a simple Gather + * path for the best parallel path, if any, and the loop above will have + * considered sorting it. Similarly, generate_gather_paths() will also + * have generated order-preserving Gather Merge plans which can be used + * without sorting if they happen to match the sort_pathkeys, and the loop + * above will have handled those as well. However, there's one more + * possibility: it may make sense to sort the cheapest partial path + * according to the required output order and then use Gather Merge. + */ + if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL && + input_rel->partial_pathlist != NIL) + { + Path *cheapest_partial_path; + + cheapest_partial_path = linitial(input_rel->partial_pathlist); + + /* + * If cheapest partial path doesn't need a sort, this is redundant + * with what's already been tried. + */ + if (!pathkeys_contained_in(root->sort_pathkeys, + cheapest_partial_path->pathkeys)) + { + Path *path; + double total_groups; + + path = (Path *) create_sort_path(root, + ordered_rel, + cheapest_partial_path, + root->sort_pathkeys, + -1.0); + + total_groups = cheapest_partial_path->rows * + cheapest_partial_path->parallel_workers; + path = (Path *) + create_gather_merge_path(root, ordered_rel, + path, + target, root->sort_pathkeys, NULL, + &total_groups); + + /* Add projection step if needed */ + if (path->pathtarget != target) + path = apply_projection_to_path(root, ordered_rel, + path, target); + + add_path(ordered_rel, path); + } + } + + /* * If there is an FDW that's responsible for all baserels of the query, * let it consider adding ForeignPaths. */ @@ -5713,7 +6385,7 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, * bloat the sort dataset, and because it might cause unexpected output order * if the sort isn't stable. However there's a constraint on that: all SRFs * in the tlist should be evaluated at the same plan step, so that they can - * run in sync in ExecTargetList. So if any SRFs are in sort columns, we + * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we * mustn't postpone any SRFs. (Note that in principle that policy should * probably get applied to the group/window input targetlists too, but we * have not done that historically.) Lastly, expensive expressions are @@ -5804,7 +6476,8 @@ make_sort_input_target(PlannerInfo *root, * Check for SRF or volatile functions. 
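Returning for a moment to the create_ordered_paths() hunk above: whether an explicit Sort must precede Gather Merge reduces to a pathkey-prefix test, and the row estimate handed to Gather Merge is simply rows-per-worker times the worker count. A rough standalone model follows, with pathkeys shrunk to plain ints (real pathkeys also carry sort operator, collation, direction, and nulls ordering):

    #include <stdbool.h>
    #include <stdio.h>

    /* One pathkey list satisfies another iff the wanted keys are a prefix. */
    static bool
    pathkeys_prefix(const int *want, int nwant, const int *have, int nhave)
    {
        if (nwant > nhave)
            return false;
        for (int i = 0; i < nwant; i++)
            if (want[i] != have[i])
                return false;
        return true;
    }

    int
    main(void)
    {
        int  sort_keys[] = {1, 2};  /* ORDER BY columns 1, 2 */
        int  path_keys[] = {1};     /* cheapest partial path sorted on col 1 only */
        bool need_sort = !pathkeys_prefix(sort_keys, 2, path_keys, 1);

        /* row estimate handed to Gather Merge, as in the hunk above */
        double total_rows = 10000.0 /* rows per worker */ * 4 /* workers */;

        printf("explicit sort needed: %s, estimated rows: %.0f\n",
               need_sort ? "yes" : "no", total_rows);
        return 0;
    }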
Check the SRF case first * because we must know whether we have any postponed SRFs. */ - if (expression_returns_set((Node *) expr)) + if (parse->hasTargetSRFs && + expression_returns_set((Node *) expr)) { /* We'll decide below whether these are postponable */ col_is_srf[i] = true; @@ -5843,6 +6516,7 @@ make_sort_input_target(PlannerInfo *root, { /* For sortgroupref cols, just check if any contain SRFs */ if (!have_srf_sortcols && + parse->hasTargetSRFs && expression_returns_set((Node *) expr)) have_srf_sortcols = true; } @@ -5952,6 +6626,109 @@ get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction) } /* + * adjust_paths_for_srfs + * Fix up the Paths of the given upperrel to handle tSRFs properly. + * + * The executor can only handle set-returning functions that appear at the + * top level of the targetlist of a ProjectSet plan node. If we have any SRFs + * that are not at top level, we need to split up the evaluation into multiple + * plan levels in which each level satisfies this constraint. This function + * modifies each Path of an upperrel that (might) compute any SRFs in its + * output tlist to insert appropriate projection steps. + * + * The given targets and targets_contain_srfs lists are from + * split_pathtarget_at_srfs(). We assume the existing Paths emit the first + * target in targets. + */ +static void +adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel, + List *targets, List *targets_contain_srfs) +{ + ListCell *lc; + + Assert(list_length(targets) == list_length(targets_contain_srfs)); + Assert(!linitial_int(targets_contain_srfs)); + + /* If no SRFs appear at this plan level, nothing to do */ + if (list_length(targets) == 1) + return; + + /* + * Stack SRF-evaluation nodes atop each path for the rel. + * + * In principle we should re-run set_cheapest() here to identify the + * cheapest path, but it seems unlikely that adding the same tlist eval + * costs to all the paths would change that, so we don't bother. Instead, + * just assume that the cheapest-startup and cheapest-total paths remain + * so. (There should be no parameterized paths anymore, so we needn't + * worry about updating cheapest_parameterized_paths.) 
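The stacking loop that follows can be pictured in miniature: walk the (target, contains_srfs) pairs bottom-up, wrapping the current path in either a ProjectSet-style node or a plain projection. Everything in this sketch is invented scaffolding, not PostgreSQL's Path machinery:

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct PathSketch
    {
        const char              *label;
        const struct PathSketch *child;
    } PathSketch;

    static PathSketch pool[8];
    static int        npool;

    static const PathSketch *
    wrap(const char *label, const PathSketch *child)
    {
        pool[npool] = (PathSketch) {label, child};
        return &pool[npool++];
    }

    int
    main(void)
    {
        /* split_pathtarget_at_srfs-style result: level 0 never contains SRFs */
        bool contains_srfs[] = {false, true, false};
        const PathSketch *path = wrap("SeqScan", NULL);

        /* level 0 has no SRFs; a real projection there may even be a no-op */
        for (int lev = 0; lev < 3; lev++)
            path = wrap(contains_srfs[lev] ? "ProjectSet" : "Result", path);

        /* prints: Result -> ProjectSet -> Result -> SeqScan */
        for (const PathSketch *p = path; p != NULL; p = p->child)
            printf("%s%s", p->label, p->child ? " -> " : "\n");
        return 0;
    }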
+ */ + foreach(lc, rel->pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath = subpath; + ListCell *lc1, + *lc2; + + Assert(subpath->param_info == NULL); + forboth(lc1, targets, lc2, targets_contain_srfs) + { + PathTarget *thistarget = (PathTarget *) lfirst(lc1); + bool contains_srfs = (bool) lfirst_int(lc2); + + /* If this level doesn't contain SRFs, do regular projection */ + if (contains_srfs) + newpath = (Path *) create_set_projection_path(root, + rel, + newpath, + thistarget); + else + newpath = (Path *) apply_projection_to_path(root, + rel, + newpath, + thistarget); + } + lfirst(lc) = newpath; + if (subpath == rel->cheapest_startup_path) + rel->cheapest_startup_path = newpath; + if (subpath == rel->cheapest_total_path) + rel->cheapest_total_path = newpath; + } + + /* Likewise for partial paths, if any */ + foreach(lc, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath = subpath; + ListCell *lc1, + *lc2; + + Assert(subpath->param_info == NULL); + forboth(lc1, targets, lc2, targets_contain_srfs) + { + PathTarget *thistarget = (PathTarget *) lfirst(lc1); + bool contains_srfs = (bool) lfirst_int(lc2); + + /* If this level doesn't contain SRFs, do regular projection */ + if (contains_srfs) + newpath = (Path *) create_set_projection_path(root, + rel, + newpath, + thistarget); + else + { + /* avoid apply_projection_to_path, in case of multiple refs */ + newpath = (Path *) create_projection_path(root, + rel, + newpath, + thistarget); + } + } + lfirst(lc) = newpath; + } +} + +/* * expression_planner * Perform planner's transformations on a standalone expression. * @@ -6050,7 +6827,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) setup_simple_rel_arrays(root); /* Build RelOptInfo */ - rel = build_simple_rel(root, 1, RELOPT_BASEREL); + rel = build_simple_rel(root, 1, NULL); /* Locate IndexOptInfo for the target index */ indexInfo = NULL; @@ -6099,7 +6876,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) indexScanPath = create_index_path(root, indexInfo, NIL, NIL, NIL, NIL, NIL, ForwardScanDirection, false, - NULL, 1.0); + NULL, 1.0, false); return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost); } @@ -6167,6 +6944,37 @@ grouping_distribution_match(PlannerInfo *root, Query *parse, Path *path, return matches_key; } +/* + * get_partitioned_child_rels + * Returns a list of the RT indexes of the partitioned child relations + * with rti as the root parent RT index. + * + * Note: Only call this function on RTEs known to be partitioned tables. + */ +List * +get_partitioned_child_rels(PlannerInfo *root, Index rti) +{ + List *result = NIL; + ListCell *l; + + foreach(l, root->pcinfo_list) + { + PartitionedChildRelInfo *pc = lfirst(l); + + if (pc->parent_relid == rti) + { + result = pc->child_rels; + break; + } + } + + /* The root partitioned table is included as a child rel */ + Assert(list_length(result) >= 1); + + return result; +} + + static bool groupingsets_distribution_match(PlannerInfo *root, Query *parse, Path *path) { diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index d5bc9e0760..398586e98a 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -5,7 +5,7 @@ * vars, compute regproc values for operators, etc * * Portions Copyright (c) 2012-2014, TransLattice, Inc. 
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -123,10 +123,10 @@ static Var *search_indexed_tlist_for_var(Var *var, indexed_tlist *itlist, Index newvarno, int rtoffset); -static Var *search_indexed_tlist_for_non_var(Node *node, +static Var *search_indexed_tlist_for_non_var(Expr *node, indexed_tlist *itlist, Index newvarno); -static Var *search_indexed_tlist_for_sortgroupref(Node *node, +static Var *search_indexed_tlist_for_sortgroupref(Expr *node, Index sortgroupref, indexed_tlist *itlist, Index newvarno); @@ -240,11 +240,9 @@ set_plan_references(PlannerInfo *root, Plan *plan) */ foreach(lc, root->rowMarks) { - PlanRowMark *rc = (PlanRowMark *) lfirst(lc); + PlanRowMark *rc = lfirst_node(PlanRowMark, lc); PlanRowMark *newrc; - Assert(IsA(rc, PlanRowMark)); - /* flat copy is enough since all fields are scalars */ newrc = (PlanRowMark *) palloc(sizeof(PlanRowMark)); memcpy(newrc, rc, sizeof(PlanRowMark)); @@ -411,11 +409,11 @@ add_rte_to_flat_rtable(PlannerGlobal *glob, RangeTblEntry *rte) newrte->subquery = NULL; newrte->joinaliasvars = NIL; newrte->functions = NIL; + newrte->tablefunc = NULL; newrte->values_lists = NIL; - newrte->values_collations = NIL; - newrte->ctecoltypes = NIL; - newrte->ctecoltypmods = NIL; - newrte->ctecolcollations = NIL; + newrte->coltypes = NIL; + newrte->coltypmods = NIL; + newrte->colcollations = NIL; newrte->securityQuals = NIL; glob->finalrtable = lappend(glob->finalrtable, newrte); @@ -572,6 +570,19 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_scan_list(root, splan->functions, rtoffset); } break; + case T_TableFuncScan: + { + TableFuncScan *splan = (TableFuncScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, rtoffset); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, rtoffset); + splan->tablefunc = (TableFunc *) + fix_scan_expr(root, (Node *) splan->tablefunc, rtoffset); + } + break; case T_ValuesScan: { ValuesScan *splan = (ValuesScan *) plan; @@ -596,6 +607,17 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_scan_list(root, splan->scan.plan.qual, rtoffset); } break; + case T_NamedTuplestoreScan: + { + NamedTuplestoreScan *splan = (NamedTuplestoreScan *) plan; + + splan->scan.scanrelid += rtoffset; + splan->scan.plan.targetlist = + fix_scan_list(root, splan->scan.plan.targetlist, rtoffset); + splan->scan.plan.qual = + fix_scan_list(root, splan->scan.plan.qual, rtoffset); + } + break; case T_WorkTableScan: { WorkTableScan *splan = (WorkTableScan *) plan; @@ -650,6 +672,7 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) break; case T_Gather: + case T_GatherMerge: set_upper_references(root, plan, rtoffset); break; @@ -779,6 +802,9 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_scan_expr(root, splan->resconstantqual, rtoffset); } break; + case T_ProjectSet: + set_upper_references(root, plan, rtoffset); + break; case T_ModifyTable: { ModifyTable *splan = (ModifyTable *) plan; @@ -865,6 +891,10 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) splan->nominalRelation += rtoffset; splan->exclRelRTI += rtoffset; + foreach(l, splan->partitioned_rels) + { + lfirst_int(l) += rtoffset; + } foreach(l, splan->resultRelations) { lfirst_int(l) += rtoffset; @@ -893,6 +923,27 @@ set_plan_refs(PlannerInfo *root, 
Plan *plan, int rtoffset) root->glob->resultRelations = list_concat(root->glob->resultRelations, list_copy(splan->resultRelations)); + + /* + * If the main target relation is a partitioned table, the + * following list contains the RT indexes of partitioned child + * relations including the root, which are not included in the + * above list. We also keep RT indexes of the roots + * separately to be identified as such during executor + * initialization. + */ + if (splan->partitioned_rels != NIL) + { + root->glob->nonleafResultRelations = + list_concat(root->glob->nonleafResultRelations, + list_copy(splan->partitioned_rels)); + /* Remember where this root will be in the global list. */ + splan->rootResultRelIndex = + list_length(root->glob->rootResultRelations); + root->glob->rootResultRelations = + lappend_int(root->glob->rootResultRelations, + linitial_int(splan->partitioned_rels)); + } } break; case T_Append: @@ -905,6 +956,10 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) */ set_dummy_tlist_references(plan, rtoffset); Assert(splan->plan.qual == NIL); + foreach(l, splan->partitioned_rels) + { + lfirst_int(l) += rtoffset; + } foreach(l, splan->appendplans) { lfirst(l) = set_plan_refs(root, @@ -923,6 +978,10 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) */ set_dummy_tlist_references(plan, rtoffset); Assert(splan->plan.qual == NIL); + foreach(l, splan->partitioned_rels) + { + lfirst_int(l) += rtoffset; + } foreach(l, splan->mergeplans) { lfirst(l) = set_plan_refs(root, @@ -1452,7 +1511,7 @@ fix_param_node(PlannerInfo *root, Param *p) elog(ERROR, "unexpected PARAM_MULTIEXPR ID: %d", p->paramid); return copyObject(list_nth(params, colno - 1)); } - return copyObject(p); + return (Node *) copyObject(p); } /* @@ -1740,7 +1799,7 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) if (tle->ressortgroupref != 0 && !IsA(tle->expr, Var)) { newexpr = (Node *) - search_indexed_tlist_for_sortgroupref((Node *) tle->expr, + search_indexed_tlist_for_sortgroupref(tle->expr, tle->ressortgroupref, subplan_itlist, OUTER_VAR); @@ -1823,7 +1882,7 @@ convert_combining_aggrefs(Node *node, void *context) */ child_agg->args = NIL; child_agg->aggfilter = NULL; - parent_agg = (Aggref *) copyObject(child_agg); + parent_agg = copyObject(child_agg); child_agg->args = orig_agg->args; child_agg->aggfilter = orig_agg->aggfilter; @@ -1872,6 +1931,19 @@ set_dummy_tlist_references(Plan *plan, int rtoffset) Var *oldvar = (Var *) tle->expr; Var *newvar; + /* + * As in search_indexed_tlist_for_non_var(), we prefer to keep Consts + * as Consts, not Vars referencing Consts. Here, there's no speed + * advantage to be had, but it makes EXPLAIN output look cleaner, and + * again it avoids confusing the executor. + */ + if (IsA(oldvar, Const)) + { + /* just reuse the existing TLE node */ + output_targetlist = lappend(output_targetlist, tle); + continue; + } + newvar = makeVar(OUTER_VAR, tle->resno, exprType((Node *) oldvar), @@ -2054,11 +2126,21 @@ search_indexed_tlist_for_var(Var *var, indexed_tlist *itlist, * so there's a correctness reason not to call it unless that's set. */ static Var * -search_indexed_tlist_for_non_var(Node *node, +search_indexed_tlist_for_non_var(Expr *node, indexed_tlist *itlist, Index newvarno) { TargetEntry *tle; + /* + * If it's a simple Const, replacing it with a Var is silly, even if there + * happens to be an identical Const below; a Var is more expensive to + * execute than a Const.
What's more, replacing it could confuse some + * places in the executor that expect to see simple Consts for, eg, + * dropped columns. + */ + if (IsA(node, Const)) + return NULL; + tle = tlist_member(node, itlist->tlist); if (tle) { @@ -2085,7 +2167,7 @@ search_indexed_tlist_for_non_var(Node *node, * And it's also faster than search_indexed_tlist_for_non_var. */ static Var * -search_indexed_tlist_for_sortgroupref(Node *node, +search_indexed_tlist_for_sortgroupref(Expr *node, Index sortgroupref, indexed_tlist *itlist, Index newvarno) @@ -2219,7 +2301,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) /* See if the PlaceHolderVar has bubbled up from a lower plan node */ if (context->outer_itlist && context->outer_itlist->has_ph_vars) { - newvar = search_indexed_tlist_for_non_var((Node *) phv, + newvar = search_indexed_tlist_for_non_var((Expr *) phv, context->outer_itlist, OUTER_VAR); if (newvar) @@ -2227,7 +2309,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) } if (context->inner_itlist && context->inner_itlist->has_ph_vars) { - newvar = search_indexed_tlist_for_non_var((Node *) phv, + newvar = search_indexed_tlist_for_non_var((Expr *) phv, context->inner_itlist, INNER_VAR); if (newvar) @@ -2242,7 +2324,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) /* Try matching more complex expressions too, if tlists have any */ if (context->outer_itlist && context->outer_itlist->has_non_vars) { - newvar = search_indexed_tlist_for_non_var(node, + newvar = search_indexed_tlist_for_non_var((Expr *) node, context->outer_itlist, OUTER_VAR); if (newvar) @@ -2250,7 +2332,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) } if (context->inner_itlist && context->inner_itlist->has_non_vars) { - newvar = search_indexed_tlist_for_non_var(node, + newvar = search_indexed_tlist_for_non_var((Expr *) node, context->inner_itlist, INNER_VAR); if (newvar) @@ -2334,7 +2416,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) /* See if the PlaceHolderVar has bubbled up from a lower plan node */ if (context->subplan_itlist->has_ph_vars) { - newvar = search_indexed_tlist_for_non_var((Node *) phv, + newvar = search_indexed_tlist_for_non_var((Expr *) phv, context->subplan_itlist, context->newvarno); if (newvar) @@ -2370,7 +2452,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) /* Try matching more complex expressions too, if tlist has any */ if (context->subplan_itlist->has_non_vars) { - newvar = search_indexed_tlist_for_non_var(node, + newvar = search_indexed_tlist_for_non_var((Expr *) node, context->subplan_itlist, context->newvarno); if (newvar) @@ -2561,6 +2643,11 @@ extract_query_dependencies_walker(Node *node, PlannerInfo *context) if (rte->rtekind == RTE_RELATION) context->glob->relationOids = lappend_oid(context->glob->relationOids, rte->relid); + else if (rte->rtekind == RTE_NAMEDTUPLESTORE && + OidIsValid(rte->relid)) + context->glob->relationOids = + lappend_oid(context->glob->relationOids, + rte->relid); } /* And recurse into the query's subexpressions */ diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index bc2cbcee6b..d8545f2bdd 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -4,7 +4,7 @@ * Planning routines for subselects and parameters. * * Portions Copyright (c) 2012-2014, TransLattice, Inc. 
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -85,6 +85,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root, Bitmapset *valid_params, Bitmapset *scan_params); static bool finalize_primnode(Node *node, finalize_primnode_context *context); +static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context); /* @@ -127,7 +128,7 @@ assign_param_for_var(PlannerInfo *root, Var *var) } /* Nope, so make a new one */ - var = (Var *) copyObject(var); + var = copyObject(var); var->varlevelsup = 0; pitem = makeNode(PlannerParamItem); @@ -226,7 +227,7 @@ assign_param_for_placeholdervar(PlannerInfo *root, PlaceHolderVar *phv) } /* Nope, so make a new one */ - phv = (PlaceHolderVar *) copyObject(phv); + phv = copyObject(phv); if (phv->phlevelsup != 0) { IncrementVarSublevelsUp((Node *) phv, -((int) phv->phlevelsup), 0); @@ -318,7 +319,7 @@ replace_outer_agg(PlannerInfo *root, Aggref *agg) * It does not seem worthwhile to try to match duplicate outer aggs. Just * make a new slot every time. */ - agg = (Aggref *) copyObject(agg); + agg = copyObject(agg); IncrementVarSublevelsUp((Node *) agg, -((int) agg->agglevelsup), 0); Assert(agg->agglevelsup == 0); @@ -360,7 +361,7 @@ replace_outer_grouping(PlannerInfo *root, GroupingFunc *grp) * It does not seem worthwhile to try to match duplicate outer aggs. Just * make a new slot every time. */ - grp = (GroupingFunc *) copyObject(grp); + grp = copyObject(grp); IncrementVarSublevelsUp((Node *) grp, -((int) grp->agglevelsup), 0); Assert(grp->agglevelsup == 0); @@ -435,9 +436,8 @@ get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod, /* In cases such as EXISTS, tlist might be empty; arbitrarily use VOID */ if (plan->targetlist) { - TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist); + TargetEntry *tent = linitial_node(TargetEntry, plan->targetlist); - Assert(IsA(tent, TargetEntry)); if (!tent->resjunk) { *coltype = exprType((Node *) tent->expr); @@ -494,7 +494,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, * same sub-Query node, but the planner wants to scribble on the Query. * Try to clean this up when we do querytree redesign... */ - subquery = (Query *) copyObject(orig_subquery); + subquery = copyObject(orig_subquery); /* * If it's an EXISTS subplan, we might be able to simplify it. @@ -593,7 +593,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, List *paramIds; /* Make a second copy of the original subquery */ - subquery = (Query *) copyObject(orig_subquery); + subquery = copyObject(orig_subquery); /* and re-simplify */ simple_exists = simplify_EXISTS_query(root, subquery); Assert(simple_exists); @@ -625,13 +625,13 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, AlternativeSubPlan *asplan; /* OK, convert to SubPlan format. 
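The conversion just below uses castNode(); like the lfirst_node() and linitial_node() changes scattered through this diff, it collapses the old cast-then-Assert(IsA(...)) pattern into a single checked cast. In spirit it amounts to the toy macro here (all names hypothetical; the real macros compile down to a plain cast when assertions are disabled):

    #include <assert.h>
    #include <stdio.h>

    typedef enum { T_SubPlanTag, T_OtherTag } TagSketch;

    typedef struct { TagSketch type; }              NodeSketch;
    typedef struct { TagSketch type; int paramId; } SubPlanSketch;

    /* models castNode(type, ptr): check the tag, then cast */
    #define cast_node_sketch(ctype, tag, ptr) \
        (assert(((const NodeSketch *) (ptr))->type == (tag)), (ctype *) (ptr))

    int
    main(void)
    {
        SubPlanSketch sp = {T_SubPlanTag, 42};
        NodeSketch   *generic = (NodeSketch *) &sp;

        SubPlanSketch *back = cast_node_sketch(SubPlanSketch, T_SubPlanTag, generic);
        printf("paramId = %d\n", back->paramId);
        return 0;
    }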
*/ - hashplan = (SubPlan *) build_subplan(root, plan, subroot, - plan_params, - ANY_SUBLINK, 0, - newtestexpr, - false, true); + hashplan = castNode(SubPlan, + build_subplan(root, plan, subroot, + plan_params, + ANY_SUBLINK, 0, + newtestexpr, + false, true)); /* Check we got what we expected */ - Assert(IsA(hashplan, SubPlan)); Assert(hashplan->parParam == NIL); Assert(hashplan->useHashTable); /* build_subplan won't have filled in paramIds */ @@ -678,6 +678,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, &splan->firstColCollation); splan->useHashTable = false; splan->unknownEqFalse = unknownEqFalse; + splan->parallel_safe = plan->parallel_safe; splan->setParam = NIL; splan->parParam = NIL; splan->args = NIL; @@ -1256,6 +1257,13 @@ SS_process_ctes(PlannerInfo *root) &splan->firstColCollation); splan->useHashTable = false; splan->unknownEqFalse = false; + + /* + * CTE scans are not considered for parallelism (cf + * set_rel_consider_parallel), and even if they were, initPlans aren't + * parallel-safe. + */ + splan->parallel_safe = false; splan->setParam = NIL; splan->parParam = NIL; splan->args = NIL; @@ -1466,7 +1474,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, * Copy the subquery so we can modify it safely (see comments in * make_subplan). */ - subselect = (Query *) copyObject(subselect); + subselect = copyObject(subselect); /* * See if the subquery can be simplified based on the knowledge that it's @@ -1605,7 +1613,7 @@ simplify_EXISTS_query(PlannerInfo *root, Query *query) { /* * We don't try to simplify at all if the query uses set operations, - * aggregates, grouping sets, modifying CTEs, HAVING, OFFSET, or FOR + * aggregates, grouping sets, SRFs, modifying CTEs, HAVING, OFFSET, or FOR * UPDATE/SHARE; none of these seem likely in normal usage and their * possible effects are complex. (Note: we could ignore an "OFFSET 0" * clause, but that traditionally is used as an optimization fence, so we @@ -1616,6 +1624,7 @@ simplify_EXISTS_query(PlannerInfo *root, Query *query) query->hasAggs || query->groupingSets || query->hasWindowFuncs || + query->hasTargetSRFs || query->hasModifyingCTE || query->havingQual || query->limitOffset || @@ -1657,13 +1666,6 @@ simplify_EXISTS_query(PlannerInfo *root, Query *query) } /* - * Mustn't throw away the targetlist if it contains set-returning - * functions; those could affect whether zero rows are returned! - */ - if (expression_returns_set((Node *) query->targetList)) - return false; - - /* * Otherwise, we can throw away the targetlist, as well as any GROUP, * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will * change a nonzero-rows result to zero rows or vice versa. (Furthermore, @@ -2177,11 +2179,13 @@ SS_identify_outer_params(PlannerInfo *root) } /* - * SS_charge_for_initplans - account for cost of initplans in Path costs + * SS_charge_for_initplans - account for initplans in Path costs & parallelism * * If any initPlans have been created in the current query level, they will * get attached to the Plan tree created from whichever Path we select from - * the given rel; so increment all the rel's Paths' costs to account for them. + * the given rel. Increment all that rel's Paths' costs to account for them, + * and make sure the paths get marked as parallel-unsafe, since we can't + * currently transmit initPlans to parallel workers. 
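In miniature, the cost-and-safety adjustment just described looks like the following; the types are invented stand-ins for the planner's Path:

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct
    {
        double startup_cost;
        double total_cost;
        bool   parallel_safe;
    } PathCostSketch;

    static void
    charge_initplans(PathCostSketch *paths, int npaths, double initplan_cost)
    {
        for (int i = 0; i < npaths; i++)
        {
            /* initplans run before the first row, so both costs go up */
            paths[i].startup_cost += initplan_cost;
            paths[i].total_cost   += initplan_cost;
            /* and the path can no longer be used below a Gather */
            paths[i].parallel_safe = false;
        }
    }

    int
    main(void)
    {
        PathCostSketch paths[] = {{10, 100, true}, {0, 250, true}};

        charge_initplans(paths, 2, 25.0);
        for (int i = 0; i < 2; i++)
            printf("path %d: startup=%.0f total=%.0f parallel_safe=%d\n",
                   i, paths[i].startup_cost, paths[i].total_cost,
                   (int) paths[i].parallel_safe);
        return 0;
    }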
* * This is separate from SS_attach_initplans because we might conditionally * create more initPlans during create_plan(), depending on which Path we @@ -2213,7 +2217,7 @@ SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel) } /* - * Now adjust the costs. + * Now adjust the costs and parallel_safe flags. */ foreach(lc, final_rel->pathlist) { @@ -2221,6 +2225,7 @@ SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel) path->startup_cost += initplan_cost; path->total_cost += initplan_cost; + path->parallel_safe = false; } /* We needn't do set_cheapest() here, caller will do it */ @@ -2471,6 +2476,12 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, } break; + case T_TableFuncScan: + finalize_primnode((Node *) ((TableFuncScan *) plan)->tablefunc, + &context); + context.paramids = bms_add_members(context.paramids, scan_params); + break; + case T_ValuesScan: finalize_primnode((Node *) ((ValuesScan *) plan)->values_lists, &context); @@ -2520,6 +2531,10 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, context.paramids = bms_add_members(context.paramids, scan_params); break; + case T_NamedTuplestoreScan: + context.paramids = bms_add_members(context.paramids, scan_params); + break; + case T_ForeignScan: { ForeignScan *fscan = (ForeignScan *) plan; @@ -2719,6 +2734,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, locally_added_param); break; + case T_Agg: + { + Agg *agg = (Agg *) plan; + + /* + * AGG_HASHED plans need to know which Params are referenced + * in aggregate calls. Do a separate scan to identify them. + */ + if (agg->aggstrategy == AGG_HASHED) + { + finalize_primnode_context aggcontext; + + aggcontext.root = root; + aggcontext.paramids = NULL; + finalize_agg_primnode((Node *) agg->plan.targetlist, + &aggcontext); + finalize_agg_primnode((Node *) agg->plan.qual, + &aggcontext); + agg->aggParams = aggcontext.paramids; + } + } + break; + case T_WindowAgg: finalize_primnode(((WindowAgg *) plan)->startOffset, &context); @@ -2726,14 +2764,16 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, &context); break; + case T_ProjectSet: case T_Hash: - case T_Agg: case T_Material: case T_Sort: case T_Unique: case T_Gather: + case T_GatherMerge: case T_SetOp: case T_Group: + /* no node-type-specific fields need fixing */ break; default: @@ -2879,6 +2919,29 @@ finalize_primnode(Node *node, finalize_primnode_context *context) } /* + * finalize_agg_primnode: find all Aggref nodes in the given expression tree, + * and add IDs of all PARAM_EXEC params appearing within their aggregated + * arguments to the result set. + */ +static bool +finalize_agg_primnode(Node *node, finalize_primnode_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Aggref)) + { + Aggref *agg = (Aggref *) node; + + /* we should not consider the direct arguments, if any */ + finalize_primnode((Node *) agg->args, context); + finalize_primnode((Node *) agg->aggfilter, context); + return false; /* there can't be any Aggrefs below here */ + } + return expression_tree_walker(node, finalize_agg_primnode, + (void *) context); +} + +/* * SS_make_initplan_output_param - make a Param for an initPlan's output * * The plan is expected to return a scalar value of the given type/collation. 
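The new finalize_agg_primnode() in the hunk above follows the usual walker pattern: recurse everywhere, but hand off to a different collector once an Aggref is reached, since there can't be any Aggrefs below an Aggref. A standalone imitation using a toy two-child node type (a real walker must of course handle every node shape):

    #include <stdio.h>

    typedef enum { N_PARAM, N_AGGREF, N_OPEXPR } TagSketch;

    typedef struct NodeSketch
    {
        TagSketch          tag;
        int                paramid;       /* used when tag == N_PARAM */
        struct NodeSketch *left, *right;  /* children, possibly NULL */
    } NodeSketch;

    /* collect every Param below this point (models finalize_primnode) */
    static unsigned
    collect_params(const NodeSketch *n, unsigned mask)
    {
        if (n == NULL)
            return mask;
        if (n->tag == N_PARAM)
            return mask | (1u << n->paramid);
        return collect_params(n->right, collect_params(n->left, mask));
    }

    /* collect Params only inside Aggrefs (models finalize_agg_primnode) */
    static unsigned
    collect_agg_params(const NodeSketch *n, unsigned mask)
    {
        if (n == NULL)
            return mask;
        if (n->tag == N_AGGREF)
            return collect_params(n->right, collect_params(n->left, mask));
        return collect_agg_params(n->right, collect_agg_params(n->left, mask));
    }

    int
    main(void)
    {
        NodeSketch p1  = {N_PARAM, 1, NULL, NULL};  /* $1, outside any agg */
        NodeSketch p2  = {N_PARAM, 2, NULL, NULL};  /* $2, inside the agg's arg */
        NodeSketch agg = {N_AGGREF, 0, &p2, NULL};
        NodeSketch top = {N_OPEXPR, 0, &p1, &agg};

        printf("agg param mask: 0x%x\n", collect_agg_params(&top, 0)); /* 0x4 */
        return 0;
    }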
diff --git a/src/backend/optimizer/prep/Makefile b/src/backend/optimizer/prep/Makefile index 5195d9b0ba..86301bfbd3 100644 --- a/src/backend/optimizer/prep/Makefile +++ b/src/backend/optimizer/prep/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/optimizer/prep top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = prepjointree.o prepqual.o prepsecurity.o preptlist.o prepunion.o +OBJS = prepjointree.o prepqual.o preptlist.o prepunion.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 5fa672d02c..41a930428f 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -12,7 +12,7 @@ * reduce_outer_joins * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -916,6 +916,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, subroot->processed_tlist = NIL; subroot->grouping_map = NULL; subroot->minmax_aggs = NIL; + subroot->qual_security_level = 0; subroot->hasInheritedTarget = false; subroot->hasRecursion = false; subroot->wt_param_id = -1; @@ -1121,6 +1122,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, case RTE_SUBQUERY: case RTE_FUNCTION: case RTE_VALUES: + case RTE_TABLEFUNC: child_rte->lateral = true; break; case RTE_JOIN: @@ -1128,6 +1130,7 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, #ifdef XCP case RTE_REMOTE_DUMMY: #endif + case RTE_NAMEDTUPLESTORE: /* these can't contain any lateral references */ break; } @@ -1194,9 +1197,12 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, */ parse->hasSubLinks |= subquery->hasSubLinks; + /* If subquery had any RLS conditions, now main query does too */ + parse->hasRowSecurity |= subquery->hasRowSecurity; + /* - * subquery won't be pulled up if it hasAggs or hasWindowFuncs, so no work - * needed on those flags + * subquery won't be pulled up if it hasAggs, hasWindowFuncs, or + * hasTargetSRFs, so no work needed on those flags */ /* @@ -1413,8 +1419,7 @@ is_simple_subquery(Query *subquery, RangeTblEntry *rte, * Let's just make sure it's a valid subselect ... */ if (!IsA(subquery, Query) || - subquery->commandType != CMD_SELECT || - subquery->utilityStmt != NULL) + subquery->commandType != CMD_SELECT) elog(ERROR, "subquery is bogus"); /* @@ -1426,8 +1431,8 @@ is_simple_subquery(Query *subquery, RangeTblEntry *rte, return false; /* - * Can't pull up a subquery involving grouping, aggregation, sorting, - * limiting, or WITH. (XXX WITH could possibly be allowed later) + * Can't pull up a subquery involving grouping, aggregation, SRFs, + * sorting, limiting, or WITH. (XXX WITH could possibly be allowed later) * * We also don't pull up a subquery that has explicit FOR UPDATE/SHARE * clauses, because pullup would cause the locking to occur semantically @@ -1437,6 +1442,7 @@ is_simple_subquery(Query *subquery, RangeTblEntry *rte, */ if (subquery->hasAggs || subquery->hasWindowFuncs || + subquery->hasTargetSRFs || subquery->groupClause || subquery->groupingSets || subquery->havingQual || @@ -1550,15 +1556,6 @@ is_simple_subquery(Query *subquery, RangeTblEntry *rte, } /* - * Don't pull up a subquery that has any set-returning functions in its - * targetlist. 
Otherwise we might well wind up inserting set-returning - * functions into places where they mustn't go, such as quals of higher - * queries. This also ensures deletion of an empty jointree is valid. - */ - if (expression_returns_set((Node *) subquery->targetList)) - return false; - - /* * Don't pull up a subquery that has any volatile functions in its * targetlist. Otherwise we might introduce multiple evaluations of these * functions, if they get copied to multiple places in the upper query, @@ -1603,7 +1600,7 @@ pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte) * Need a modifiable copy of the VALUES list to hack on, just in case it's * multiply referenced. */ - values_list = (List *) copyObject(linitial(rte->values_lists)); + values_list = copyObject(linitial(rte->values_lists)); /* * The VALUES RTE can't contain any Vars of level zero, let alone any that @@ -1756,15 +1753,13 @@ is_simple_union_all(Query *subquery) /* Let's just make sure it's a valid subselect ... */ if (!IsA(subquery, Query) || - subquery->commandType != CMD_SELECT || - subquery->utilityStmt != NULL) + subquery->commandType != CMD_SELECT) elog(ERROR, "subquery is bogus"); /* Is it a set-operation query at all? */ - topop = (SetOperationStmt *) subquery->setOperations; + topop = castNode(SetOperationStmt, subquery->setOperations); if (!topop) return false; - Assert(IsA(topop, SetOperationStmt)); /* Can't handle ORDER BY, LIMIT/OFFSET, locking, or WITH */ if (subquery->sortClause || @@ -1978,6 +1973,11 @@ replace_vars_in_jointree(Node *jtnode, pullup_replace_vars((Node *) rte->functions, context); break; + case RTE_TABLEFUNC: + rte->tablefunc = (TableFunc *) + pullup_replace_vars((Node *) rte->tablefunc, + context); + break; case RTE_VALUES: rte->values_lists = (List *) pullup_replace_vars((Node *) rte->values_lists, @@ -1988,6 +1988,7 @@ replace_vars_in_jointree(Node *jtnode, #ifdef XCP case RTE_REMOTE_DUMMY: #endif + case RTE_NAMEDTUPLESTORE: /* these shouldn't be marked LATERAL */ Assert(false); break; @@ -2146,7 +2147,7 @@ pullup_replace_vars_callback(Var *var, varattno); /* Make a copy of the tlist item to return */ - newnode = copyObject(tle->expr); + newnode = (Node *) copyObject(tle->expr); /* Insert PlaceHolderVar if needed */ if (rcon->need_phvs) @@ -2346,8 +2347,8 @@ flatten_simple_union_all(PlannerInfo *root) RangeTblRef *rtr; /* Shouldn't be called unless query has setops */ - topop = (SetOperationStmt *) parse->setOperations; - Assert(topop && IsA(topop, SetOperationStmt)); + topop = castNode(SetOperationStmt, parse->setOperations); + Assert(topop); /* Can't optimize away a recursive UNION */ if (root->hasRecursion) diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c index 0cc8856732..f75b3274ad 100644 --- a/src/backend/optimizer/prep/prepqual.c +++ b/src/backend/optimizer/prep/prepqual.c @@ -19,7 +19,7 @@ * tree after local transformations that might introduce nested AND/ORs. 
* * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/prep/prepsecurity.c b/src/backend/optimizer/prep/prepsecurity.c deleted file mode 100644 index 01eddf0fbf..0000000000 --- a/src/backend/optimizer/prep/prepsecurity.c +++ /dev/null @@ -1,486 +0,0 @@ -/*------------------------------------------------------------------------- - * - * prepsecurity.c - * Routines for preprocessing security barrier quals. - * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * src/backend/optimizer/prep/prepsecurity.c - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include "access/heapam.h" -#include "access/sysattr.h" -#include "catalog/heap.h" -#include "nodes/makefuncs.h" -#include "nodes/nodeFuncs.h" -#include "optimizer/prep.h" -#include "parser/analyze.h" -#include "parser/parsetree.h" -#include "rewrite/rewriteManip.h" -#include "utils/rel.h" - - -typedef struct -{ - int rt_index; /* Index of security barrier RTE */ - int sublevels_up; /* Current nesting depth */ - Relation rel; /* RTE relation at rt_index */ - List *targetlist; /* Targetlist for new subquery RTE */ - List *colnames; /* Column names in subquery RTE */ - List *vars_processed; /* List of Vars already processed */ -} security_barrier_replace_vars_context; - -static void expand_security_qual(PlannerInfo *root, List *tlist, int rt_index, - RangeTblEntry *rte, Node *qual, bool targetRelation); - -static void security_barrier_replace_vars(Node *node, - security_barrier_replace_vars_context *context); - -static bool security_barrier_replace_vars_walker(Node *node, - security_barrier_replace_vars_context *context); - - -/* - * expand_security_quals - - * expands any security barrier quals on RTEs in the query rtable, turning - * them into security barrier subqueries. - * - * Any given RTE may have multiple security barrier quals in a list, from which - * we create a set of nested subqueries to isolate each security barrier from - * the others, providing protection against malicious user-defined security - * barriers. The first security barrier qual in the list will be used in the - * innermost subquery. - * - * In practice, the only RTEs that will have security barrier quals are those - * that refer to tables with row-level security, or which are the target - * relation of an update to an auto-updatable security barrier view. RTEs - * that read from a security barrier view will have already been expanded by - * the rewriter. - */ -void -expand_security_quals(PlannerInfo *root, List *tlist) -{ - Query *parse = root->parse; - int rt_index; - ListCell *cell; - - /* - * Process each RTE in the rtable list. - * - * We only ever modify entries in place and append to the rtable, so it is - * safe to use a foreach loop here. - */ - rt_index = 0; - foreach(cell, parse->rtable) - { - bool targetRelation = false; - RangeTblEntry *rte = (RangeTblEntry *) lfirst(cell); - - rt_index++; - - if (rte->securityQuals == NIL) - continue; - - /* - * Ignore any RTEs that aren't used in the query (such RTEs may be - * present for permissions checks). 
- */ - if (rt_index != parse->resultRelation && - !rangeTableEntry_used((Node *) parse, rt_index, 0)) - continue; - - /* - * If this RTE is the target then we need to make a copy of it before - * expanding it. The unexpanded copy will become the new target, and - * the original RTE will be expanded to become the source of rows to - * update/delete. - */ - if (rt_index == parse->resultRelation) - { - RangeTblEntry *newrte = copyObject(rte); - - /* - * We need to let expand_security_qual know if this is the target - * relation, as it has additional work to do in that case. - * - * Capture that information here as we're about to replace - * parse->resultRelation. - */ - targetRelation = true; - - parse->rtable = lappend(parse->rtable, newrte); - parse->resultRelation = list_length(parse->rtable); - - /* - * Wipe out any copied security barrier quals on the new target to - * prevent infinite recursion. - */ - newrte->securityQuals = NIL; - - /* - * There's no need to do permissions checks twice, so wipe out the - * permissions info for the original RTE (we prefer to keep the - * bits set on the result RTE). - */ - rte->requiredPerms = 0; - rte->checkAsUser = InvalidOid; - rte->selectedCols = NULL; - rte->insertedCols = NULL; - rte->updatedCols = NULL; - - /* - * For the most part, Vars referencing the original relation - * should remain as they are, meaning that they pull OLD values - * from the expanded RTE. But in the RETURNING list and in any - * WITH CHECK OPTION quals, we want such Vars to represent NEW - * values, so change them to reference the new RTE. - */ - ChangeVarNodes((Node *) parse->returningList, rt_index, - parse->resultRelation, 0); - - ChangeVarNodes((Node *) parse->withCheckOptions, rt_index, - parse->resultRelation, 0); - } - - /* - * Process each security barrier qual in turn, starting with the - * innermost one (the first in the list) and working outwards. - * - * We remove each qual from the list before processing it, so that its - * variables aren't modified by expand_security_qual. Also we don't - * necessarily want the attributes referred to by the qual to be - * exposed by the newly built subquery. - */ - while (rte->securityQuals != NIL) - { - Node *qual = (Node *) linitial(rte->securityQuals); - - rte->securityQuals = list_delete_first(rte->securityQuals); - - ChangeVarNodes(qual, rt_index, 1, 0); - expand_security_qual(root, tlist, rt_index, rte, qual, - targetRelation); - } - } -} - - -/* - * expand_security_qual - - * expand the specified security barrier qual on a query RTE, turning the - * RTE into a security barrier subquery. - */ -static void -expand_security_qual(PlannerInfo *root, List *tlist, int rt_index, - RangeTblEntry *rte, Node *qual, bool targetRelation) -{ - Query *parse = root->parse; - Oid relid = rte->relid; - Query *subquery; - RangeTblEntry *subrte; - RangeTblRef *subrtr; - PlanRowMark *rc; - security_barrier_replace_vars_context context; - ListCell *cell; - - /* - * There should only be 2 possible cases: - * - * 1. A relation RTE, which we turn into a subquery RTE containing all - * referenced columns. - * - * 2. A subquery RTE (either from a prior call to this function or from an - * expanded view). In this case we build a new subquery on top of it to - * isolate this security barrier qual from any other quals. - */ - switch (rte->rtekind) - { - case RTE_RELATION: - - /* - * Turn the relation RTE into a security barrier subquery RTE, - * moving all permissions checks down into the subquery. 
- */ - subquery = makeNode(Query); - subquery->commandType = CMD_SELECT; - subquery->querySource = QSRC_INSTEAD_RULE; - - subrte = copyObject(rte); - subrte->inFromCl = true; - subrte->securityQuals = NIL; - subquery->rtable = list_make1(subrte); - - subrtr = makeNode(RangeTblRef); - subrtr->rtindex = 1; - subquery->jointree = makeFromExpr(list_make1(subrtr), qual); - subquery->hasSubLinks = checkExprHasSubLink(qual); - - rte->rtekind = RTE_SUBQUERY; - rte->relid = InvalidOid; - rte->subquery = subquery; - rte->security_barrier = true; - rte->inh = false; /* must not be set for a subquery */ - - /* the permissions checks have now been moved down */ - rte->requiredPerms = 0; - rte->checkAsUser = InvalidOid; - rte->selectedCols = NULL; - rte->insertedCols = NULL; - rte->updatedCols = NULL; - - /* - * Now deal with any PlanRowMark on this RTE by requesting a lock - * of the same strength on the RTE copied down to the subquery. - * - * Note that we can only push down user-defined quals if they are - * only using leakproof (and therefore trusted) functions and - * operators. As a result, we may end up locking more rows than - * strictly necessary (and, in the worst case, we could end up - * locking all rows which pass the securityQuals). This is - * currently documented behavior, but it'd be nice to come up with - * a better solution some day. - */ - rc = get_plan_rowmark(root->rowMarks, rt_index); - if (rc != NULL) - { - if (rc->strength != LCS_NONE) - applyLockingClause(subquery, 1, rc->strength, - rc->waitPolicy, false); - root->rowMarks = list_delete_ptr(root->rowMarks, rc); - } - - /* - * When we are replacing the target relation with a subquery, we - * need to make sure to add a locking clause explicitly to the - * generated subquery since there won't be any row marks against - * the target relation itself. - */ - if (targetRelation) - applyLockingClause(subquery, 1, LCS_FORUPDATE, - LockWaitBlock, false); - - /* - * Replace any variables in the outer query that refer to the - * original relation RTE with references to columns that we will - * expose in the new subquery, building the subquery's targetlist - * as we go. Also replace any references in the translated_vars - * lists of any appendrels. - */ - context.rt_index = rt_index; - context.sublevels_up = 0; - context.rel = heap_open(relid, NoLock); - context.targetlist = NIL; - context.colnames = NIL; - context.vars_processed = NIL; - - security_barrier_replace_vars((Node *) parse, &context); - security_barrier_replace_vars((Node *) tlist, &context); - security_barrier_replace_vars((Node *) root->append_rel_list, - &context); - - heap_close(context.rel, NoLock); - - /* Now we know what columns the subquery needs to expose */ - rte->subquery->targetList = context.targetlist; - rte->eref = makeAlias(rte->eref->aliasname, context.colnames); - - break; - - case RTE_SUBQUERY: - - /* - * Build a new subquery that includes all the same columns as the - * original subquery. 
- */ - subquery = makeNode(Query); - subquery->commandType = CMD_SELECT; - subquery->querySource = QSRC_INSTEAD_RULE; - subquery->targetList = NIL; - - foreach(cell, rte->subquery->targetList) - { - TargetEntry *tle; - Var *var; - - tle = (TargetEntry *) lfirst(cell); - var = makeVarFromTargetEntry(1, tle); - - tle = makeTargetEntry((Expr *) var, - list_length(subquery->targetList) + 1, - pstrdup(tle->resname), - tle->resjunk); - subquery->targetList = lappend(subquery->targetList, tle); - } - - subrte = makeNode(RangeTblEntry); - subrte->rtekind = RTE_SUBQUERY; - subrte->subquery = rte->subquery; - subrte->security_barrier = rte->security_barrier; - subrte->eref = copyObject(rte->eref); - subrte->inFromCl = true; - subquery->rtable = list_make1(subrte); - - subrtr = makeNode(RangeTblRef); - subrtr->rtindex = 1; - subquery->jointree = makeFromExpr(list_make1(subrtr), qual); - subquery->hasSubLinks = checkExprHasSubLink(qual); - - rte->subquery = subquery; - rte->security_barrier = true; - - break; - - default: - elog(ERROR, "invalid range table entry for security barrier qual"); - } -} - - -/* - * security_barrier_replace_vars - - * Apply security barrier variable replacement to an expression tree. - * - * This also builds/updates a targetlist with entries for each replacement - * variable that needs to be exposed by the security barrier subquery RTE. - * - * NOTE: although this has the form of a walker, we cheat and modify the - * nodes in-place. The given expression tree should have been copied - * earlier to ensure that no unwanted side-effects occur! - */ -static void -security_barrier_replace_vars(Node *node, - security_barrier_replace_vars_context *context) -{ - /* - * Must be prepared to start with a Query or a bare expression tree; if - * it's a Query, go straight to query_tree_walker to make sure that - * sublevels_up doesn't get incremented prematurely. - */ - if (node && IsA(node, Query)) - query_tree_walker((Query *) node, - security_barrier_replace_vars_walker, - (void *) context, 0); - else - security_barrier_replace_vars_walker(node, context); -} - -static bool -security_barrier_replace_vars_walker(Node *node, - security_barrier_replace_vars_context *context) -{ - if (node == NULL) - return false; - - if (IsA(node, Var)) - { - Var *var = (Var *) node; - - /* - * Note that the same Var may be present in different lists, so we - * need to take care not to process it multiple times. - */ - if (var->varno == context->rt_index && - var->varlevelsup == context->sublevels_up && - !list_member_ptr(context->vars_processed, var)) - { - /* - * Found a matching variable. Make sure that it is in the subquery - * targetlist and map its attno accordingly. - */ - AttrNumber attno; - ListCell *l; - TargetEntry *tle; - char *attname; - Var *newvar; - - /* Search for the base attribute in the subquery targetlist */ - attno = InvalidAttrNumber; - foreach(l, context->targetlist) - { - tle = (TargetEntry *) lfirst(l); - attno++; - - Assert(IsA(tle->expr, Var)); - if (((Var *) tle->expr)->varattno == var->varattno && - ((Var *) tle->expr)->varcollid == var->varcollid) - { - /* Map the variable onto this subquery targetlist entry */ - var->varattno = var->varoattno = attno; - /* Mark this var as having been processed */ - context->vars_processed = lappend(context->vars_processed, var); - return false; - } - } - - /* Not in the subquery targetlist, so add it. Get its name. 
*/ - if (var->varattno < 0) - { - Form_pg_attribute att_tup; - - att_tup = SystemAttributeDefinition(var->varattno, - context->rel->rd_rel->relhasoids); - attname = NameStr(att_tup->attname); - } - else if (var->varattno == InvalidAttrNumber) - { - attname = "wholerow"; - } - else if (var->varattno <= context->rel->rd_att->natts) - { - Form_pg_attribute att_tup; - - att_tup = context->rel->rd_att->attrs[var->varattno - 1]; - attname = NameStr(att_tup->attname); - } - else - { - elog(ERROR, "invalid attribute number %d in security_barrier_replace_vars", var->varattno); - } - - /* New variable for subquery targetlist */ - newvar = copyObject(var); - newvar->varno = newvar->varnoold = 1; - newvar->varlevelsup = 0; - - attno = list_length(context->targetlist) + 1; - tle = makeTargetEntry((Expr *) newvar, - attno, - pstrdup(attname), - false); - - context->targetlist = lappend(context->targetlist, tle); - - context->colnames = lappend(context->colnames, - makeString(pstrdup(attname))); - - /* Update the outer query's variable */ - var->varattno = var->varoattno = attno; - - /* Remember this Var so that we don't process it again */ - context->vars_processed = lappend(context->vars_processed, var); - } - return false; - } - - if (IsA(node, Query)) - { - /* Recurse into subselects */ - bool result; - - context->sublevels_up++; - result = query_tree_walker((Query *) node, - security_barrier_replace_vars_walker, - (void *) context, 0); - context->sublevels_up--; - return result; - } - - return expression_tree_walker(node, security_barrier_replace_vars_walker, - (void *) context); -} diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c index 64cd7262d0..4d47272781 100644 --- a/src/backend/optimizer/prep/preptlist.c +++ b/src/backend/optimizer/prep/preptlist.c @@ -27,7 +27,7 @@ * that because it's faster in typical non-inherited cases. * * Portions Copyright (c) 2012-2014, TransLattice, Inc. - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -301,7 +301,7 @@ preprocess_targetlist(PlannerInfo *root, List *tlist) var->varno == result_relation) continue; /* don't need it */ - if (tlist_member((Node *) var, tlist)) + if (tlist_member((Expr *) var, tlist)) continue; /* already got it */ tle = makeTargetEntry((Expr *) var, diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 2522636392..66c684c065 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -18,7 +18,7 @@ * * * Portions Copyright (c) 2012-2014, TransLattice, Inc. 
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -57,7 +57,6 @@ typedef struct { PlannerInfo *root; AppendRelInfo *appinfo; - int sublevels_up; } adjust_appendrel_attrs_context; static Path *recurse_set_operations(Node *setOp, PlannerInfo *root, @@ -131,7 +130,7 @@ RelOptInfo * plan_set_operations(PlannerInfo *root) { Query *parse = root->parse; - SetOperationStmt *topop = (SetOperationStmt *) parse->setOperations; + SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations); Node *node; RangeTblEntry *leftmostRTE; Query *leftmostQuery; @@ -139,7 +138,7 @@ plan_set_operations(PlannerInfo *root) Path *path; List *top_tlist; - Assert(topop && IsA(topop, SetOperationStmt)); + Assert(topop); /* check for unsupported stuff */ Assert(parse->jointree->fromlist == NIL); @@ -273,7 +272,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, * used for much here, but it carries the subroot data structures * forward to setrefs.c processing. */ - rel = build_simple_rel(root, rtr->rtindex, RELOPT_BASEREL); + rel = build_simple_rel(root, rtr->rtindex, NULL); /* plan_params should not be in use in current query level */ Assert(root->plan_params == NIL); @@ -345,6 +344,16 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, * Estimate number of groups if caller wants it. If the subquery used * grouping or aggregation, its output is probably mostly unique * anyway; otherwise do statistical estimation. + * + * XXX you don't really want to know about this: we do the estimation + * using the subquery's original targetlist expressions, not the + * subroot->processed_tlist which might seem more appropriate. The + * reason is that if the subquery is itself a setop, it may return a + * processed_tlist containing "varno 0" Vars generated by + * generate_append_tlist, and those would confuse estimate_num_groups + * mightily. We ought to get rid of the "varno 0" hack, but that + * requires a redesign of the parsetree representation of setops, so + * that there can be an RTE corresponding to each setop's output. */ if (pNumGroups) { @@ -354,7 +363,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, *pNumGroups = subpath->rows; else *pNumGroups = estimate_num_groups(subroot, - get_tlist_exprs(subroot->processed_tlist, false), + get_tlist_exprs(subquery->targetList, false), subpath->rows, NULL); } @@ -635,7 +644,7 @@ generate_union_path(SetOperationStmt *op, PlannerInfo *root, /* * Append the child results together. */ - path = (Path *) create_append_path(result_rel, pathlist, NULL, 0); + path = (Path *) create_append_path(result_rel, pathlist, NULL, 0, NIL); /* We have to manually jam the right tlist into the path; ick */ path->pathtarget = create_pathtarget(root, tlist); @@ -747,7 +756,7 @@ generate_nonunion_path(SetOperationStmt *op, PlannerInfo *root, /* * Append the child results together. 
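A few hunks up, recurse_set_operations() estimates output group counts with a simple shortcut: if the subquery already grouped or aggregated, assume its output is (mostly) unique; otherwise ask the statistics machinery. Schematically (the estimator below is a stand-in, not estimate_num_groups()):

    #include <stdbool.h>
    #include <stdio.h>

    static double
    setop_group_estimate(bool subquery_groups_or_aggs, double subpath_rows,
                         double (*stats_estimator)(double))
    {
        if (subquery_groups_or_aggs)
            return subpath_rows;              /* output assumed already unique */
        return stats_estimator(subpath_rows); /* fall back to statistics */
    }

    /* stand-in for a statistics-based estimate */
    static double
    half(double rows)
    {
        return rows / 2.0;
    }

    int
    main(void)
    {
        printf("%.0f\n", setop_group_estimate(true, 1000, half));   /* 1000 */
        printf("%.0f\n", setop_group_estimate(false, 1000, half));  /* 500 */
        return 0;
    }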
*/ - path = (Path *) create_append_path(result_rel, pathlist, NULL, 0); + path = (Path *) create_append_path(result_rel, pathlist, NULL, 0, NIL); /* We have to manually jam the right tlist into the path; ick */ path->pathtarget = create_pathtarget(root, tlist); @@ -1343,7 +1352,7 @@ generate_append_tlist(List *colTypes, List *colCollations, static List * generate_setop_grouplist(SetOperationStmt *op, List *targetlist) { - List *grouplist = (List *) copyObject(op->groupClauses); + List *grouplist = copyObject(op->groupClauses); ListCell *lg; ListCell *lt; @@ -1433,6 +1442,9 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) List *inhOIDs; List *appinfos; ListCell *l; + bool need_append; + PartitionedChildRelInfo *pcinfo; + List *partitioned_child_rels = NIL; /* Does RT entry allow inheritance? */ if (!rte->inh) @@ -1504,6 +1516,7 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) /* Scan the inheritance set and expand it */ appinfos = NIL; + need_append = false; foreach(l, inhOIDs) { Oid childOID = lfirst_oid(l); @@ -1535,46 +1548,63 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) * We copy most fields of the parent's RTE, but replace relation OID * and relkind, and set inh = false. Also, set requiredPerms to zero * since all required permissions checks are done on the original RTE. + * Likewise, set the child's securityQuals to empty, because we only + * want to apply the parent's RLS conditions regardless of what RLS + * properties individual children may have. (This is an intentional + * choice to make inherited RLS work like regular permissions checks.) + * The parent securityQuals will be propagated to children along with + * other base restriction clauses, so we don't need to do it here. */ childrte = copyObject(rte); childrte->relid = childOID; childrte->relkind = newrelation->rd_rel->relkind; childrte->inh = false; childrte->requiredPerms = 0; + childrte->securityQuals = NIL; parse->rtable = lappend(parse->rtable, childrte); childRTindex = list_length(parse->rtable); /* - * Build an AppendRelInfo for this parent and child. - */ - appinfo = makeNode(AppendRelInfo); - appinfo->parent_relid = rti; - appinfo->child_relid = childRTindex; - appinfo->parent_reltype = oldrelation->rd_rel->reltype; - appinfo->child_reltype = newrelation->rd_rel->reltype; - make_inh_translation_list(oldrelation, newrelation, childRTindex, - &appinfo->translated_vars); - appinfo->parent_reloid = parentOID; - appinfos = lappend(appinfos, appinfo); - - /* - * Translate the column permissions bitmaps to the child's attnums (we - * have to build the translated_vars list before we can do this). But - * if this is the parent table, leave copyObject's result alone. - * - * Note: we need to do this even though the executor won't run any - * permissions checks on the child RTE. The insertedCols/updatedCols - * bitmaps may be examined for trigger-firing purposes. + * Build an AppendRelInfo for this parent and child, unless the child + * is a partitioned table. 
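In outline, the classification just described works as below; the relkind letters follow pg_class conventions, the rest is illustrative:

    #include <stdio.h>

    #define RELKIND_RELATION          'r'
    #define RELKIND_PARTITIONED_TABLE 'p'

    int
    main(void)
    {
        /* children as returned by inheritance expansion, with RT indexes */
        struct { int rti; char relkind; } children[] = {
            {2, RELKIND_PARTITIONED_TABLE},  /* intermediate partitioned table */
            {3, RELKIND_RELATION},           /* leaf partition */
            {4, RELKIND_RELATION},           /* leaf partition */
        };
        int need_append = 0;

        for (int i = 0; i < 3; i++)
        {
            if (children[i].relkind == RELKIND_PARTITIONED_TABLE)
                printf("rti %d: partitioned child, RT index tracked separately\n",
                       children[i].rti);
            else
            {
                need_append = 1;  /* at least one real table: keep the Append */
                printf("rti %d: leaf child, gets an AppendRelInfo\n",
                       children[i].rti);
            }
        }
        printf("need_append = %d\n", need_append);
        return 0;
    }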
*/ - if (childOID != parentOID) + if (childrte->relkind != RELKIND_PARTITIONED_TABLE) { - childrte->selectedCols = translate_col_privs(rte->selectedCols, + need_append = true; + appinfo = makeNode(AppendRelInfo); + appinfo->parent_relid = rti; + appinfo->child_relid = childRTindex; + appinfo->parent_reltype = oldrelation->rd_rel->reltype; + appinfo->child_reltype = newrelation->rd_rel->reltype; + make_inh_translation_list(oldrelation, newrelation, childRTindex, + &appinfo->translated_vars); + appinfo->parent_reloid = parentOID; + appinfos = lappend(appinfos, appinfo); + + /* + * Translate the column permissions bitmaps to the child's attnums + * (we have to build the translated_vars list before we can do + * this). But if this is the parent table, leave copyObject's + * result alone. + * + * Note: we need to do this even though the executor won't run any + * permissions checks on the child RTE. The + * insertedCols/updatedCols bitmaps may be examined for + * trigger-firing purposes. + */ + if (childOID != parentOID) + { + childrte->selectedCols = translate_col_privs(rte->selectedCols, appinfo->translated_vars); - childrte->insertedCols = translate_col_privs(rte->insertedCols, + childrte->insertedCols = translate_col_privs(rte->insertedCols, appinfo->translated_vars); - childrte->updatedCols = translate_col_privs(rte->updatedCols, + childrte->updatedCols = translate_col_privs(rte->updatedCols, appinfo->translated_vars); + } } + else + partitioned_child_rels = lappend_int(partitioned_child_rels, + childRTindex); /* * Build a PlanRowMark if parent is marked FOR UPDATE/SHARE. @@ -1591,7 +1621,14 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) newrc->allMarkTypes = (1 << newrc->markType); newrc->strength = oldrc->strength; newrc->waitPolicy = oldrc->waitPolicy; - newrc->isParent = false; + + /* + * We mark RowMarks for partitioned child tables as parent + * RowMarks so that the executor ignores them (except that their + * existence means the child tables must be locked using an + * appropriate mode). + */ + newrc->isParent = (childrte->relkind == RELKIND_PARTITIONED_TABLE); /* Include child's rowmark type in parent's allMarkTypes */ oldrc->allMarkTypes |= newrc->allMarkTypes; @@ -1607,17 +1644,37 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) heap_close(oldrelation, NoLock); /* - * If all the children were temp tables, pretend it's a non-inheritance - * situation. The duplicate RTE we added for the parent table is - * harmless, so we don't bother to get rid of it. + * If all the children were temp tables or a partitioned parent did not + * have any leaf partitions, pretend it's a non-inheritance situation; we + * don't need an Append node in that case. The duplicate RTE we added for + * the parent table is harmless, so we don't bother to get rid of it; + * ditto for the useless PlanRowMark node. */ - if (list_length(appinfos) < 2) + if (!need_append) { /* Clear flag before returning */ rte->inh = false; return; } + /* + * We keep a list of objects in root, each of which maps a partitioned + * parent RT index to the list of RT indexes of its partitioned child + * tables. When creating an Append or a ModifyTable path for the parent, + * we copy the child RT index list verbatim to the path, so that it can + * be carried over to the executor, which uses it to identify the + * partitioned child tables.
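To make the consumer side of this bookkeeping concrete, here is a minimal sketch of the lookup that path-creation code performs against root->pcinfo_list; it is modeled on the get_partitioned_child_rels() helper this patch series adds to planner.c, so treat the exact name and signature as an assumption:

    List *
    get_partitioned_child_rels(PlannerInfo *root, Index rti)
    {
        List       *result = NIL;
        ListCell   *l;

        foreach(l, root->pcinfo_list)
        {
            PartitionedChildRelInfo *pcinfo = lfirst(l);

            /* return the verbatim child-rel list for this parent */
            if (pcinfo->parent_relid == rti)
            {
                result = pcinfo->child_rels;
                break;
            }
        }

        return result;
    }

The list found this way is what gets passed as partitioned_rels to create_append_path(), create_merge_append_path(), and create_modifytable_path() later in this patch.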
+ */ + if (partitioned_child_rels != NIL) + { + pcinfo = makeNode(PartitionedChildRelInfo); + + Assert(rte->relkind == RELKIND_PARTITIONED_TABLE); + pcinfo->parent_relid = rti; + pcinfo->child_rels = partitioned_child_rels; + root->pcinfo_list = lappend(root->pcinfo_list, pcinfo); + } + /* Otherwise, OK to add to root->append_rel_list */ root->append_rel_list = list_concat(root->append_rel_list, appinfos); } @@ -1762,12 +1819,11 @@ translate_col_privs(const Bitmapset *parent_privs, attno = InvalidAttrNumber; foreach(lc, translated_vars) { - Var *var = (Var *) lfirst(lc); + Var *var = lfirst_node(Var, lc); attno++; if (var == NULL) /* ignore dropped columns */ continue; - Assert(IsA(var, Var)); if (whole_row || bms_is_member(attno - FirstLowInvalidHeapAttributeNumber, parent_privs)) @@ -1785,9 +1841,8 @@ translate_col_privs(const Bitmapset *parent_privs, * child rel instead. We also update rtindexes appearing outside Vars, * such as resultRelation and jointree relids. * - * Note: this is applied after conversion of sublinks to subplans in the - * query jointree, but there may still be sublinks in the security barrier - * quals of RTEs, so we do need to cope with recursion into sub-queries. + * Note: this is only applied after conversion of sublinks to subplans, + * so we don't need to cope with recursion into sub-queries. * * Note: this is not hugely different from what pullup_replace_vars() does; * maybe we should try to fold the two routines together. @@ -1800,12 +1855,9 @@ adjust_appendrel_attrs(PlannerInfo *root, Node *node, AppendRelInfo *appinfo) context.root = root; context.appinfo = appinfo; - context.sublevels_up = 0; /* - * Must be prepared to start with a Query or a bare expression tree; if - * it's a Query, go straight to query_tree_walker to make sure that - * sublevels_up doesn't get incremented prematurely. + * Must be prepared to start with a Query or a bare expression tree. 
*/ if (node && IsA(node, Query)) { @@ -1844,7 +1896,7 @@ adjust_appendrel_attrs_mutator(Node *node, { Var *var = (Var *) copyObject(node); - if (var->varlevelsup == context->sublevels_up && + if (var->varlevelsup == 0 && var->varno == appinfo->parent_relid) { var->varno = appinfo->child_relid; @@ -1861,7 +1913,6 @@ adjust_appendrel_attrs_mutator(Node *node, if (newnode == NULL) elog(ERROR, "attribute %d of relation \"%s\" does not exist", var->varattno, get_rel_name(appinfo->parent_reloid)); - ((Var *) newnode)->varlevelsup += context->sublevels_up; return newnode; } else if (var->varattno == 0) @@ -1904,17 +1955,10 @@ adjust_appendrel_attrs_mutator(Node *node, RowExpr *rowexpr; List *fields; RangeTblEntry *rte; - ListCell *lc; rte = rt_fetch(appinfo->parent_relid, context->root->parse->rtable); - fields = (List *) copyObject(appinfo->translated_vars); - foreach(lc, fields) - { - Var *field = (Var *) lfirst(lc); - - field->varlevelsup += context->sublevels_up; - } + fields = copyObject(appinfo->translated_vars); rowexpr = makeNode(RowExpr); rowexpr->args = fields; rowexpr->row_typeid = var->vartype; @@ -1933,8 +1977,7 @@ adjust_appendrel_attrs_mutator(Node *node, { CurrentOfExpr *cexpr = (CurrentOfExpr *) copyObject(node); - if (context->sublevels_up == 0 && - cexpr->cvarno == appinfo->parent_relid) + if (cexpr->cvarno == appinfo->parent_relid) cexpr->cvarno = appinfo->child_relid; return (Node *) cexpr; } @@ -1942,8 +1985,7 @@ adjust_appendrel_attrs_mutator(Node *node, { RangeTblRef *rtr = (RangeTblRef *) copyObject(node); - if (context->sublevels_up == 0 && - rtr->rtindex == appinfo->parent_relid) + if (rtr->rtindex == appinfo->parent_relid) rtr->rtindex = appinfo->child_relid; return (Node *) rtr; } @@ -1956,8 +1998,7 @@ adjust_appendrel_attrs_mutator(Node *node, adjust_appendrel_attrs_mutator, (void *) context); /* now fix JoinExpr's rtindex (probably never happens) */ - if (context->sublevels_up == 0 && - j->rtindex == appinfo->parent_relid) + if (j->rtindex == appinfo->parent_relid) j->rtindex = appinfo->child_relid; return (Node *) j; } @@ -1970,7 +2011,7 @@ adjust_appendrel_attrs_mutator(Node *node, adjust_appendrel_attrs_mutator, (void *) context); /* now fix PlaceHolderVar's relid sets */ - if (phv->phlevelsup == context->sublevels_up) + if (phv->phlevelsup == 0) phv->phrels = adjust_relid_set(phv->phrels, appinfo->parent_relid, appinfo->child_relid); @@ -2041,29 +2082,12 @@ adjust_appendrel_attrs_mutator(Node *node, return (Node *) newinfo; } - if (IsA(node, Query)) - { - /* - * Recurse into sublink subqueries. This should only be possible in - * security barrier quals of top-level RTEs. All other sublinks should - * have already been converted to subplans during expression - * preprocessing, but this doesn't happen for security barrier quals, - * since they are destined to become quals of a subquery RTE, which - * will be recursively planned, and so should not be preprocessed at - * this stage. - * - * We don't explicitly Assert() for securityQuals here simply because - * it's not trivial to do so. - */ - Query *newnode; - - context->sublevels_up++; - newnode = query_tree_mutator((Query *) node, - adjust_appendrel_attrs_mutator, - (void *) context, 0); - context->sublevels_up--; - return (Node *) newnode; - } + /* + * NOTE: we do not need to recurse into sublinks, because they should + * already have been converted to subplans before we see them. 
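With sublevels_up gone, the mutator only ever rewrites level-zero Vars, and the interesting data structure is appinfo->translated_vars, which is indexed by parent attribute number. A worked example, with hypothetical RT indexes and a child whose columns are stored in a different order:

    /*
     * parent (varno 1):  a (attno 1)   b (attno 2)   c (attno 3, dropped)
     * translated_vars:   Var(3, 2)     Var(3, 1)     NULL
     *
     * so a parent Var (varno 1, varattno 2) is replaced by the child
     * Var (varno 3, varattno 1), while hitting the NULL entry for a
     * dropped column raises the "attribute ... does not exist" error
     * seen above.
     */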
+ */ + Assert(!IsA(node, SubLink)); + Assert(!IsA(node, Query)); return expression_tree_mutator(node, adjust_appendrel_attrs_mutator, (void *) context); @@ -2198,7 +2222,7 @@ adjust_appendrel_attrs_multilevel(PlannerInfo *root, Node *node, RelOptInfo *parent_rel = find_base_rel(root, appinfo->parent_relid); /* If parent is also a child, first recurse to apply its translations */ - if (parent_rel->reloptkind == RELOPT_OTHER_MEMBER_REL) + if (IS_OTHER_REL(parent_rel)) node = adjust_appendrel_attrs_multilevel(root, node, parent_rel); else Assert(parent_rel->reloptkind == RELOPT_BASEREL); diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 4e23898ff9..a1dafc8e0f 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -3,7 +3,7 @@ * clauses.c * routines to manipulate qualification clauses * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -91,20 +91,21 @@ typedef struct typedef struct { - bool allow_restricted; -} has_parallel_hazard_arg; + char max_hazard; /* worst proparallel hazard found so far */ + char max_interesting; /* worst proparallel hazard of interest */ + List *safe_param_ids; /* PARAM_EXEC Param IDs to treat as safe */ +} max_parallel_hazard_context; static bool contain_agg_clause_walker(Node *node, void *context); static bool get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context); static bool find_window_functions_walker(Node *node, WindowFuncLists *lists); -static bool expression_returns_set_rows_walker(Node *node, double *count); static bool contain_subplans_walker(Node *node, void *context); static bool contain_mutable_functions_walker(Node *node, void *context); static bool contain_volatile_functions_walker(Node *node, void *context); static bool contain_volatile_functions_not_nextval_walker(Node *node, void *context); -static bool has_parallel_hazard_walker(Node *node, - has_parallel_hazard_arg *context); +static bool max_parallel_hazard_walker(Node *node, + max_parallel_hazard_context *context); static bool contain_nonstrict_functions_walker(Node *node, void *context); static bool contain_context_dependent_node(Node *clause); static bool contain_context_dependent_node_walker(Node *node, int *flags); @@ -354,8 +355,8 @@ make_and_qual(Node *qual1, Node *qual2) } /* - * Sometimes (such as in the input of ExecQual), we use lists of expression - * nodes with implicit AND semantics. + * The planner frequently prefers to represent qualification expressions + * as lists of boolean expressions with implicit AND semantics. * * These functions convert between an AND-semantics expression list and the * ordinary representation of a boolean expression. @@ -646,6 +647,16 @@ get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context) /* Use average width if aggregate definition gave one */ if (aggtransspace > 0) avgwidth = aggtransspace; + else if (aggtransfn == F_ARRAY_APPEND) + { + /* + * If the transition function is array_append(), it'll use an + * expanded array as transvalue, which will occupy at least + * ALLOCSET_SMALL_INITSIZE and possibly more. Use that as the + * estimate for lack of a better idea. 
+ */ + avgwidth = ALLOCSET_SMALL_INITSIZE; + } else { /* @@ -779,114 +790,37 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists) /* * expression_returns_set_rows * Estimate the number of rows returned by a set-returning expression. - * The result is 1 if there are no set-returning functions. + * The result is 1 if it's not a set-returning expression. * - * We use the product of the rowcount estimates of all the functions in - * the given tree (this corresponds to the behavior of ExecMakeFunctionResult - * for nested set-returning functions). + * We should only examine the top-level function or operator; it used to be + * appropriate to recurse, but not anymore. (Even if there are more SRFs in + * the function's inputs, their multipliers are accounted for separately.) * * Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c. */ double expression_returns_set_rows(Node *clause) { - double result = 1; - - (void) expression_returns_set_rows_walker(clause, &result); - return clamp_row_est(result); -} - -static bool -expression_returns_set_rows_walker(Node *node, double *count) -{ - if (node == NULL) - return false; - if (IsA(node, FuncExpr)) + if (clause == NULL) + return 1.0; + if (IsA(clause, FuncExpr)) { - FuncExpr *expr = (FuncExpr *) node; + FuncExpr *expr = (FuncExpr *) clause; if (expr->funcretset) - *count *= get_func_rows(expr->funcid); + return clamp_row_est(get_func_rows(expr->funcid)); } - if (IsA(node, OpExpr)) + if (IsA(clause, OpExpr)) { - OpExpr *expr = (OpExpr *) node; + OpExpr *expr = (OpExpr *) clause; if (expr->opretset) { set_opfuncid(expr); - *count *= get_func_rows(expr->opfuncid); + return clamp_row_est(get_func_rows(expr->opfuncid)); } } - - /* Avoid recursion for some cases that can't return a set */ - if (IsA(node, Aggref)) - return false; - if (IsA(node, WindowFunc)) - return false; - if (IsA(node, DistinctExpr)) - return false; - if (IsA(node, NullIfExpr)) - return false; - if (IsA(node, ScalarArrayOpExpr)) - return false; - if (IsA(node, BoolExpr)) - return false; - if (IsA(node, SubLink)) - return false; - if (IsA(node, SubPlan)) - return false; - if (IsA(node, AlternativeSubPlan)) - return false; - if (IsA(node, ArrayExpr)) - return false; - if (IsA(node, RowExpr)) - return false; - if (IsA(node, RowCompareExpr)) - return false; - if (IsA(node, CoalesceExpr)) - return false; - if (IsA(node, MinMaxExpr)) - return false; - if (IsA(node, XmlExpr)) - return false; - - return expression_tree_walker(node, expression_returns_set_rows_walker, - (void *) count); -} - -/* - * tlist_returns_set_rows - * Estimate the number of rows returned by a set-returning targetlist. - * The result is 1 if there are no set-returning functions. - * - * Here, the result is the largest rowcount estimate of any of the tlist's - * expressions, not the product as you would get from naively applying - * expression_returns_set_rows() to the whole tlist. The behavior actually - * implemented by ExecTargetList produces a number of rows equal to the least - * common multiple of the expression rowcounts, so that the product would be - * a worst-case estimate that is typically not realistic. Taking the max as - * we do here is a best-case estimate that might not be realistic either, - * but it's probably closer for typical usages. We don't try to compute the - * actual LCM because we're working with very approximate estimates, so their - * LCM would be unduly noisy. 
- */ -double -tlist_returns_set_rows(List *tlist) -{ - double result = 1; - ListCell *lc; - - foreach(lc, tlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - double colresult; - - colresult = expression_returns_set_rows((Node *) tle->expr); - if (result < colresult) - result = colresult; - } - return result; + return 1.0; } @@ -962,6 +896,12 @@ contain_mutable_functions_walker(Node *node, void *context) context)) return true; + if (IsA(node, SQLValueFunction)) + { + /* all variants of SQLValueFunction are stable */ + return true; + } + /* * It should be safe to treat MinMaxExpr as immutable, because it will * depend on a non-cross-type btree comparison function, and those should @@ -1031,7 +971,8 @@ contain_volatile_functions_walker(Node *node, void *context) /* * See notes in contain_mutable_functions_walker about why we treat - * MinMaxExpr, XmlExpr, and CoerceToDomain as immutable. + * MinMaxExpr, XmlExpr, and CoerceToDomain as immutable, while + * SQLValueFunction is stable. Hence, none of them are of interest here. */ /* Recurse to check arguments */ @@ -1076,7 +1017,8 @@ contain_volatile_functions_not_nextval_walker(Node *node, void *context) /* * See notes in contain_mutable_functions_walker about why we treat - * MinMaxExpr, XmlExpr, and CoerceToDomain as immutable. + * MinMaxExpr, XmlExpr, and CoerceToDomain as immutable, while + * SQLValueFunction is stable. Hence, none of them are of interest here. */ /* Recurse to check arguments */ @@ -1092,46 +1034,106 @@ contain_volatile_functions_not_nextval_walker(Node *node, void *context) context); } + /***************************************************************************** * Check queries for parallel unsafe and/or restricted constructs *****************************************************************************/ /* - * Check whether a node tree contains parallel hazards. This is used both on - * the entire query tree, to see whether the query can be parallelized at all - * (with allow_restricted = true), and also to evaluate whether a particular - * expression is safe to run within a parallel worker (with allow_restricted = - * false). We could separate these concerns into two different functions, but - * there's enough overlap that it doesn't seem worthwhile. + * max_parallel_hazard + * Find the worst parallel-hazard level in the given query + * + * Returns the worst function hazard property (the earliest in this list: + * PROPARALLEL_UNSAFE, PROPARALLEL_RESTRICTED, PROPARALLEL_SAFE) that can + * be found in the given parsetree. We use this to find out whether the query + * can be parallelized at all. The caller will also save the result in + * PlannerGlobal so as to short-circuit checks of portions of the querytree + * later, in the common case where everything is SAFE. + */ +char +max_parallel_hazard(Query *parse) +{ + max_parallel_hazard_context context; + + context.max_hazard = PROPARALLEL_SAFE; + context.max_interesting = PROPARALLEL_UNSAFE; + context.safe_param_ids = NIL; + (void) max_parallel_hazard_walker((Node *) parse, &context); + return context.max_hazard; +} + +/* + * is_parallel_safe + * Detect whether the given expr contains only parallel-safe functions + * + * root->glob->maxParallelHazard must previously have been set to the + * result of max_parallel_hazard() on the whole query. 
*/ bool -has_parallel_hazard(Node *node, bool allow_restricted) +is_parallel_safe(PlannerInfo *root, Node *node) { - has_parallel_hazard_arg context; + max_parallel_hazard_context context; - context.allow_restricted = allow_restricted; - return has_parallel_hazard_walker(node, &context); + /* + * Even if the original querytree contained nothing unsafe, we need to + * search the expression if we have generated any PARAM_EXEC Params while + * planning, because those are parallel-restricted and there might be one + * in this expression. But otherwise we don't need to look. + */ + if (root->glob->maxParallelHazard == PROPARALLEL_SAFE && + root->glob->nParamExec == 0) + return true; + /* Else use max_parallel_hazard's search logic, but stop on RESTRICTED */ + context.max_hazard = PROPARALLEL_SAFE; + context.max_interesting = PROPARALLEL_RESTRICTED; + context.safe_param_ids = NIL; + return !max_parallel_hazard_walker(node, &context); } +/* core logic for all parallel-hazard checks */ static bool -has_parallel_hazard_checker(Oid func_id, void *context) +max_parallel_hazard_test(char proparallel, max_parallel_hazard_context *context) { - char proparallel = func_parallel(func_id); + switch (proparallel) + { + case PROPARALLEL_SAFE: + /* nothing to see here, move along */ + break; + case PROPARALLEL_RESTRICTED: + /* increase max_hazard to RESTRICTED */ + Assert(context->max_hazard != PROPARALLEL_UNSAFE); + context->max_hazard = proparallel; + /* done if we are not expecting any unsafe functions */ + if (context->max_interesting == proparallel) + return true; + break; + case PROPARALLEL_UNSAFE: + context->max_hazard = proparallel; + /* we're always done at the first unsafe construct */ + return true; + default: + elog(ERROR, "unrecognized proparallel value \"%c\"", proparallel); + break; + } + return false; +} - if (((has_parallel_hazard_arg *) context)->allow_restricted) - return (proparallel == PROPARALLEL_UNSAFE); - else - return (proparallel != PROPARALLEL_SAFE); +/* check_functions_in_node callback */ +static bool +max_parallel_hazard_checker(Oid func_id, void *context) +{ + return max_parallel_hazard_test(func_parallel(func_id), + (max_parallel_hazard_context *) context); } static bool -has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *context) +max_parallel_hazard_walker(Node *node, max_parallel_hazard_context *context) { if (node == NULL) return false; /* Check for hazardous functions in node itself */ - if (check_functions_in_node(node, has_parallel_hazard_checker, + if (check_functions_in_node(node, max_parallel_hazard_checker, context)) return true; @@ -1143,11 +1145,12 @@ has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *context) * (Note: in principle that's wrong because a domain constraint could * contain a parallel-unsafe function; but useful constraints probably * never would have such, and assuming they do would cripple use of - * parallel query in the presence of domain types.) + * parallel query in the presence of domain types.) SQLValueFunction + * should be safe in all cases. 
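The division of labor between the two entry points is easiest to see at the call sites. Roughly, following standard_planner() in this patch series (the surrounding conditions are elided here):

    /* once per query: find the worst hazard anywhere in the tree */
    glob->maxParallelHazard = max_parallel_hazard(parse);
    glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);

    /* later, per expression: may this run inside a worker? */
    if (is_parallel_safe(root, (Node *) rinfo->clause))
        /* ... clause may be pushed into a partial path ... */ ;

Because max_parallel_hazard() records PROPARALLEL_SAFE in the common case, most is_parallel_safe() calls return immediately without walking the expression at all.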
*/ if (IsA(node, CoerceToDomain)) { - if (!context->allow_restricted) + if (max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) return true; } @@ -1158,33 +1161,62 @@ has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *context) { RestrictInfo *rinfo = (RestrictInfo *) node; - return has_parallel_hazard_walker((Node *) rinfo->clause, context); + return max_parallel_hazard_walker((Node *) rinfo->clause, context); } /* - * Since we don't have the ability to push subplans down to workers at - * present, we treat subplan references as parallel-restricted. We need - * not worry about examining their contents; if they are unsafe, we would - * have found that out while examining the whole tree before reduction of - * sublinks to subplans. (Really we should not see SubLink during a - * not-allow_restricted scan, but if we do, return true.) + * Really we should not see SubLink during a max_interesting == restricted + * scan, but if we do, return true. */ - else if (IsA(node, SubLink) || - IsA(node, SubPlan) || - IsA(node, AlternativeSubPlan)) + else if (IsA(node, SubLink)) { - if (!context->allow_restricted) + if (max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) return true; } /* + * Only parallel-safe SubPlans can be sent to workers. Within the + * testexpr of the SubPlan, Params representing the output columns of the + * subplan can be treated as parallel-safe, so temporarily add their IDs + * to the safe_param_ids list while examining the testexpr. + */ + else if (IsA(node, SubPlan)) + { + SubPlan *subplan = (SubPlan *) node; + List *save_safe_param_ids; + + if (!subplan->parallel_safe && + max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) + return true; + save_safe_param_ids = context->safe_param_ids; + context->safe_param_ids = list_concat(list_copy(subplan->paramIds), + context->safe_param_ids); + if (max_parallel_hazard_walker(subplan->testexpr, context)) + return true; /* no need to restore safe_param_ids */ + context->safe_param_ids = save_safe_param_ids; + /* we must also check args, but no special Param treatment there */ + if (max_parallel_hazard_walker((Node *) subplan->args, context)) + return true; + /* don't want to recurse normally, so we're done */ + return false; + } + + /* * We can't pass Params to workers at the moment either, so they are also - * parallel-restricted. + * parallel-restricted, unless they are PARAM_EXEC Params listed in + * safe_param_ids, meaning they could be generated within the worker. 
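A concrete case of the safe_param_ids mechanism (the Param numbering is illustrative):

    /*
     * WHERE x IN (SELECT y FROM t2)    becomes a SubPlan with
     *     testexpr:  (x = $0)          -- $0 listed in subplan->paramIds
     *
     * While walking the testexpr, $0 is in safe_param_ids and is treated
     * as parallel-safe, since the worker running the subplan produces
     * that value itself; the same $0 encountered anywhere else keeps the
     * parallel-restricted treatment given to PARAM_EXEC Params.
     */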
*/ else if (IsA(node, Param)) { - if (!context->allow_restricted) - return true; + Param *param = (Param *) node; + + if (param->paramkind != PARAM_EXEC || + !list_member_int(context->safe_param_ids, param->paramid)) + { + if (max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) + return true; + } + return false; /* nothing to recurse to */ } /* @@ -1198,20 +1230,24 @@ has_parallel_hazard_walker(Node *node, has_parallel_hazard_arg *context) /* SELECT FOR UPDATE/SHARE must be treated as unsafe */ if (query->rowMarks != NULL) + { + context->max_hazard = PROPARALLEL_UNSAFE; return true; + } /* Recurse into subselects */ return query_tree_walker(query, - has_parallel_hazard_walker, + max_parallel_hazard_walker, context, 0); } /* Recurse to check arguments */ return expression_tree_walker(node, - has_parallel_hazard_walker, + max_parallel_hazard_walker, context); } + /***************************************************************************** * Check clauses for nonstrict functions *****************************************************************************/ @@ -1418,10 +1454,8 @@ contain_context_dependent_node_walker(Node *node, int *flags) * * Returns true if the clause contains any non-leakproof functions that are * passed Var nodes of the current query level, and which might therefore leak - * data. Qualifiers from outside a security_barrier view that might leak data - * in this way should not be pushed down into the view in case the contents of - * tuples intended to be filtered out by the view are revealed by the leaky - * functions. + * data. Such clauses must be applied after any lower-level security barrier + * clauses. */ bool contain_leaked_vars(Node *clause) @@ -1458,6 +1492,7 @@ contain_leaked_vars_walker(Node *node, void *context) case T_CaseTestExpr: case T_RowExpr: case T_MinMaxExpr: + case T_SQLValueFunction: case T_NullTest: case T_BooleanTest: case T_List: @@ -1515,10 +1550,10 @@ contain_leaked_vars_walker(Node *node, void *context) case T_CurrentOfExpr: /* - * WHERE CURRENT OF doesn't contain function calls. Moreover, it - * is important that this can be pushed down into a - * security_barrier view, since the planner must always generate a - * TID scan when CURRENT OF is present -- c.f. cost_tidscan. + * WHERE CURRENT OF doesn't contain leaky function calls. + * Moreover, it is essential that this is considered non-leaky, + * since the planner must always generate a TID scan when CURRENT + * OF is present -- c.f. cost_tidscan. 
*/ return false; @@ -2699,9 +2734,8 @@ eval_const_expressions_mutator(Node *node, * Since the underlying operator is "=", must negate * its result */ - Const *csimple = (Const *) simple; + Const *csimple = castNode(Const, simple); - Assert(IsA(csimple, Const)); csimple->constvalue = BoolGetDatum(!DatumGetBool(csimple->constvalue)); return (Node *) csimple; @@ -3090,12 +3124,10 @@ eval_const_expressions_mutator(Node *node, const_true_cond = false; foreach(arg, caseexpr->args) { - CaseWhen *oldcasewhen = (CaseWhen *) lfirst(arg); + CaseWhen *oldcasewhen = lfirst_node(CaseWhen, arg); Node *casecond; Node *caseresult; - Assert(IsA(oldcasewhen, CaseWhen)); - /* Simplify this alternative's test condition */ casecond = eval_const_expressions_mutator((Node *) oldcasewhen->expr, context); @@ -3263,6 +3295,23 @@ eval_const_expressions_mutator(Node *node, newcoalesce->location = coalesceexpr->location; return (Node *) newcoalesce; } + case T_SQLValueFunction: + { + /* + * All variants of SQLValueFunction are stable, so if we are + * estimating the expression's value, we should evaluate the + * current function value. Otherwise just copy. + */ + SQLValueFunction *svf = (SQLValueFunction *) node; + + if (context->estimate) + return (Node *) evaluate_expr((Expr *) svf, + svf->type, + svf->typmod, + InvalidOid); + else + return copyObject((Node *) svf); + } case T_FieldSelect: { /* @@ -3380,7 +3429,7 @@ eval_const_expressions_mutator(Node *node, * Else, make a scalar (argisrow == false) NullTest * for this field. Scalar semantics are required * because IS [NOT] NULL doesn't recurse; see comments - * in ExecEvalNullTest(). + * in ExecEvalRowNullInt(). */ newntest = makeNode(NullTest); newntest->arg = (Expr *) relem; @@ -3524,8 +3573,8 @@ eval_const_expressions_mutator(Node *node, * FALSE: drop (does not affect result) * TRUE: force result to TRUE * NULL: keep only one - * We must keep one NULL input because ExecEvalOr returns NULL when no input - * is TRUE and at least one is NULL. We don't actually include the NULL + * We must keep one NULL input because OR expressions evaluate to NULL when no + * input is TRUE and at least one is NULL. We don't actually include the NULL * here, that's supposed to be done by the caller. * * The output arguments *haveNull and *forceTrue must be initialized FALSE @@ -3636,9 +3685,9 @@ simplify_or_arguments(List *args, * TRUE: drop (does not affect result) * FALSE: force result to FALSE * NULL: keep only one - * We must keep one NULL input because ExecEvalAnd returns NULL when no input - * is FALSE and at least one is NULL. We don't actually include the NULL - * here, that's supposed to be done by the caller. + * We must keep one NULL input because AND expressions evaluate to NULL when + * no input is FALSE and at least one is NULL. We don't actually include the + * NULL here, that's supposed to be done by the caller. * * The output arguments *haveNull and *forceFalse must be initialized FALSE * by the caller. 
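A two-line truth table shows why exactly one NULL input must survive:

    /*
     *   TRUE  AND NULL -> NULL        FALSE OR NULL -> NULL
     *   FALSE AND NULL -> FALSE       TRUE  OR NULL -> TRUE
     *
     * If every other AND input was dropped as TRUE, discarding the last
     * NULL would wrongly turn a NULL result into TRUE; hence the caller
     * re-adds one NULL constant whenever haveNull is set.
     */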
They will be set TRUE if a null constant or false constant, @@ -4063,8 +4112,7 @@ fetch_function_defaults(HeapTuple func_tuple) if (isnull) elog(ERROR, "not enough default arguments"); str = TextDatumGetCString(proargdefaults); - defaults = (List *) stringToNode(str); - Assert(IsA(defaults, List)); + defaults = castNode(List, stringToNode(str)); pfree(str); return defaults; } @@ -4311,9 +4359,7 @@ inline_function(Oid funcid, Oid result_type, Oid result_collid, */ mycxt = AllocSetContextCreate(CurrentMemoryContext, "inline_function", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); + ALLOCSET_DEFAULT_SIZES); oldcxt = MemoryContextSwitchTo(mycxt); /* Fetch the function body */ @@ -4381,9 +4427,9 @@ inline_function(Oid funcid, Oid result_type, Oid result_collid, */ if (!IsA(querytree, Query) || querytree->commandType != CMD_SELECT || - querytree->utilityStmt || querytree->hasAggs || querytree->hasWindowFuncs || + querytree->hasTargetSRFs || querytree->hasSubLinks || querytree->cteList || querytree->rtable || @@ -4424,17 +4470,13 @@ inline_function(Oid funcid, Oid result_type, Oid result_collid, Assert(!modifyTargetList); /* - * Additional validity checks on the expression. It mustn't return a set, - * and it mustn't be more volatile than the surrounding function (this is - * to avoid breaking hacks that involve pretending a function is immutable - * when it really ain't). If the surrounding function is declared strict, - * then the expression must contain only strict constructs and must use - * all of the function parameters (this is overkill, but an exact analysis - * is hard). + * Additional validity checks on the expression. It mustn't be more + * volatile than the surrounding function (this is to avoid breaking hacks + * that involve pretending a function is immutable when it really ain't). + * If the surrounding function is declared strict, then the expression + * must contain only strict constructs and must use all of the function + * parameters (this is overkill, but an exact analysis is hard). */ - if (expression_returns_set(newexpr)) - goto fail; - if (funcform->provolatile == PROVOLATILE_IMMUTABLE && contain_mutable_functions(newexpr)) goto fail; @@ -4671,7 +4713,7 @@ evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, */ const_val = ExecEvalExprSwitchContext(exprstate, GetPerTupleExprContext(estate), - &const_is_null, NULL); + &const_is_null); /* Get info needed about result datatype */ get_typlenbyval(result_type, &resultTypLen, &resultTypByVal); @@ -4829,9 +4871,7 @@ inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte) */ mycxt = AllocSetContextCreate(CurrentMemoryContext, "inline_set_returning_function", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); + ALLOCSET_DEFAULT_SIZES); oldcxt = MemoryContextSwitchTo(mycxt); /* @@ -4904,7 +4944,7 @@ inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte) querytree_list = pg_analyze_and_rewrite_params(linitial(raw_parsetree_list), src, (ParserSetupHook) sql_fn_parser_setup, - pinfo); + pinfo, NULL); if (list_length(querytree_list) != 1) goto fail; querytree = linitial(querytree_list); @@ -4913,8 +4953,7 @@ inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte) * The single command must be a plain SELECT. 
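A canonical example of a function that survives every one of these inlining checks (names hypothetical):

    /*
     * CREATE FUNCTION inc(int) RETURNS int
     *     AS 'SELECT $1 + 1' LANGUAGE sql;
     *
     * "inc(x)" is inlined to "x + 1" at the call site, so the executor
     * never pays a per-row function-call overhead. A body that uses
     * aggregates, FROM, sublinks, or (new in this patch) SRFs in the
     * targetlist fails the tests above and remains an ordinary call.
     */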
*/ if (!IsA(querytree, Query) || - querytree->commandType != CMD_SELECT || - querytree->utilityStmt) + querytree->commandType != CMD_SELECT) goto fail; /* diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c index 97d5fba391..62629ee7d8 100644 --- a/src/backend/optimizer/util/joininfo.c +++ b/src/backend/optimizer/util/joininfo.c @@ -3,7 +3,7 @@ * joininfo.c * joininfo list manipulation routines * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -24,7 +24,7 @@ * Detect whether there is a joinclause that involves * the two given relations. * - * Note: the joinclause does not have to be evaluatable with only these two + * Note: the joinclause does not have to be evaluable with only these two * relations. This is intentional. For example consider * SELECT * FROM a, b, c WHERE a.x = (b.y + c.z) * If a is much larger than the other tables, it may be worthwhile to diff --git a/src/backend/optimizer/util/orclauses.c b/src/backend/optimizer/util/orclauses.c index 13570f006e..b6867e3001 100644 --- a/src/backend/optimizer/util/orclauses.c +++ b/src/backend/optimizer/util/orclauses.c @@ -3,7 +3,7 @@ * orclauses.c * Routines to extract restriction OR clauses from join OR clauses * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -188,9 +188,8 @@ extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel) foreach(lc2, andargs) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); - Assert(IsA(rinfo, RestrictInfo)); if (restriction_is_or_clause(rinfo)) { /* @@ -211,11 +210,11 @@ extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel) } else { - Assert(IsA(orarg, RestrictInfo)); - Assert(!restriction_is_or_clause((RestrictInfo *) orarg)); - if (is_safe_restriction_clause_for((RestrictInfo *) orarg, rel)) - subclauses = lappend(subclauses, - ((RestrictInfo *) orarg)->clause); + RestrictInfo *rinfo = castNode(RestrictInfo, orarg); + + Assert(!restriction_is_or_clause(rinfo)); + if (is_safe_restriction_clause_for(rinfo, rel)) + subclauses = lappend(subclauses, rinfo->clause); } /* @@ -270,6 +269,7 @@ consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, true, false, false, + join_or_rinfo->security_level, NULL, NULL, NULL); @@ -296,6 +296,8 @@ consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, * OK, add it to the rel's restriction-clause list. */ rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo); + rel->baserestrict_min_security = Min(rel->baserestrict_min_security, + or_rinfo->security_level); /* * Adjust the original join OR clause's cached selectivity to compensate diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 971ffa8822..0ccf4bd47d 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -4,7 +4,7 @@ * Routines to manipulate pathlists and create path nodes * * Portions Copyright (c) 2012-2014, TransLattice, Inc. 
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -762,10 +762,9 @@ add_path_precheck(RelOptInfo *parent_rel, * As with add_path, we pfree paths that are found to be dominated by * another partial path; this requires that there be no other references to * such paths yet. Hence, GatherPaths must not be created for a rel until - * we're done creating all partial paths for it. We do not currently build - * partial indexscan paths, so there is no need for an exception for - * IndexPaths here; for safety, we instead Assert that a path to be freed - * isn't an IndexPath. + * we're done creating all partial paths for it. Unlike add_path, we don't + * take an exception for IndexPaths as partial index paths won't be + * referenced by partial BitmapHeapPaths. */ void add_partial_path(RelOptInfo *parent_rel, Path *new_path) @@ -844,8 +843,6 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path) { parent_rel->partial_pathlist = list_delete_cell(parent_rel->partial_pathlist, p1, p1_prev); - /* we should not see IndexPaths here, so always safe to delete */ - Assert(!IsA(old_path, IndexPath)); pfree(old_path); /* p1_prev does not advance */ } @@ -878,8 +875,6 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path) } else { - /* we should not see IndexPaths here, so always safe to delete */ - Assert(!IsA(new_path, IndexPath)); /* Reject and recycle the new path */ pfree(new_path); } @@ -2176,6 +2171,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer * 'required_outer' is the set of outer relids for a parameterized path. * 'loop_count' is the number of repetitions of the indexscan to factor into * estimates of caching behavior. + * 'partial_path' is true if constructing a parallel index scan path. * * Returns the new path node. */ @@ -2190,7 +2186,8 @@ create_index_path(PlannerInfo *root, ScanDirection indexscandir, bool indexonly, Relids required_outer, - double loop_count) + double loop_count, + bool partial_path) { IndexPath *pathnode = makeNode(IndexPath); RelOptInfo *rel = index->rel; @@ -2232,7 +2229,7 @@ create_index_path(PlannerInfo *root, } } #endif - cost_index(pathnode, root, loop_count); + cost_index(pathnode, root, loop_count, partial_path); return pathnode; } @@ -2254,7 +2251,8 @@ create_bitmap_heap_path(PlannerInfo *root, RelOptInfo *rel, Path *bitmapqual, Relids required_outer, - double loop_count) + double loop_count, + int parallel_degree) { BitmapHeapPath *pathnode = makeNode(BitmapHeapPath); @@ -2263,9 +2261,9 @@ create_bitmap_heap_path(PlannerInfo *root, pathnode->path.pathtarget = rel->reltarget; pathnode->path.param_info = get_baserel_parampathinfo(root, rel, required_outer); - pathnode->path.parallel_aware = false; + pathnode->path.parallel_aware = parallel_degree > 0 ? 
true : false; pathnode->path.parallel_safe = rel->consider_parallel; - pathnode->path.parallel_workers = 0; + pathnode->path.parallel_workers = parallel_degree; pathnode->path.pathkeys = NIL; /* always unordered */ pathnode->bitmapqual = bitmapqual; @@ -2414,7 +2412,7 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, */ AppendPath * create_append_path(RelOptInfo *rel, List *subpaths, Relids required_outer, - int parallel_workers) + int parallel_workers, List *partitioned_rels) { AppendPath *pathnode = makeNode(AppendPath); ListCell *l; @@ -2502,6 +2500,8 @@ create_append_path(RelOptInfo *rel, List *subpaths, Relids required_outer, pathnode->path.distribution = distribution; } #endif + + pathnode->partitioned_rels = list_copy(partitioned_rels); pathnode->subpaths = subpaths; /* @@ -2544,7 +2544,8 @@ create_merge_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *pathkeys, - Relids required_outer) + Relids required_outer, + List *partitioned_rels) { MergeAppendPath *pathnode = makeNode(MergeAppendPath); Cost input_startup_cost; @@ -2632,6 +2633,7 @@ create_merge_append_path(PlannerInfo *root, pathnode->path.parallel_safe = rel->consider_parallel; pathnode->path.parallel_workers = 0; pathnode->path.pathkeys = pathkeys; + pathnode->partitioned_rels = list_copy(partitioned_rels); pathnode->subpaths = subpaths; /* @@ -2689,7 +2691,7 @@ create_merge_append_path(PlannerInfo *root, cost_merge_append(&pathnode->path, root, pathkeys, list_length(subpaths), input_startup_cost, input_total_cost, - rel->tuples); + pathnode->path.rows); return pathnode; } @@ -2995,6 +2997,66 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, } /* + * create_gather_merge_path + * + * Creates a path corresponding to a gather merge scan, returning + * the pathnode. + */ +GatherMergePath * +create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, + PathTarget *target, List *pathkeys, + Relids required_outer, double *rows) +{ + GatherMergePath *pathnode = makeNode(GatherMergePath); + Cost input_startup_cost = 0; + Cost input_total_cost = 0; + + Assert(subpath->parallel_safe); + Assert(pathkeys); + + pathnode->path.pathtype = T_GatherMerge; + pathnode->path.parent = rel; + pathnode->path.param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->path.parallel_aware = false; + + pathnode->subpath = subpath; + pathnode->num_workers = subpath->parallel_workers; + pathnode->path.pathkeys = pathkeys; + pathnode->path.pathtarget = target ? target : rel->reltarget; + pathnode->path.rows += subpath->rows; + + if (pathkeys_contained_in(pathkeys, subpath->pathkeys)) + { + /* Subpath is adequately ordered, we won't need to sort it */ + input_startup_cost += subpath->startup_cost; + input_total_cost += subpath->total_cost; + } + else + { + /* We'll need to insert a Sort node, so include cost for that */ + Path sort_path; /* dummy for result of cost_sort */ + + cost_sort(&sort_path, + root, + pathkeys, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + -1); + input_startup_cost += sort_path.startup_cost; + input_total_cost += sort_path.total_cost; + } + + cost_gather_merge(pathnode, root, rel, pathnode->path.param_info, + input_startup_cost, input_total_cost, rows); + + return pathnode; +} + +/* * translate_sub_tlist - get subquery column numbers represented by tlist * * The given targetlist usually contains only Vars referencing the given relid. 
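The two costing branches in create_gather_merge_path() correspond to two familiar plan shapes; which one you get depends on whether the partial path is already ordered (shapes are illustrative):

    Gather Merge                        Gather Merge
      ->  Parallel Index Scan            ->  Sort
                                               ->  Parallel Seq Scan

In the left shape, pathkeys_contained_in() succeeds and only the child's own costs are charged; in the right shape, a cost_sort() estimate is added below the Gather Merge.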
@@ -3046,19 +3108,20 @@ create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, required_outer); pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = false; - pathnode->path.parallel_workers = subpath->parallel_workers; + pathnode->path.parallel_workers = 0; pathnode->path.pathkeys = NIL; /* Gather has unordered result */ /* distribution is the same as in the subpath */ pathnode->path.distribution = (Distribution *) copyObject(subpath->distribution); pathnode->subpath = subpath; + pathnode->num_workers = subpath->parallel_workers; pathnode->single_copy = false; - if (pathnode->path.parallel_workers == 0) + if (pathnode->num_workers == 0) { - pathnode->path.parallel_workers = 1; pathnode->path.pathkeys = subpath->pathkeys; + pathnode->num_workers = 1; pathnode->single_copy = true; } @@ -3131,6 +3194,32 @@ create_functionscan_path(PlannerInfo *root, RelOptInfo *rel, } /* + * create_tablefuncscan_path + * Creates a path corresponding to a sequential scan of a table function, + * returning the pathnode. + */ +Path * +create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) +{ + Path *pathnode = makeNode(Path); + + pathnode->pathtype = T_TableFuncScan; + pathnode->parent = rel; + pathnode->pathtarget = rel->reltarget; + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->parallel_aware = false; + pathnode->parallel_safe = rel->consider_parallel; + pathnode->parallel_workers = 0; + pathnode->pathkeys = NIL; /* result is always unordered */ + + cost_tablefuncscan(pathnode, root, rel, pathnode->param_info); + + return pathnode; +} + +/* * create_valuesscan_path * Creates a path corresponding to a scan of a VALUES list, * returning the pathnode. @@ -3182,6 +3271,32 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer) } /* + * create_namedtuplestorescan_path + * Creates a path corresponding to a scan of a named tuplestore, returning + * the pathnode. + */ +Path * +create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel, + Relids required_outer) +{ + Path *pathnode = makeNode(Path); + + pathnode->pathtype = T_NamedTuplestoreScan; + pathnode->parent = rel; + pathnode->pathtarget = rel->reltarget; + pathnode->param_info = get_baserel_parampathinfo(root, rel, + required_outer); + pathnode->parallel_aware = false; + pathnode->parallel_safe = rel->consider_parallel; + pathnode->parallel_workers = 0; + pathnode->pathkeys = NIL; /* result is always unordered */ + + cost_namedtuplestorescan(pathnode, root, rel, pathnode->param_info); + + return pathnode; +} + +/* * create_worktablescan_path * Creates a path corresponding to a scan of a self-reference CTE, * returning the pathnode. @@ -3312,8 +3427,7 @@ calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path) * 'joinrel' is the join relation. 
* 'jointype' is the type of join required * 'workspace' is the result from initial_cost_nestloop - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if jointype is SEMI or ANTI + * 'extra' contains various information about the join * 'outer_path' is the outer path * 'inner_path' is the inner path * 'restrict_clauses' are the RestrictInfo nodes to apply at the join @@ -3327,8 +3441,7 @@ create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors, + JoinPathExtraData *extra, Path *outer_path, Path *inner_path, List *restrict_clauses, @@ -3381,7 +3494,7 @@ create_nestloop_path(PlannerInfo *root, joinrel, outer_path, inner_path, - sjinfo, + extra->sjinfo, required_outer, &restrict_clauses); pathnode->path.parallel_aware = false; @@ -3391,6 +3504,7 @@ create_nestloop_path(PlannerInfo *root, pathnode->path.parallel_workers = outer_path->parallel_workers; pathnode->path.pathkeys = pathkeys; pathnode->jointype = jointype; + pathnode->inner_unique = extra->inner_unique; pathnode->outerjoinpath = outer_path; pathnode->innerjoinpath = inner_path; pathnode->joinrestrictinfo = restrict_clauses; @@ -3400,7 +3514,7 @@ create_nestloop_path(PlannerInfo *root, alternate = set_joinpath_distribution(root, pathnode); #endif - final_cost_nestloop(root, pathnode, workspace, sjinfo, semifactors); + final_cost_nestloop(root, pathnode, workspace, extra); #ifdef XCP /* @@ -3409,7 +3523,7 @@ create_nestloop_path(PlannerInfo *root, foreach(lc, alternate) { NestPath *altpath = (NestPath *) lfirst(lc); - final_cost_nestloop(root, altpath, workspace, sjinfo, semifactors); + final_cost_nestloop(root, altpath, workspace, extra); if (altpath->path.total_cost < pathnode->path.total_cost) pathnode = altpath; } @@ -3426,7 +3540,7 @@ create_nestloop_path(PlannerInfo *root, * 'joinrel' is the join relation * 'jointype' is the type of join required * 'workspace' is the result from initial_cost_mergejoin - * 'sjinfo' is extra info about the join for selectivity estimation + * 'extra' contains various information about the join * 'outer_path' is the outer path * 'inner_path' is the inner path * 'restrict_clauses' are the RestrictInfo nodes to apply at the join @@ -3442,7 +3556,7 @@ create_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, + JoinPathExtraData *extra, Path *outer_path, Path *inner_path, List *restrict_clauses, @@ -3466,7 +3580,7 @@ create_mergejoin_path(PlannerInfo *root, joinrel, outer_path, inner_path, - sjinfo, + extra->sjinfo, required_outer, &restrict_clauses); pathnode->jpath.path.parallel_aware = false; @@ -3476,6 +3590,7 @@ create_mergejoin_path(PlannerInfo *root, pathnode->jpath.path.parallel_workers = outer_path->parallel_workers; pathnode->jpath.path.pathkeys = pathkeys; pathnode->jpath.jointype = jointype; + pathnode->jpath.inner_unique = extra->inner_unique; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; @@ -3485,8 +3600,10 @@ create_mergejoin_path(PlannerInfo *root, #ifdef XCP alternate = set_joinpath_distribution(root, (JoinPath *) pathnode); #endif + /* pathnode->skip_mark_restore will be set by final_cost_mergejoin */ /* pathnode->materialize_inner will be set by final_cost_mergejoin */ - final_cost_mergejoin(root, pathnode, workspace, sjinfo); + + 
final_cost_mergejoin(root, pathnode, workspace, extra); #ifdef XCP /* @@ -3495,7 +3612,7 @@ create_mergejoin_path(PlannerInfo *root, foreach(lc, alternate) { MergePath *altpath = (MergePath *) lfirst(lc); - final_cost_mergejoin(root, altpath, workspace, sjinfo); + final_cost_mergejoin(root, altpath, workspace, extra); if (altpath->jpath.path.total_cost < pathnode->jpath.path.total_cost) pathnode = altpath; } @@ -3511,8 +3628,7 @@ create_mergejoin_path(PlannerInfo *root, * 'joinrel' is the join relation * 'jointype' is the type of join required * 'workspace' is the result from initial_cost_hashjoin - * 'sjinfo' is extra info about the join for selectivity estimation - * 'semifactors' contains valid data if jointype is SEMI or ANTI + * 'extra' contains various information about the join * 'outer_path' is the cheapest outer path * 'inner_path' is the cheapest inner path * 'restrict_clauses' are the RestrictInfo nodes to apply at the join @@ -3525,8 +3641,7 @@ create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, - SpecialJoinInfo *sjinfo, - SemiAntiJoinFactors *semifactors, + JoinPathExtraData *extra, Path *outer_path, Path *inner_path, List *restrict_clauses, @@ -3547,7 +3662,7 @@ create_hashjoin_path(PlannerInfo *root, joinrel, outer_path, inner_path, - sjinfo, + extra->sjinfo, required_outer, &restrict_clauses); pathnode->jpath.path.parallel_aware = false; @@ -3569,6 +3684,7 @@ create_hashjoin_path(PlannerInfo *root, */ pathnode->jpath.path.pathkeys = NIL; pathnode->jpath.jointype = jointype; + pathnode->jpath.inner_unique = extra->inner_unique; pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; @@ -3577,7 +3693,8 @@ create_hashjoin_path(PlannerInfo *root, alternate = set_joinpath_distribution(root, (JoinPath *) pathnode); #endif /* final_cost_hashjoin will fill in pathnode->num_batches */ - final_cost_hashjoin(root, pathnode, workspace, sjinfo, semifactors); + + final_cost_hashjoin(root, pathnode, workspace, extra); #ifdef XCP /* @@ -3586,7 +3703,7 @@ create_hashjoin_path(PlannerInfo *root, foreach(lc, alternate) { HashPath *altpath = (HashPath *) lfirst(lc); - final_cost_hashjoin(root, altpath, workspace, sjinfo, semifactors); + final_cost_hashjoin(root, altpath, workspace, extra); if (altpath->jpath.path.total_cost < pathnode->jpath.path.total_cost) pathnode = altpath; } @@ -3620,7 +3737,7 @@ create_projection_path(PlannerInfo *root, pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = rel->consider_parallel && subpath->parallel_safe && - !has_parallel_hazard((Node *) target->exprs, false); + is_parallel_safe(root, (Node *) target->exprs); pathnode->path.parallel_workers = subpath->parallel_workers; /* Projection does not change the sort order */ pathnode->path.pathkeys = subpath->pathkeys; @@ -3729,7 +3846,7 @@ apply_projection_to_path(PlannerInfo *root, * target expressions, then we can't. 
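A sketch of the rewrite the GatherPath branch below performs (plan shapes illustrative):

    target applied above Gather         target pushed below Gather
    -----------------------------       -----------------------------
    (project target)                    Gather
      ->  Gather                          ->  (project target)
            ->  partial path                    ->  partial path

Pushing a parallel-safe target under the Gather lets the workers share the evaluation cost; a parallel-restricted target instead forces the resulting path to be marked not parallel-safe, which is what the branch after the Gather case does.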
*/ if (IsA(path, GatherPath) && - !has_parallel_hazard((Node *) target->exprs, false)) + is_parallel_safe(root, (Node *) target->exprs)) { GatherPath *gpath = (GatherPath *) path; @@ -3750,7 +3867,7 @@ apply_projection_to_path(PlannerInfo *root, target); } else if (path->parallel_safe && - has_parallel_hazard((Node *) target->exprs, false)) + !is_parallel_safe(root, (Node *) target->exprs)) { /* * We're inserting a parallel-restricted target list into a path @@ -3764,6 +3881,72 @@ apply_projection_to_path(PlannerInfo *root, } /* + * create_set_projection_path + * Creates a pathnode that represents performing a projection that + * includes set-returning functions. + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'target' is the PathTarget to be computed + */ +ProjectSetPath * +create_set_projection_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + PathTarget *target) +{ + ProjectSetPath *pathnode = makeNode(ProjectSetPath); + double tlist_rows; + ListCell *lc; + + pathnode->path.pathtype = T_ProjectSet; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe && + is_parallel_safe(root, (Node *) target->exprs); + pathnode->path.parallel_workers = subpath->parallel_workers; + /* Projection does not change the sort order XXX? */ + pathnode->path.pathkeys = subpath->pathkeys; + + pathnode->subpath = subpath; + + /* + * Estimate number of rows produced by SRFs for each row of input; if + * there's more than one in this node, use the maximum. + */ + tlist_rows = 1; + foreach(lc, target->exprs) + { + Node *node = (Node *) lfirst(lc); + double itemrows; + + itemrows = expression_returns_set_rows(node); + if (tlist_rows < itemrows) + tlist_rows = itemrows; + } + + /* + * In addition to the cost of evaluating the tlist, charge cpu_tuple_cost + * per input row, and half of cpu_tuple_cost for each added output row. + * This is slightly bizarre maybe, but it's what 9.6 did; we may revisit + * this estimate later. + */ + pathnode->path.rows = subpath->rows * tlist_rows; + pathnode->path.startup_cost = subpath->startup_cost + + target->cost.startup; + pathnode->path.total_cost = subpath->total_cost + + target->cost.startup + + (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows + + (pathnode->path.rows - subpath->rows) * cpu_tuple_cost / 2; + + return pathnode; +} + +/* * create_sort_path * Creates a pathnode that represents performing an explicit sort. 
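Returning to the row and cost arithmetic in create_set_projection_path() above, a worked example with hypothetical numbers:

    /*
     * subpath->rows = 100, one SRF in the tlist with prorows = 1000:
     *
     *   rows       = 100 * 1000 = 100000
     *   total_cost = subpath->total_cost + target->cost.startup
     *              + (cpu_tuple_cost + target->cost.per_tuple) * 100
     *              + cpu_tuple_cost / 2 * (100000 - 100)
     *
     * i.e. each input row is charged in full and each net new output
     * row at half rate, per the 9.6-compatible heuristic above.
     */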
* @@ -3999,10 +4182,9 @@ create_agg_path(PlannerInfo *root, * 'subpath' is the path representing the source of data * 'target' is the PathTarget to be computed * 'having_qual' is the HAVING quals if any - * 'rollup_lists' is a list of grouping sets - * 'rollup_groupclauses' is a list of grouping clauses for grouping sets + * 'rollups' is a list of RollupData nodes * 'agg_costs' contains cost info about the aggregate functions to be computed - * 'numGroups' is the estimated number of groups + * 'numGroups' is the estimated total number of groups */ GroupingSetsPath * create_groupingsets_path(PlannerInfo *root, @@ -4010,13 +4192,15 @@ create_groupingsets_path(PlannerInfo *root, Path *subpath, PathTarget *target, List *having_qual, - List *rollup_lists, - List *rollup_groupclauses, + AggStrategy aggstrategy, + List *rollups, const AggClauseCosts *agg_costs, double numGroups) { GroupingSetsPath *pathnode = makeNode(GroupingSetsPath); - int numGroupCols; + ListCell *lc; + bool is_first = true; + bool is_first_sort = true; /* The topmost generated Plan node will be an Agg */ pathnode->path.pathtype = T_Agg; @@ -4033,74 +4217,109 @@ create_groupingsets_path(PlannerInfo *root, pathnode->path.distribution = (Distribution *) copyObject(subpath->distribution); /* + * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED + * to AGG_HASHED, here if possible. + */ + if (aggstrategy == AGG_SORTED && + list_length(rollups) == 1 && + ((RollupData *) linitial(rollups))->groupClause == NIL) + aggstrategy = AGG_PLAIN; + + if (aggstrategy == AGG_MIXED && + list_length(rollups) == 1) + aggstrategy = AGG_HASHED; + + /* * Output will be in sorted order by group_pathkeys if, and only if, there * is a single rollup operation on a non-empty list of grouping * expressions. */ - if (list_length(rollup_groupclauses) == 1 && - ((List *) linitial(rollup_groupclauses)) != NIL) + if (aggstrategy == AGG_SORTED && list_length(rollups) == 1) pathnode->path.pathkeys = root->group_pathkeys; else pathnode->path.pathkeys = NIL; - pathnode->rollup_groupclauses = rollup_groupclauses; - pathnode->rollup_lists = rollup_lists; + pathnode->aggstrategy = aggstrategy; + pathnode->rollups = rollups; pathnode->qual = having_qual; - Assert(rollup_lists != NIL); - Assert(list_length(rollup_lists) == list_length(rollup_groupclauses)); - - /* Account for cost of the topmost Agg node */ - numGroupCols = list_length((List *) linitial((List *) llast(rollup_lists))); - - cost_agg(&pathnode->path, root, - (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN, - agg_costs, - numGroupCols, - numGroups, - subpath->startup_cost, - subpath->total_cost, - subpath->rows); + Assert(rollups != NIL); + Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1); + Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1); - /* - * Add in the costs and output rows of the additional sorting/aggregation - * steps, if any. Only total costs count, since the extra sorts aren't - * run on startup. - */ - if (list_length(rollup_lists) > 1) + foreach(lc, rollups) { - ListCell *lc; + RollupData *rollup = lfirst(lc); + List *gsets = rollup->gsets; + int numGroupCols = list_length(linitial(gsets)); - foreach(lc, rollup_lists) + /* + * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the + * (already-sorted) input, and following ones do their own sort. + * + * In AGG_HASHED mode, there is one rollup for each grouping set. 
+ * + * In AGG_MIXED mode, the first rollups are hashed, the first + * non-hashed one takes the (already-sorted) input, and following ones + * do their own sort. + */ + if (is_first) + { + cost_agg(&pathnode->path, root, + aggstrategy, + agg_costs, + numGroupCols, + rollup->numGroups, + subpath->startup_cost, + subpath->total_cost, + subpath->rows); + is_first = false; + if (!rollup->is_hashed) + is_first_sort = false; + } + else { - List *gsets = (List *) lfirst(lc); Path sort_path; /* dummy for result of cost_sort */ Path agg_path; /* dummy for result of cost_agg */ - /* We must iterate over all but the last rollup_lists element */ - if (lnext(lc) == NULL) - break; - - /* Account for cost of sort, but don't charge input cost again */ - cost_sort(&sort_path, root, NIL, - 0.0, - subpath->rows, - subpath->pathtarget->width, - 0.0, - work_mem, - -1.0); - - /* Account for cost of aggregation */ - numGroupCols = list_length((List *) linitial(gsets)); - - cost_agg(&agg_path, root, - AGG_SORTED, - agg_costs, - numGroupCols, - numGroups, /* XXX surely not right for all steps? */ - sort_path.startup_cost, - sort_path.total_cost, - sort_path.rows); + if (rollup->is_hashed || is_first_sort) + { + /* + * Account for cost of aggregation, but don't charge input + * cost again + */ + cost_agg(&agg_path, root, + rollup->is_hashed ? AGG_HASHED : AGG_SORTED, + agg_costs, + numGroupCols, + rollup->numGroups, + 0.0, 0.0, + subpath->rows); + if (!rollup->is_hashed) + is_first_sort = false; + } + else + { + /* Account for cost of sort, but don't charge input cost again */ + cost_sort(&sort_path, root, NIL, + 0.0, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + -1.0); + + /* Account for cost of aggregation */ + + cost_agg(&agg_path, root, + AGG_SORTED, + agg_costs, + numGroupCols, + rollup->numGroups, + sort_path.startup_cost, + sort_path.total_cost, + sort_path.rows); + } pathnode->path.total_cost += agg_path.total_cost; pathnode->path.rows += agg_path.rows; @@ -4430,7 +4649,7 @@ create_lockrows_path(PlannerInfo *root, RelOptInfo *rel, ModifyTablePath * create_modifytable_path(PlannerInfo *root, RelOptInfo *rel, CmdType operation, bool canSetTag, - Index nominalRelation, + Index nominalRelation, List *partitioned_rels, List *resultRelations, List *subpaths, List *subroots, List *withCheckOptionLists, List *returningLists, @@ -4497,6 +4716,7 @@ create_modifytable_path(PlannerInfo *root, RelOptInfo *rel, pathnode->operation = operation; pathnode->canSetTag = canSetTag; pathnode->nominalRelation = nominalRelation; + pathnode->partitioned_rels = list_copy(partitioned_rels); pathnode->resultRelations = resultRelations; pathnode->subpaths = subpaths; pathnode->subroots = subroots; @@ -4658,7 +4878,7 @@ reparameterize_path(PlannerInfo *root, Path *path, memcpy(newpath, ipath, sizeof(IndexPath)); newpath->path.param_info = get_baserel_parampathinfo(root, rel, required_outer); - cost_index(newpath, root, loop_count); + cost_index(newpath, root, loop_count, false); return (Path *) newpath; } case T_BitmapHeapScan: @@ -4669,7 +4889,7 @@ reparameterize_path(PlannerInfo *root, Path *path, rel, bpath->bitmapqual, required_outer, - loop_count); + loop_count, 0); } case T_SubqueryScan: #ifdef XCP diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c index b210914b85..698a387ac2 100644 --- a/src/backend/optimizer/util/placeholder.c +++ b/src/backend/optimizer/util/placeholder.c @@ -4,7 +4,7 @@ * PlaceHolderVar and PlaceHolderInfo manipulation routines * * - * 
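Which rollups in that loop get charged a fresh sort can be hard to see from
the is_first/is_first_sort flag juggling. Here is a standalone trace of the
control flow, with a toy stand-in for RollupData and print statements in
place of cost_agg() and cost_sort() (hypothetical rollups, AGG_MIXED case):

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct
    {
        bool        is_hashed;
        const char *name;
    } Rollup;

    int
    main(void)
    {
        /* AGG_MIXED: hashed rollups first, then the sorted ones. */
        Rollup      rollups[] = {
            {true, "(a,b) hashed"},
            {true, "(b) hashed"},
            {false, "(a) sorted"},
            {false, "() sorted"}
        };
        bool        is_first = true;
        bool        is_first_sort = true;
        int         i;

        for (i = 0; i < 4; i++)
        {
            Rollup     *r = &rollups[i];

            if (is_first)
            {
                /* The first rollup is charged the input path's cost. */
                printf("%s: cost_agg over the input path\n", r->name);
                is_first = false;
                if (!r->is_hashed)
                    is_first_sort = false;
            }
            else if (r->is_hashed || is_first_sort)
            {
                /*
                 * Hashed rollups, and the first non-hashed rollup, consume
                 * the same (already-sorted) input: aggregation cost only.
                 */
                printf("%s: cost_agg only\n", r->name);
                if (!r->is_hashed)
                    is_first_sort = false;
            }
            else
            {
                /* Every later sorted rollup must re-sort the input. */
                printf("%s: cost_sort + cost_agg\n", r->name);
            }
        }
        return 0;
    }

In AGG_SORTED mode the same flow applies with no hashed entries: the first
rollup rides the presorted input and each later one pays for its own sort.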
Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 2b50919b10..aa8f6cf020 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -5,7 +5,7 @@ * * * Portions Copyright (c) 2012-2014, TransLattice, Inc. - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -28,7 +28,9 @@ #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/heap.h" +#include "catalog/partition.h" #include "catalog/pg_am.h" +#include "catalog/pg_statistic_ext.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -40,8 +42,11 @@ #include "parser/parse_relation.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" +#include "statistics/statistics.h" #include "storage/bufmgr.h" +#include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" #ifdef PGXC @@ -56,7 +61,7 @@ get_relation_info_hook_type get_relation_info_hook = NULL; static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel, - Relation relation); + Relation relation, bool inhparent); static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel, List *idxExprs); static int32 get_rel_data_width(Relation rel, int32 *attr_widths); @@ -65,7 +70,7 @@ static List *get_relation_constraints(PlannerInfo *root, bool include_notnull); static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation); - +static List *get_relation_statistics(RelOptInfo *rel, Relation relation); /* * get_relation_info - @@ -77,10 +82,12 @@ static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, * min_attr lowest valid AttrNumber * max_attr highest valid AttrNumber * indexlist list of IndexOptInfos for relation's indexes + * statlist list of StatisticExtInfo for relation's statistic objects * serverid if it's a foreign table, the server OID * fdwroutine if it's a foreign table, the FDW function pointers * pages number of pages * tuples number of tuples + * rel_parallel_workers user-defined number of parallel workers * * Also, add information about the relation's foreign keys to root->fkey_list. 
* @@ -242,6 +249,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, info->amoptionalkey = amroutine->amoptionalkey; info->amsearcharray = amroutine->amsearcharray; info->amsearchnulls = amroutine->amsearchnulls; + info->amcanparallel = amroutine->amcanparallel; info->amhasgettuple = (amroutine->amgettuple != NULL); info->amhasgetbitmap = (amroutine->amgetbitmap != NULL); info->amcostestimate = amroutine->amcostestimate; @@ -408,6 +416,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, rel->indexlist = indexinfos; + rel->statlist = get_relation_statistics(rel, relation); + /* Grab foreign-table info using the relcache, while we have it */ if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) { @@ -421,7 +431,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, } /* Collect info about relation's foreign keys, if relevant */ - get_relation_foreign_keys(root, rel, relation); + get_relation_foreign_keys(root, rel, relation, inhparent); heap_close(relation, NoLock); @@ -446,7 +456,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, */ static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel, - Relation relation) + Relation relation, bool inhparent) { List *rtable = root->parse->rtable; List *cachedfkeys; @@ -462,6 +472,15 @@ get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel, return; /* + * If it's the parent of an inheritance tree, ignore its FKs. We could + * make useful FK-based deductions if we found that all members of the + * inheritance tree have equivalent FK constraints, but detecting that + * would require code that hasn't been written. + */ + if (inhparent) + return; + + /* * Extract data about relation's FKs from the relcache. Note that this * list belongs to the relcache and might disappear in a cache flush, so * we must not do any further catalog access within this function. @@ -501,6 +520,9 @@ get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel, if (rte->rtekind != RTE_RELATION || rte->relid != cachedfk->confrelid) continue; + /* Ignore if it's an inheritance parent; doesn't really match */ + if (rte->inh) + continue; /* Ignore self-referential FKs; we only care about joins */ if (rti == rel->relid) continue; @@ -1152,6 +1174,7 @@ get_relation_constraints(PlannerInfo *root, Index varno = rel->relid; Relation relation; TupleConstr *constr; + List *pcqual; /* * We assume the relation has already been safely locked. @@ -1237,11 +1260,100 @@ get_relation_constraints(PlannerInfo *root, } } + /* Append partition predicates, if any */ + pcqual = RelationGetPartitionQual(relation); + if (pcqual) + { + /* + * Run each expression through const-simplification and + * canonicalization similar to check constraints. + */ + pcqual = (List *) eval_const_expressions(root, (Node *) pcqual); + pcqual = (List *) canonicalize_qual((Expr *) pcqual); + + /* Fix Vars to have the desired varno */ + if (varno != 1) + ChangeVarNodes((Node *) pcqual, 1, varno, 0); + + result = list_concat(result, pcqual); + } + heap_close(relation, NoLock); return result; } +/* + * get_relation_statistics + * Retrieve extended statistics defined on the table. + * + * Returns a List (possibly empty) of StatisticExtInfo objects describing + * the statistics. Note that this doesn't load the actual statistics data, + * just the identifying metadata. Only stats actually built are considered. 
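To make the shape of get_relation_statistics' result concrete, here is a
toy version of the collection step defined just below, using a 64-bit mask
in place of a Bitmapset and a plain struct with hypothetical values in
place of the pg_statistic_ext syscache tuple:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy stand-in for the pg_statistic_ext form (hypothetical values). */
    typedef struct
    {
        unsigned    oid;
        int         keys[4];            /* covered columns, like stxkeys */
        int         nkeys;
        int         ndistinct_built;    /* statext_is_kind_built() results */
        int         dependencies_built;
    } StatExtForm;

    int
    main(void)
    {
        StatExtForm st = {16384, {1, 3}, 2, 1, 0};
        uint64_t    keys = 0;
        int         i;

        /* Collect covered columns into a bitmap, as bms_add_member does. */
        for (i = 0; i < st.nkeys; i++)
            keys |= UINT64_C(1) << st.keys[i];

        /*
         * Emit one entry per statistics kind actually built; kinds that
         * were requested but never built (no ANALYZE yet) are skipped.
         */
        if (st.ndistinct_built)
            printf("stat %u: ndistinct, keys 0x%llx\n",
                   st.oid, (unsigned long long) keys);
        if (st.dependencies_built)
            printf("stat %u: dependencies, keys 0x%llx\n",
                   st.oid, (unsigned long long) keys);
        return 0;
    }

A single statistics object can thus yield zero, one, or two
StatisticExtInfo entries, all sharing the same set of covered columns.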
+ */ +static List * +get_relation_statistics(RelOptInfo *rel, Relation relation) +{ + List *statoidlist; + List *stainfos = NIL; + ListCell *l; + + statoidlist = RelationGetStatExtList(relation); + + foreach(l, statoidlist) + { + Oid statOid = lfirst_oid(l); + Form_pg_statistic_ext staForm; + HeapTuple htup; + Bitmapset *keys = NULL; + int i; + + htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid)); + if (!htup) + elog(ERROR, "cache lookup failed for statistics object %u", statOid); + staForm = (Form_pg_statistic_ext) GETSTRUCT(htup); + + /* + * First, build the array of columns covered. This is ultimately + * wasted if no stats within the object have actually been built, but + * it doesn't seem worth troubling over that case. + */ + for (i = 0; i < staForm->stxkeys.dim1; i++) + keys = bms_add_member(keys, staForm->stxkeys.values[i]); + + /* add one StatisticExtInfo for each kind built */ + if (statext_is_kind_built(htup, STATS_EXT_NDISTINCT)) + { + StatisticExtInfo *info = makeNode(StatisticExtInfo); + + info->statOid = statOid; + info->rel = rel; + info->kind = STATS_EXT_NDISTINCT; + info->keys = bms_copy(keys); + + stainfos = lcons(info, stainfos); + } + + if (statext_is_kind_built(htup, STATS_EXT_DEPENDENCIES)) + { + StatisticExtInfo *info = makeNode(StatisticExtInfo); + + info->statOid = statOid; + info->rel = rel; + info->kind = STATS_EXT_DEPENDENCIES; + info->keys = bms_copy(keys); + + stainfos = lcons(info, stainfos); + } + + ReleaseSysCache(htup); + bms_free(keys); + } + + list_free(statoidlist); + + return stainfos; +} /* * relation_excluded_by_constraints @@ -1263,6 +1375,9 @@ relation_excluded_by_constraints(PlannerInfo *root, List *safe_constraints; ListCell *lc; + /* As of now, constraint exclusion works only with simple relations. */ + Assert(IS_SIMPLE_REL(rel)); + /* * Regardless of the setting of constraint_exclusion, detect * constant-FALSE-or-NULL restriction clauses. Because const-folding will @@ -1372,8 +1487,9 @@ relation_excluded_by_constraints(PlannerInfo *root, * dropped cols. * * We also support building a "physical" tlist for subqueries, functions, - * values lists, and CTEs, since the same optimization can occur in - * SubqueryScan, FunctionScan, ValuesScan, CteScan, and WorkTableScan nodes. + * values lists, table expressions, and CTEs, since the same optimization can + * occur in SubqueryScan, FunctionScan, ValuesScan, CteScan, TableFunc, + * NamedTuplestoreScan, and WorkTableScan nodes. */ List * build_physical_tlist(PlannerInfo *root, RelOptInfo *rel) @@ -1445,8 +1561,10 @@ build_physical_tlist(PlannerInfo *root, RelOptInfo *rel) break; case RTE_FUNCTION: + case RTE_TABLEFUNC: case RTE_VALUES: case RTE_CTE: + case RTE_NAMEDTUPLESTORE: /* Not all of these can have dropped cols, but share code anyway */ expandRTE(rte, varno, 0, -1, true /* include dropped */ , NULL, &colvars); diff --git a/src/backend/optimizer/util/predtest.c b/src/backend/optimizer/util/predtest.c index 2c2efb1576..c4a04cfa95 100644 --- a/src/backend/optimizer/util/predtest.c +++ b/src/backend/optimizer/util/predtest.c @@ -4,7 +4,7 @@ * Routines to attempt to prove logical implications between predicate * expressions. * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -1596,7 +1596,7 @@ operator_predicate_proof(Expr *predicate, Node *clause, bool refute_it) /* And execute it. 
*/ test_result = ExecEvalExprSwitchContext(test_exprstate, GetPerTupleExprContext(estate), - &isNull, NULL); + &isNull); /* Get back to outer memory context */ MemoryContextSwitchTo(oldcontext); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index bdc8a5134c..3ab5ceb7d3 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -3,7 +3,7 @@ * relnode.c * Relation-node lookup/construction routines * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -14,6 +14,8 @@ */ #include "postgres.h" +#include <limits.h> + #include "miscadmin.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" @@ -50,6 +52,9 @@ static List *subbuild_joinrel_restrictlist(RelOptInfo *joinrel, static List *subbuild_joinrel_joinlist(RelOptInfo *joinrel, List *joininfo_list, List *new_joininfo); +static void set_foreign_rel_properties(RelOptInfo *joinrel, + RelOptInfo *outer_rel, RelOptInfo *inner_rel); +static void add_join_rel(PlannerInfo *root, RelOptInfo *joinrel); /* @@ -86,7 +91,7 @@ setup_simple_rel_arrays(PlannerInfo *root) * Construct a new RelOptInfo for a base relation or 'other' relation. */ RelOptInfo * -build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) +build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) { RelOptInfo *rel; RangeTblEntry *rte; @@ -101,7 +106,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) Assert(rte != NULL); rel = makeNode(RelOptInfo); - rel->reloptkind = reloptkind; + rel->reloptkind = parent ? RELOPT_OTHER_MEMBER_REL : RELOPT_BASEREL; rel->relids = bms_make_singleton(relid); rel->rows = 0; /* cheap startup cost is interesting iff not all tuples to be retrieved */ @@ -124,23 +129,43 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) rel->lateral_vars = NIL; rel->lateral_referencers = NULL; rel->indexlist = NIL; + rel->statlist = NIL; rel->pages = 0; rel->tuples = 0; rel->allvisfrac = 0; rel->subroot = NULL; rel->subplan_params = NIL; - rel->rel_parallel_workers = -1; /* set up in GetRelationInfo */ + rel->rel_parallel_workers = -1; /* set up in get_relation_info */ rel->serverid = InvalidOid; rel->userid = rte->checkAsUser; rel->useridiscurrent = false; rel->fdwroutine = NULL; rel->fdw_private = NULL; + rel->unique_for_rels = NIL; + rel->non_unique_for_rels = NIL; rel->baserestrictinfo = NIL; rel->baserestrictcost.startup = 0; rel->baserestrictcost.per_tuple = 0; + rel->baserestrict_min_security = UINT_MAX; rel->joininfo = NIL; rel->has_eclass_joins = false; + /* + * Pass top parent's relids down the inheritance hierarchy. If the parent + * has top_parent_relids set, it's a direct or an indirect child of the + * top parent indicated by top_parent_relids. By extension this child is + * also an indirect child of that parent. 
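The propagation rule described in that comment is easy to model: a child
inherits its parent's top_parent_relids if set, and otherwise the parent
itself is the top. A sketch with a plain bitmask standing in for the Relids
set (toy types, not the planner's):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t Relids;    /* one bit per relid, like a Bitmapset */

    typedef struct
    {
        Relids      relids;            /* this rel's own relid set */
        Relids      top_parent_relids; /* topmost ancestor, or 0 if none */
    } Rel;

    static void
    set_top_parent(Rel *child, const Rel *parent)
    {
        if (parent == NULL)
            child->top_parent_relids = 0;
        else if (parent->top_parent_relids != 0)
            child->top_parent_relids = parent->top_parent_relids;
        else
            child->top_parent_relids = parent->relids;
    }

    int
    main(void)
    {
        Rel         top = {UINT64_C(1) << 1, 0};
        Rel         mid = {UINT64_C(1) << 2, 0};
        Rel         leaf = {UINT64_C(1) << 3, 0};

        set_top_parent(&mid, &top);     /* direct child of top */
        set_top_parent(&leaf, &mid);    /* grandchild still points at top */
        printf("leaf's top parent mask = 0x%llx\n",
               (unsigned long long) leaf.top_parent_relids);    /* 0x2 */
        return 0;
    }

Recording the topmost ancestor up front lets a child at any nesting depth
identify it directly, which is presumably what allows the removal of the
chain-walking find_childrel_top_parent() further down in this patch.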
+ */ + if (parent) + { + if (parent->top_parent_relids) + rel->top_parent_relids = parent->top_parent_relids; + else + rel->top_parent_relids = bms_copy(parent->relids); + } + else + rel->top_parent_relids = NULL; + /* Check type of rtable entry */ switch (rte->rtekind) { @@ -150,12 +175,14 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) break; case RTE_SUBQUERY: case RTE_FUNCTION: + case RTE_TABLEFUNC: case RTE_VALUES: case RTE_CTE: + case RTE_NAMEDTUPLESTORE: /* - * Subquery, function, or values list --- set up attr range and - * arrays + * Subquery, function, tablefunc, or values list --- set up attr + * range and arrays * * Note: 0 is included in range to support whole-row Vars */ @@ -176,6 +203,16 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) root->simple_rel_array[relid] = rel; /* + * This is a convenient spot at which to note whether rels participating + * in the query have any securityQuals attached. If so, increase + * root->qual_security_level to ensure it's larger than the maximum + * security level needed for securityQuals. + */ + if (rte->securityQuals) + root->qual_security_level = Max(root->qual_security_level, + list_length(rte->securityQuals)); + + /* * If this rel is an appendrel parent, recurse to build "other rel" * RelOptInfos for its children. They are "other rels" because they are * not in the main join tree, but we will need RelOptInfos to plan access @@ -194,7 +231,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) continue; (void) build_simple_rel(root, appinfo->child_relid, - RELOPT_OTHER_MEMBER_REL); + rel); } } @@ -317,6 +354,82 @@ find_join_rel(PlannerInfo *root, Relids relids) } /* + * set_foreign_rel_properties + * Set up foreign-join fields if outer and inner relation are foreign + * tables (or joins) belonging to the same server and assigned to the same + * user to check access permissions as. + * + * In addition to an exact match of userid, we allow the case where one side + * has zero userid (implying current user) and the other side has explicit + * userid that happens to equal the current user; but in that case, pushdown of + * the join is only valid for the current user. The useridiscurrent field + * records whether we had to make such an assumption for this join or any + * sub-join. + * + * Otherwise these fields are left invalid, so GetForeignJoinPaths will not be + * called for the join relation. 
+ * + */ +static void +set_foreign_rel_properties(RelOptInfo *joinrel, RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + if (OidIsValid(outer_rel->serverid) && + inner_rel->serverid == outer_rel->serverid) + { + if (inner_rel->userid == outer_rel->userid) + { + joinrel->serverid = outer_rel->serverid; + joinrel->userid = outer_rel->userid; + joinrel->useridiscurrent = outer_rel->useridiscurrent || inner_rel->useridiscurrent; + joinrel->fdwroutine = outer_rel->fdwroutine; + } + else if (!OidIsValid(inner_rel->userid) && + outer_rel->userid == GetUserId()) + { + joinrel->serverid = outer_rel->serverid; + joinrel->userid = outer_rel->userid; + joinrel->useridiscurrent = true; + joinrel->fdwroutine = outer_rel->fdwroutine; + } + else if (!OidIsValid(outer_rel->userid) && + inner_rel->userid == GetUserId()) + { + joinrel->serverid = outer_rel->serverid; + joinrel->userid = inner_rel->userid; + joinrel->useridiscurrent = true; + joinrel->fdwroutine = outer_rel->fdwroutine; + } + } +} + +/* + * add_join_rel + * Add given join relation to the list of join relations in the given + * PlannerInfo. Also add it to the auxiliary hashtable if there is one. + */ +static void +add_join_rel(PlannerInfo *root, RelOptInfo *joinrel) +{ + /* GEQO requires us to append the new joinrel to the end of the list! */ + root->join_rel_list = lappend(root->join_rel_list, joinrel); + + /* store it into the auxiliary hashtable if there is one. */ + if (root->join_rel_hash) + { + JoinHashEntry *hentry; + bool found; + + hentry = (JoinHashEntry *) hash_search(root->join_rel_hash, + &(joinrel->relids), + HASH_ENTER, + &found); + Assert(!found); + hentry->join_rel = joinrel; + } +} + +/* * build_join_rel * Returns relation entry corresponding to the union of two given rels, * creating a new relation entry if none already exists. @@ -396,6 +509,7 @@ build_join_rel(PlannerInfo *root, joinrel->lateral_vars = NIL; joinrel->lateral_referencers = NULL; joinrel->indexlist = NIL; + joinrel->statlist = NIL; joinrel->pages = 0; joinrel->tuples = 0; joinrel->allvisfrac = 0; @@ -407,52 +521,18 @@ build_join_rel(PlannerInfo *root, joinrel->useridiscurrent = false; joinrel->fdwroutine = NULL; joinrel->fdw_private = NULL; + joinrel->unique_for_rels = NIL; + joinrel->non_unique_for_rels = NIL; joinrel->baserestrictinfo = NIL; joinrel->baserestrictcost.startup = 0; joinrel->baserestrictcost.per_tuple = 0; + joinrel->baserestrict_min_security = UINT_MAX; joinrel->joininfo = NIL; joinrel->has_eclass_joins = false; + joinrel->top_parent_relids = NULL; - /* - * Set up foreign-join fields if outer and inner relation are foreign - * tables (or joins) belonging to the same server and assigned to the same - * user to check access permissions as. In addition to an exact match of - * userid, we allow the case where one side has zero userid (implying - * current user) and the other side has explicit userid that happens to - * equal the current user; but in that case, pushdown of the join is only - * valid for the current user. The useridiscurrent field records whether - * we had to make such an assumption for this join or any sub-join. - * - * Otherwise these fields are left invalid, so GetForeignJoinPaths will - * not be called for the join relation. 
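The userid-matching rules in set_foreign_rel_properties reduce to three
acceptable cases. The following self-contained sketch isolates just that
decision, with a simplified Rel struct, a stubbed GetUserId(), and
hypothetical OIDs (fdwroutine handling omitted):

    #include <stdbool.h>
    #include <stdio.h>

    typedef unsigned int Oid;
    #define InvalidOid ((Oid) 0)
    #define OidIsValid(o) ((o) != InvalidOid)

    static Oid
    GetUserId(void)
    {
        return 10;              /* hypothetical current user */
    }

    typedef struct
    {
        Oid         serverid;
        Oid         userid;     /* InvalidOid means "current user" */
        bool        useridiscurrent;
    } Rel;

    static bool
    join_pushdown_ok(const Rel *outer, const Rel *inner, Rel *join)
    {
        /* Both sides must belong to the same valid server. */
        if (!OidIsValid(outer->serverid) ||
            inner->serverid != outer->serverid)
            return false;

        if (inner->userid == outer->userid)
        {
            /* Exact match: carry the flag forward from either side. */
            join->userid = outer->userid;
            join->useridiscurrent =
                outer->useridiscurrent || inner->useridiscurrent;
        }
        else if (!OidIsValid(inner->userid) && outer->userid == GetUserId())
        {
            /* Zero userid vs. explicit current user: current-user only. */
            join->userid = outer->userid;
            join->useridiscurrent = true;
        }
        else if (!OidIsValid(outer->userid) && inner->userid == GetUserId())
        {
            join->userid = inner->userid;
            join->useridiscurrent = true;
        }
        else
            return false;

        join->serverid = outer->serverid;
        return true;
    }

    int
    main(void)
    {
        Rel         outer = {100, 10, false};
        Rel         inner = {100, InvalidOid, false};
        Rel         join = {InvalidOid, InvalidOid, false};

        if (join_pushdown_ok(&outer, &inner, &join))
            printf("pushdown ok, useridiscurrent = %d\n",
                   join.useridiscurrent);
        return 0;
    }

If none of the three cases applies, the join's serverid stays invalid and
GetForeignJoinPaths is never consulted for the join relation.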
- */ - if (OidIsValid(outer_rel->serverid) && - inner_rel->serverid == outer_rel->serverid) - { - if (inner_rel->userid == outer_rel->userid) - { - joinrel->serverid = outer_rel->serverid; - joinrel->userid = outer_rel->userid; - joinrel->useridiscurrent = outer_rel->useridiscurrent || inner_rel->useridiscurrent; - joinrel->fdwroutine = outer_rel->fdwroutine; - } - else if (!OidIsValid(inner_rel->userid) && - outer_rel->userid == GetUserId()) - { - joinrel->serverid = outer_rel->serverid; - joinrel->userid = outer_rel->userid; - joinrel->useridiscurrent = true; - joinrel->fdwroutine = outer_rel->fdwroutine; - } - else if (!OidIsValid(outer_rel->userid) && - inner_rel->userid == GetUserId()) - { - joinrel->serverid = outer_rel->serverid; - joinrel->userid = inner_rel->userid; - joinrel->useridiscurrent = true; - joinrel->fdwroutine = outer_rel->fdwroutine; - } - } + /* Compute information relevant to the foreign relations. */ + set_foreign_rel_properties(joinrel, outer_rel, inner_rel); /* * Create a new tlist containing just the vars that need to be output from @@ -516,29 +596,12 @@ build_join_rel(PlannerInfo *root, * here. */ if (inner_rel->consider_parallel && outer_rel->consider_parallel && - !has_parallel_hazard((Node *) restrictlist, false) && - !has_parallel_hazard((Node *) joinrel->reltarget->exprs, false)) + is_parallel_safe(root, (Node *) restrictlist) && + is_parallel_safe(root, (Node *) joinrel->reltarget->exprs)) joinrel->consider_parallel = true; - /* - * Add the joinrel to the query's joinrel list, and store it into the - * auxiliary hashtable if there is one. NB: GEQO requires us to append - * the new joinrel to the end of the list! - */ - root->join_rel_list = lappend(root->join_rel_list, joinrel); - - if (root->join_rel_hash) - { - JoinHashEntry *hentry; - bool found; - - hentry = (JoinHashEntry *) hash_search(root->join_rel_hash, - &(joinrel->relids), - HASH_ENTER, - &found); - Assert(!found); - hentry->join_rel = joinrel; - } + /* Add the joinrel to the PlannerInfo. */ + add_join_rel(root, joinrel); /* * Also, if dynamic-programming join search is active, add the new joinrel @@ -929,32 +992,6 @@ find_childrel_appendrelinfo(PlannerInfo *root, RelOptInfo *rel) /* - * find_childrel_top_parent - * Fetch the topmost appendrel parent rel of an appendrel child rel. - * - * Since appendrels can be nested, a child could have multiple levels of - * appendrel ancestors. This function locates the topmost ancestor, - * which will be a regular baserel not an otherrel. - */ -RelOptInfo * -find_childrel_top_parent(PlannerInfo *root, RelOptInfo *rel) -{ - do - { - AppendRelInfo *appinfo = find_childrel_appendrelinfo(root, rel); - Index prelid = appinfo->parent_relid; - - /* traverse up to the parent rel, loop if it's also a child rel */ - rel = find_base_rel(root, prelid); - } while (rel->reloptkind == RELOPT_OTHER_MEMBER_REL); - - Assert(rel->reloptkind == RELOPT_BASEREL); - - return rel; -} - - -/* * find_childrel_parents * Compute the set of parent relids of an appendrel child rel. 
* @@ -967,6 +1004,8 @@ find_childrel_parents(PlannerInfo *root, RelOptInfo *rel) { Relids result = NULL; + Assert(rel->reloptkind == RELOPT_OTHER_MEMBER_REL); + do { AppendRelInfo *appinfo = find_childrel_appendrelinfo(root, rel); diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index 7fc81e7aa3..e946290af5 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -3,7 +3,7 @@ * restrictinfo.c * RestrictInfo node manipulation routines. * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -24,6 +24,7 @@ static RestrictInfo *make_restrictinfo_internal(Expr *clause, bool is_pushed_down, bool outerjoin_delayed, bool pseudoconstant, + Index security_level, Relids required_relids, Relids outer_relids, Relids nullable_relids); @@ -31,6 +32,7 @@ static Expr *make_sub_restrictinfos(Expr *clause, bool is_pushed_down, bool outerjoin_delayed, bool pseudoconstant, + Index security_level, Relids required_relids, Relids outer_relids, Relids nullable_relids); @@ -43,7 +45,7 @@ static Expr *make_sub_restrictinfos(Expr *clause, * * The is_pushed_down, outerjoin_delayed, and pseudoconstant flags for the * RestrictInfo must be supplied by the caller, as well as the correct values - * for outer_relids and nullable_relids. + * for security_level, outer_relids, and nullable_relids. * required_relids can be NULL, in which case it defaults to the actual clause * contents (i.e., clause_relids). * @@ -56,6 +58,7 @@ make_restrictinfo(Expr *clause, bool is_pushed_down, bool outerjoin_delayed, bool pseudoconstant, + Index security_level, Relids required_relids, Relids outer_relids, Relids nullable_relids) @@ -69,6 +72,7 @@ make_restrictinfo(Expr *clause, is_pushed_down, outerjoin_delayed, pseudoconstant, + security_level, required_relids, outer_relids, nullable_relids); @@ -81,65 +85,13 @@ make_restrictinfo(Expr *clause, is_pushed_down, outerjoin_delayed, pseudoconstant, + security_level, required_relids, outer_relids, nullable_relids); } /* - * make_restrictinfos_from_actual_clauses - * - * Given a list of implicitly-ANDed restriction clauses, produce a list - * of RestrictInfo nodes. This is used to reconstitute the RestrictInfo - * representation after doing transformations of a list of clauses. - * - * We assume that the clauses are relation-level restrictions and therefore - * we don't have to worry about is_pushed_down, outerjoin_delayed, - * outer_relids, and nullable_relids (these can be assumed true, false, - * NULL, and NULL, respectively). - * We do take care to recognize pseudoconstant clauses properly. - */ -List * -make_restrictinfos_from_actual_clauses(PlannerInfo *root, - List *clause_list) -{ - List *result = NIL; - ListCell *l; - - foreach(l, clause_list) - { - Expr *clause = (Expr *) lfirst(l); - bool pseudoconstant; - RestrictInfo *rinfo; - - /* - * It's pseudoconstant if it contains no Vars and no volatile - * functions. We probably can't see any sublinks here, so - * contain_var_clause() would likely be enough, but for safety use - * contain_vars_of_level() instead. 
- */ - pseudoconstant = - !contain_vars_of_level((Node *) clause, 0) && - !contain_volatile_functions((Node *) clause); - if (pseudoconstant) - { - /* tell createplan.c to check for gating quals */ - root->hasPseudoConstantQuals = true; - } - - rinfo = make_restrictinfo(clause, - true, - false, - pseudoconstant, - NULL, - NULL, - NULL); - result = lappend(result, rinfo); - } - return result; -} - -/* * make_restrictinfo_internal * * Common code for the main entry points and the recursive cases. @@ -150,6 +102,7 @@ make_restrictinfo_internal(Expr *clause, bool is_pushed_down, bool outerjoin_delayed, bool pseudoconstant, + Index security_level, Relids required_relids, Relids outer_relids, Relids nullable_relids) @@ -162,10 +115,21 @@ make_restrictinfo_internal(Expr *clause, restrictinfo->outerjoin_delayed = outerjoin_delayed; restrictinfo->pseudoconstant = pseudoconstant; restrictinfo->can_join = false; /* may get set below */ + restrictinfo->security_level = security_level; restrictinfo->outer_relids = outer_relids; restrictinfo->nullable_relids = nullable_relids; /* + * If it's potentially delayable by lower-level security quals, figure out + * whether it's leakproof. We can skip testing this for level-zero quals, + * since they would never get delayed on security grounds anyway. + */ + if (security_level > 0) + restrictinfo->leakproof = !contain_leaked_vars((Node *) clause); + else + restrictinfo->leakproof = false; /* really, "don't know" */ + + /* * If it's a binary opclause, set up left/right relids info. In any case * set up the total clause relids info. */ @@ -250,7 +214,7 @@ make_restrictinfo_internal(Expr *clause, * * The same is_pushed_down, outerjoin_delayed, and pseudoconstant flag * values can be applied to all RestrictInfo nodes in the result. Likewise - * for outer_relids and nullable_relids. + * for security_level, outer_relids, and nullable_relids. * * The given required_relids are attached to our top-level output, * but any OR-clause constituents are allowed to default to just the @@ -261,6 +225,7 @@ make_sub_restrictinfos(Expr *clause, bool is_pushed_down, bool outerjoin_delayed, bool pseudoconstant, + Index security_level, Relids required_relids, Relids outer_relids, Relids nullable_relids) @@ -276,6 +241,7 @@ make_sub_restrictinfos(Expr *clause, is_pushed_down, outerjoin_delayed, pseudoconstant, + security_level, NULL, outer_relids, nullable_relids)); @@ -284,6 +250,7 @@ make_sub_restrictinfos(Expr *clause, is_pushed_down, outerjoin_delayed, pseudoconstant, + security_level, required_relids, outer_relids, nullable_relids); @@ -299,6 +266,7 @@ make_sub_restrictinfos(Expr *clause, is_pushed_down, outerjoin_delayed, pseudoconstant, + security_level, required_relids, outer_relids, nullable_relids)); @@ -310,6 +278,7 @@ make_sub_restrictinfos(Expr *clause, is_pushed_down, outerjoin_delayed, pseudoconstant, + security_level, required_relids, outer_relids, nullable_relids); @@ -330,51 +299,45 @@ restriction_is_or_clause(RestrictInfo *restrictinfo) } /* - * get_actual_clauses + * restriction_is_securely_promotable * - * Returns a list containing the bare clauses from 'restrictinfo_list'. - * - * This is only to be used in cases where none of the RestrictInfos can - * be pseudoconstant clauses (for instance, it's OK on indexqual lists). + * Returns true if it's okay to evaluate this clause "early", that is before + * other restriction clauses attached to the specified relation. 
*/ -List * -get_actual_clauses(List *restrictinfo_list) +bool +restriction_is_securely_promotable(RestrictInfo *restrictinfo, + RelOptInfo *rel) { - List *result = NIL; - ListCell *l; - - foreach(l, restrictinfo_list) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - - Assert(IsA(rinfo, RestrictInfo)); - - Assert(!rinfo->pseudoconstant); - - result = lappend(result, rinfo->clause); - } - return result; + /* + * It's okay if there are no baserestrictinfo clauses for the rel that + * would need to go before this one, *or* if this one is leakproof. + */ + if (restrictinfo->security_level <= rel->baserestrict_min_security || + restrictinfo->leakproof) + return true; + else + return false; } /* - * get_all_actual_clauses + * get_actual_clauses * * Returns a list containing the bare clauses from 'restrictinfo_list'. * - * This loses the distinction between regular and pseudoconstant clauses, - * so be careful what you use it for. + * This is only to be used in cases where none of the RestrictInfos can + * be pseudoconstant clauses (for instance, it's OK on indexqual lists). */ List * -get_all_actual_clauses(List *restrictinfo_list) +get_actual_clauses(List *restrictinfo_list) { List *result = NIL; ListCell *l; foreach(l, restrictinfo_list) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); - Assert(IsA(rinfo, RestrictInfo)); + Assert(!rinfo->pseudoconstant); result = lappend(result, rinfo->clause); } @@ -396,9 +359,7 @@ extract_actual_clauses(List *restrictinfo_list, foreach(l, restrictinfo_list) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - - Assert(IsA(rinfo, RestrictInfo)); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); if (rinfo->pseudoconstant == pseudoconstant) result = lappend(result, rinfo->clause); @@ -428,9 +389,7 @@ extract_actual_join_clauses(List *restrictinfo_list, foreach(l, restrictinfo_list) { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - - Assert(IsA(rinfo, RestrictInfo)); + RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); if (rinfo->is_pushed_down) { @@ -550,7 +509,7 @@ join_clause_is_movable_into(RestrictInfo *rinfo, Relids currentrelids, Relids current_and_outer) { - /* Clause must be evaluatable given available context */ + /* Clause must be evaluable given available context */ if (!bms_is_subset(rinfo->clause_relids, current_and_outer)) return false; diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 68096b309c..09523853d0 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -3,7 +3,7 @@ * tlist.c * Target list manipulation routines * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * @@ -16,9 +16,31 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/cost.h" #include "optimizer/tlist.h" +/* Test if an expression node represents a SRF call. Beware multiple eval! 
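The test in restriction_is_securely_promotable above comes down to a single
comparison plus the leakproof escape hatch. A minimal sketch with toy
structs in place of RestrictInfo and RelOptInfo:

    #include <limits.h>
    #include <stdbool.h>
    #include <stdio.h>

    typedef struct
    {
        unsigned    security_level;
        bool        leakproof;
    } Qual;

    /*
     * A qual may be pulled ahead of the rel's other restriction clauses
     * (e.g. used as an index qual) only if no lower-level qual would have
     * to wait behind it, or if the qual itself is leakproof.
     */
    static bool
    securely_promotable(const Qual *q, unsigned baserestrict_min_security)
    {
        return q->security_level <= baserestrict_min_security ||
            q->leakproof;
    }

    int
    main(void)
    {
        Qual        leaky = {1, false};     /* user qual above a barrier */
        Qual        safe = {1, true};       /* leakproof user qual */
        unsigned    min_sec = 0;            /* barrier qual at level 0 */

        printf("leaky: %d, leakproof: %d\n",
               securely_promotable(&leaky, min_sec),
               securely_promotable(&safe, min_sec));

        /*
         * With no restriction clauses at all, the initial UINT_MAX
         * minimum (see relnode.c above) promotes anything.
         */
        printf("no barriers: %d\n", securely_promotable(&leaky, UINT_MAX));
        return 0;
    }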
*/ +#define IS_SRF_CALL(node) \ + ((IsA(node, FuncExpr) && ((FuncExpr *) (node))->funcretset) || \ + (IsA(node, OpExpr) && ((OpExpr *) (node))->opretset)) + +/* Workspace for split_pathtarget_walker */ +typedef struct +{ + List *input_target_exprs; /* exprs available from input */ + List *level_srfs; /* list of lists of SRF exprs */ + List *level_input_vars; /* vars needed by SRFs of each level */ + List *level_input_srfs; /* SRFs needed by SRFs of each level */ + List *current_input_vars; /* vars needed in current subexpr */ + List *current_input_srfs; /* SRFs needed in current subexpr */ + int current_depth; /* max SRF depth in current subexpr */ +} split_pathtarget_context; + +static bool split_pathtarget_walker(Node *node, + split_pathtarget_context *context); + + /***************************************************************************** * Target list creation and searching utilities *****************************************************************************/ @@ -29,7 +51,7 @@ * equal() to the given expression. Result is NULL if no such member. */ TargetEntry * -tlist_member(Node *node, List *targetlist) +tlist_member(Expr *node, List *targetlist) { ListCell *temp; @@ -50,12 +72,12 @@ tlist_member(Node *node, List *targetlist) * involving binary-compatible sort operations. */ TargetEntry * -tlist_member_ignore_relabel(Node *node, List *targetlist) +tlist_member_ignore_relabel(Expr *node, List *targetlist) { ListCell *temp; while (node && IsA(node, RelabelType)) - node = (Node *) ((RelabelType *) node)->arg; + node = ((RelabelType *) node)->arg; foreach(temp, targetlist) { @@ -117,7 +139,7 @@ add_to_flat_tlist(List *tlist, List *exprs) foreach(lc, exprs) { - Node *expr = (Node *) lfirst(lc); + Expr *expr = (Expr *) lfirst(lc); if (!tlist_member(expr, tlist)) { @@ -740,7 +762,7 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target) if (expr && IsA(expr, Var)) tle = tlist_member_match_var((Var *) expr, tlist); else - tle = tlist_member((Node *) expr, tlist); + tle = tlist_member(expr, tlist); /* * Complain if noplace for the sortgrouprefs label, or if we'd @@ -759,3 +781,344 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target) i++; } } + +/* + * split_pathtarget_at_srfs + * Split given PathTarget into multiple levels to position SRFs safely + * + * The executor can only handle set-returning functions that appear at the + * top level of the targetlist of a ProjectSet plan node. If we have any SRFs + * that are not at top level, we need to split up the evaluation into multiple + * plan levels in which each level satisfies this constraint. This function + * creates appropriate PathTarget(s) for each level. + * + * As an example, consider the tlist expression + * x + srf1(srf2(y + z)) + * This expression should appear as-is in the top PathTarget, but below that + * we must have a PathTarget containing + * x, srf1(srf2(y + z)) + * and below that, another PathTarget containing + * x, srf2(y + z) + * and below that, another PathTarget containing + * x, y, z + * When these tlists are processed by setrefs.c, subexpressions that match + * output expressions of the next lower tlist will be replaced by Vars, + * so that what the executor gets are tlists looking like + * Var1 + Var2 + * Var1, srf1(Var2) + * Var1, srf2(Var2 + Var3) + * x, y, z + * which satisfy the desired property. 
+ * + * Another example is + * srf1(x), srf2(srf3(y)) + * That must appear as-is in the top PathTarget, but below that we need + * srf1(x), srf3(y) + * That is, each SRF must be computed at a level corresponding to the nesting + * depth of SRFs within its arguments. + * + * In some cases, a SRF has already been evaluated in some previous plan level + * and we shouldn't expand it again (that is, what we see in the target is + * already meant as a reference to a lower subexpression). So, don't expand + * any tlist expressions that appear in input_target, if that's not NULL. + * + * The outputs of this function are two parallel lists, one a list of + * PathTargets and the other an integer list of bool flags indicating + * whether the corresponding PathTarget contains any evaluatable SRFs. + * The lists are given in the order they'd need to be evaluated in, with + * the "lowest" PathTarget first. So the last list entry is always the + * originally given PathTarget, and any entries before it indicate evaluation + * levels that must be inserted below it. The first list entry must not + * contain any SRFs (other than ones duplicating input_target entries), since + * it will typically be attached to a plan node that cannot evaluate SRFs. + * + * Note: using a list for the flags may seem like overkill, since there + * are only a few possible patterns for which levels contain SRFs. + * But this representation decouples callers from that knowledge. + */ +void +split_pathtarget_at_srfs(PlannerInfo *root, + PathTarget *target, PathTarget *input_target, + List **targets, List **targets_contain_srfs) +{ + split_pathtarget_context context; + int max_depth; + bool need_extra_projection; + List *prev_level_tlist; + ListCell *lc, + *lc1, + *lc2, + *lc3; + + /* + * It's not unusual for planner.c to pass us two physically identical + * targets, in which case we can conclude without further ado that all + * expressions are available from the input. (The logic below would + * arrive at the same conclusion, but much more tediously.) + */ + if (target == input_target) + { + *targets = list_make1(target); + *targets_contain_srfs = list_make1_int(false); + return; + } + + /* Pass any input_target exprs down to split_pathtarget_walker() */ + context.input_target_exprs = input_target ? input_target->exprs : NIL; + + /* + * Initialize with empty level-zero lists, and no levels after that. + * (Note: we could dispense with representing level zero explicitly, since + * it will never receive any SRFs, but then we'd have to special-case that + * level when we get to building result PathTargets. Level zero describes + * the SRF-free PathTarget that will be given to the input plan node.) + */ + context.level_srfs = list_make1(NIL); + context.level_input_vars = list_make1(NIL); + context.level_input_srfs = list_make1(NIL); + + /* Initialize data we'll accumulate across all the target expressions */ + context.current_input_vars = NIL; + context.current_input_srfs = NIL; + max_depth = 0; + need_extra_projection = false; + + /* Scan each expression in the PathTarget looking for SRFs */ + foreach(lc, target->exprs) + { + Node *node = (Node *) lfirst(lc); + + /* + * Find all SRFs and Vars (and Var-like nodes) in this expression, and + * enter them into appropriate lists within the context struct. 
+ */ + context.current_depth = 0; + split_pathtarget_walker(node, &context); + + /* An expression containing no SRFs is of no further interest */ + if (context.current_depth == 0) + continue; + + /* + * Track max SRF nesting depth over the whole PathTarget. Also, if + * this expression establishes a new max depth, we no longer care + * whether previous expressions contained nested SRFs; we can handle + * any required projection for them in the final ProjectSet node. + */ + if (max_depth < context.current_depth) + { + max_depth = context.current_depth; + need_extra_projection = false; + } + + /* + * If any maximum-depth SRF is not at the top level of its expression, + * we'll need an extra Result node to compute the top-level scalar + * expression. + */ + if (max_depth == context.current_depth && !IS_SRF_CALL(node)) + need_extra_projection = true; + } + + /* + * If we found no SRFs needing evaluation (maybe they were all present in + * input_target, or maybe they were all removed by const-simplification), + * then no ProjectSet is needed; fall out. + */ + if (max_depth == 0) + { + *targets = list_make1(target); + *targets_contain_srfs = list_make1_int(false); + return; + } + + /* + * The Vars and SRF outputs needed at top level can be added to the last + * level_input lists if we don't need an extra projection step. If we do + * need one, add a SRF-free level to the lists. + */ + if (need_extra_projection) + { + context.level_srfs = lappend(context.level_srfs, NIL); + context.level_input_vars = lappend(context.level_input_vars, + context.current_input_vars); + context.level_input_srfs = lappend(context.level_input_srfs, + context.current_input_srfs); + } + else + { + lc = list_nth_cell(context.level_input_vars, max_depth); + lfirst(lc) = list_concat(lfirst(lc), context.current_input_vars); + lc = list_nth_cell(context.level_input_srfs, max_depth); + lfirst(lc) = list_concat(lfirst(lc), context.current_input_srfs); + } + + /* + * Now construct the output PathTargets. The original target can be used + * as-is for the last one, but we need to construct a new SRF-free target + * representing what the preceding plan node has to emit, as well as a + * target for each intermediate ProjectSet node. + */ + *targets = *targets_contain_srfs = NIL; + prev_level_tlist = NIL; + + forthree(lc1, context.level_srfs, + lc2, context.level_input_vars, + lc3, context.level_input_srfs) + { + List *level_srfs = (List *) lfirst(lc1); + PathTarget *ntarget; + + if (lnext(lc1) == NULL) + { + ntarget = target; + } + else + { + ntarget = create_empty_pathtarget(); + + /* + * This target should actually evaluate any SRFs of the current + * level, and it needs to propagate forward any Vars needed by + * later levels, as well as SRFs computed earlier and needed by + * later levels. We rely on add_new_columns_to_pathtarget() to + * remove duplicate items. Also, for safety, make a separate copy + * of each item for each PathTarget. 
+ */ + add_new_columns_to_pathtarget(ntarget, copyObject(level_srfs)); + for_each_cell(lc, lnext(lc2)) + { + List *input_vars = (List *) lfirst(lc); + + add_new_columns_to_pathtarget(ntarget, copyObject(input_vars)); + } + for_each_cell(lc, lnext(lc3)) + { + List *input_srfs = (List *) lfirst(lc); + ListCell *lcx; + + foreach(lcx, input_srfs) + { + Expr *srf = (Expr *) lfirst(lcx); + + if (list_member(prev_level_tlist, srf)) + add_new_column_to_pathtarget(ntarget, copyObject(srf)); + } + } + set_pathtarget_cost_width(root, ntarget); + } + + /* + * Add current target and does-it-compute-SRFs flag to output lists. + */ + *targets = lappend(*targets, ntarget); + *targets_contain_srfs = lappend_int(*targets_contain_srfs, + (level_srfs != NIL)); + + /* Remember this level's output for next pass */ + prev_level_tlist = ntarget->exprs; + } +} + +/* + * Recursively examine expressions for split_pathtarget_at_srfs. + * + * Note we make no effort here to prevent duplicate entries in the output + * lists. Duplicates will be gotten rid of later. + */ +static bool +split_pathtarget_walker(Node *node, split_pathtarget_context *context) +{ + if (node == NULL) + return false; + + /* + * A subexpression that matches an expression already computed in + * input_target can be treated like a Var (which indeed it will be after + * setrefs.c gets done with it), even if it's actually a SRF. Record it + * as being needed for the current expression, and ignore any + * substructure. + */ + if (list_member(context->input_target_exprs, node)) + { + context->current_input_vars = lappend(context->current_input_vars, + node); + return false; + } + + /* + * Vars and Var-like constructs are expected to be gotten from the input, + * too. We assume that these constructs cannot contain any SRFs (if one + * does, there will be an executor failure from a misplaced SRF). + */ + if (IsA(node, Var) || + IsA(node, PlaceHolderVar) || + IsA(node, Aggref) || + IsA(node, GroupingFunc) || + IsA(node, WindowFunc)) + { + context->current_input_vars = lappend(context->current_input_vars, + node); + return false; + } + + /* + * If it's a SRF, recursively examine its inputs, determine its level, and + * make appropriate entries in the output lists. 
+ */ + if (IS_SRF_CALL(node)) + { + List *save_input_vars = context->current_input_vars; + List *save_input_srfs = context->current_input_srfs; + int save_current_depth = context->current_depth; + int srf_depth; + ListCell *lc; + + context->current_input_vars = NIL; + context->current_input_srfs = NIL; + context->current_depth = 0; + + (void) expression_tree_walker(node, split_pathtarget_walker, + (void *) context); + + /* Depth is one more than any SRF below it */ + srf_depth = context->current_depth + 1; + + /* If new record depth, initialize another level of output lists */ + if (srf_depth >= list_length(context->level_srfs)) + { + context->level_srfs = lappend(context->level_srfs, NIL); + context->level_input_vars = lappend(context->level_input_vars, NIL); + context->level_input_srfs = lappend(context->level_input_srfs, NIL); + } + + /* Record this SRF as needing to be evaluated at appropriate level */ + lc = list_nth_cell(context->level_srfs, srf_depth); + lfirst(lc) = lappend(lfirst(lc), node); + + /* Record its inputs as being needed at the same level */ + lc = list_nth_cell(context->level_input_vars, srf_depth); + lfirst(lc) = list_concat(lfirst(lc), context->current_input_vars); + lc = list_nth_cell(context->level_input_srfs, srf_depth); + lfirst(lc) = list_concat(lfirst(lc), context->current_input_srfs); + + /* + * Restore caller-level state and update it for presence of this SRF. + * Notice we report the SRF itself as being needed for evaluation of + * surrounding expression. + */ + context->current_input_vars = save_input_vars; + context->current_input_srfs = lappend(save_input_srfs, node); + context->current_depth = Max(save_current_depth, srf_depth); + + /* We're done here */ + return false; + } + + /* + * Otherwise, the node is a scalar (non-set) expression, so recurse to + * examine its inputs. + */ + return expression_tree_walker(node, split_pathtarget_walker, + (void *) context); +} diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 292e1f4aac..cf326ae003 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -9,7 +9,7 @@ * contains variables. * * - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * |
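The level-assignment rule that split_pathtarget_walker implements (a SRF
sits one level above the deepest SRF among its arguments) is the heart of
split_pathtarget_at_srfs. A toy walker over a two-child expression node
(simplified types, not the planner's Node tree) shows the rule in action:

    #include <stdio.h>

    typedef struct Expr
    {
        const char *name;
        int         is_srf;     /* like funcretset/opretset */
        struct Expr *args[2];
    } Expr;

    /*
     * Depth of the deepest SRF in the tree: each SRF is one level deeper
     * than the deepest SRF among its arguments.
     */
    static int
    srf_depth(const Expr *e)
    {
        int         d = 0;
        int         i;

        if (e == NULL)
            return 0;
        for (i = 0; i < 2; i++)
        {
            int         cd = srf_depth(e->args[i]);

            if (cd > d)
                d = cd;
        }
        return d + (e->is_srf ? 1 : 0);
    }

    int
    main(void)
    {
        /* x + srf1(srf2(y + z)), as in the header comment above */
        Expr        y = {"y", 0, {NULL, NULL}};
        Expr        z = {"z", 0, {NULL, NULL}};
        Expr        yz = {"+", 0, {&y, &z}};
        Expr        srf2 = {"srf2", 1, {&yz, NULL}};
        Expr        srf1 = {"srf1", 1, {&srf2, NULL}};
        Expr        x = {"x", 0, {NULL, NULL}};
        Expr        top = {"+", 0, {&x, &srf1}};

        printf("max SRF depth = %d\n", srf_depth(&top));    /* prints 2 */
        return 0;
    }

A depth of 2 corresponds to the two ProjectSet levels constructed for this
expression in the worked example above, with the SRF-free level zero fed to
the input plan node.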