* planagg.c
* Special planning for aggregate queries.
*
+ * This module tries to replace MIN/MAX aggregate functions by subqueries
+ * of the form
+ * (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
+ * Given a suitable index on tab.col, this can be much faster than the
+ * generic scan-all-the-rows aggregation plan. We can handle multiple
+ * MIN/MAX aggregates by generating multiple subqueries, and their
+ * orderings can be different. However, if the query contains any
+ * non-optimizable aggregates, there's no point since we'll have to
+ * scan all the rows anyway.
+ *
+ *
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
-#include "optimizer/predtest.h"
+#include "optimizer/prep.h"
+#include "optimizer/restrictinfo.h"
#include "optimizer/subselect.h"
-#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
+/* Per-aggregate info during optimize_minmax_aggregates() */
typedef struct
{
- Oid aggfnoid; /* pg_proc Oid of the aggregate */
- Oid aggsortop; /* Oid of its sort operator */
- Expr *target; /* expression we are aggregating on */
- NullTest *notnulltest; /* expression for "target IS NOT NULL" */
- IndexPath *path; /* access path for index scan */
+ MinMaxAggInfo *mminfo; /* info gathered by preprocessing */
+ Path *path; /* access path for ordered scan */
Cost pathcost; /* estimated cost to fetch first row */
- bool nulls_first; /* null ordering direction matching index */
Param *param; /* param for subplan's output */
-} MinMaxAggInfo;
+} PrivateMMAggInfo;
static bool find_minmax_aggs_walker(Node *node, List **context);
-static bool build_minmax_path(PlannerInfo *root, RelOptInfo *rel,
- MinMaxAggInfo *info);
-static ScanDirection match_agg_to_index_col(MinMaxAggInfo *info,
- IndexOptInfo *index, int indexcol);
-static void make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info);
-static void attach_notnull_index_qual(MinMaxAggInfo *info, IndexScan *iplan);
+static PrivateMMAggInfo *find_minmax_path(PlannerInfo *root, RelOptInfo *rel,
+ MinMaxAggInfo *mminfo);
+static bool path_usable_for_agg(Path *path);
+static void make_agg_subplan(PlannerInfo *root, RelOptInfo *rel,
+ PrivateMMAggInfo *info);
+static void add_notnull_qual(PlannerInfo *root, RelOptInfo *rel,
+ PrivateMMAggInfo *info, Path *path);
static Node *replace_aggs_with_params_mutator(Node *node, List **context);
static Oid fetch_agg_sort_op(Oid aggfnoid);
/*
- * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
+ * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
*
- * This checks to see if we can replace MIN/MAX aggregate functions by
- * subqueries of the form
- * (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
- * Given a suitable index on tab.col, this can be much faster than the
- * generic scan-all-the-rows plan.
+ * Check to see whether the query contains MIN/MAX aggregate functions that
+ * might be optimizable via indexscans. If it does, and all the aggregates
+ * are potentially optimizable, then set up root->minmax_aggs with a list of
+ * these aggregates.
*
- * We are passed the preprocessed tlist, and the best path
- * devised for computing the input of a standard Agg node. If we are able
- * to optimize all the aggregates, and the result is estimated to be cheaper
- * than the generic aggregate method, then generate and return a Plan that
- * does it that way. Otherwise, return NULL.
+ * Note: we are passed the preprocessed targetlist separately, because it's
+ * not necessarily equal to root->parse->targetList.
*/
-Plan *
-optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
+void
+preprocess_minmax_aggregates(PlannerInfo *root, List *tlist)
{
Query *parse = root->parse;
FromExpr *jtnode;
RangeTblRef *rtr;
RangeTblEntry *rte;
- RelOptInfo *rel;
List *aggs_list;
- ListCell *l;
- Cost total_cost;
- Path agg_p;
- Plan *plan;
- Node *hqual;
- QualCost tlist_cost;
+ ListCell *lc;
+
+ /* minmax_aggs list should be empty at this point */
+ Assert(root->minmax_aggs == NIL);
/* Nothing to do if query has no aggregates */
if (!parse->hasAggs)
- return NULL;
+ return;
Assert(!parse->setOperations); /* shouldn't get here if a setop */
Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
* so there's not much point in optimizing MIN/MAX.
*/
if (parse->groupClause || parse->hasWindowFuncs)
- return NULL;
+ return;
/*
* We also restrict the query to reference exactly one table, since join
* conditions can't be handled reasonably. (We could perhaps handle a
* query containing cartesian-product joins, but it hardly seems worth the
* trouble.) However, the single real table could be buried in several
- * levels of FromExpr.
+ * levels of FromExpr due to subqueries. Note the single table could be
+ * an inheritance parent, too.
*/
jtnode = parse->jointree;
while (IsA(jtnode, FromExpr))
{
if (list_length(jtnode->fromlist) != 1)
- return NULL;
+ return;
jtnode = linitial(jtnode->fromlist);
}
if (!IsA(jtnode, RangeTblRef))
- return NULL;
+ return;
rtr = (RangeTblRef *) jtnode;
rte = planner_rt_fetch(rtr->rtindex, root);
- if (rte->rtekind != RTE_RELATION || rte->inh)
- return NULL;
- rel = find_base_rel(root, rtr->rtindex);
+ if (rte->rtekind != RTE_RELATION)
+ return;
/*
- * Since this optimization is not applicable all that often, we want to
- * fall out before doing very much work if possible. Therefore we do the
- * work in several passes. The first pass scans the tlist and HAVING qual
- * to find all the aggregates and verify that each of them is a MIN/MAX
- * aggregate. If that succeeds, the second pass looks at each aggregate
- * to see if it is optimizable; if so we make an IndexPath describing how
- * we would scan it. (We do not try to optimize if only some aggs are
- * optimizable, since that means we'll have to scan all the rows anyway.)
- * If that succeeds, we have enough info to compare costs against the
- * generic implementation. Only if that test passes do we build a Plan.
+ * Scan the tlist and HAVING qual to find all the aggregates and verify
+ * all are MIN/MAX aggregates. Stop as soon as we find one that isn't.
*/
-
- /* Pass 1: find all the aggregates */
aggs_list = NIL;
if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
- return NULL;
+ return;
if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
+ return;
+
+ /*
+ * OK, there is at least the possibility of performing the optimization.
+ * Build pathkeys (and thereby EquivalenceClasses) for each aggregate.
+ * The existence of the EquivalenceClasses will prompt the path generation
+ * logic to try to build paths matching the desired sort ordering(s).
+ *
+ * Note: the pathkeys are non-canonical at this point. They'll be fixed
+ * later by canonicalize_all_pathkeys().
+ */
+ foreach(lc, aggs_list)
+ {
+ MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
+
+ mminfo->pathkeys = make_pathkeys_for_aggregate(root,
+ mminfo->target,
+ mminfo->aggsortop);
+ }
+
+ /*
+ * We're done until path generation is complete. Save info for later.
+ */
+ root->minmax_aggs = aggs_list;
+}
+
+/*
+ * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
+ *
+ * Check to see whether all the aggregates are in fact optimizable into
+ * indexscans. If so, and the result is estimated to be cheaper than the
+ * generic aggregate method, then generate and return a Plan that does it
+ * that way. Otherwise, return NULL.
+ *
+ * We are passed the preprocessed tlist, as well as the best path devised for
+ * computing the input of a standard Agg node.
+ */
+Plan *
+optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
+{
+ Query *parse = root->parse;
+ FromExpr *jtnode;
+ RangeTblRef *rtr;
+ RelOptInfo *rel;
+ List *aggs_list;
+ ListCell *lc;
+ Cost total_cost;
+ Path agg_p;
+ Plan *plan;
+ Node *hqual;
+ QualCost tlist_cost;
+
+ /* Nothing to do if preprocess_minmax_aggs rejected the query */
+ if (root->minmax_aggs == NIL)
return NULL;
- /* Pass 2: see if each one is optimizable */
+ /* Re-locate the one real table identified by preprocess_minmax_aggs */
+ jtnode = parse->jointree;
+ while (IsA(jtnode, FromExpr))
+ {
+ Assert(list_length(jtnode->fromlist) == 1);
+ jtnode = linitial(jtnode->fromlist);
+ }
+ Assert(IsA(jtnode, RangeTblRef));
+ rtr = (RangeTblRef *) jtnode;
+ rel = find_base_rel(root, rtr->rtindex);
+
+ /*
+ * Examine each agg to see if we can find a suitable ordered path for it.
+ * Give up if any agg isn't indexable.
+ */
+ aggs_list = NIL;
total_cost = 0;
- foreach(l, aggs_list)
+ foreach(lc, root->minmax_aggs)
{
- MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
+ MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
+ PrivateMMAggInfo *info;
- if (!build_minmax_path(root, rel, info))
+ info = find_minmax_path(root, rel, mminfo);
+ if (!info)
return NULL;
+ aggs_list = lappend(aggs_list, info);
total_cost += info->pathcost;
}
/*
- * Make the cost comparison.
+ * Now we have enough info to compare costs against the generic aggregate
+ * implementation.
*
* Note that we don't include evaluation cost of the tlist here; this is
* OK since it isn't included in best_path's cost either, and should be
/*
* OK, we are going to generate an optimized plan.
+ *
+ * First, generate a subplan and output Param node for each agg.
*/
-
- /* Pass 3: generate subplans and output Param nodes */
- foreach(l, aggs_list)
+ foreach(lc, aggs_list)
{
- make_agg_subplan(root, (MinMaxAggInfo *) lfirst(l));
+ make_agg_subplan(root, rel, (PrivateMMAggInfo *) lfirst(lc));
}
/*
Aggref *aggref = (Aggref *) node;
Oid aggsortop;
TargetEntry *curTarget;
- MinMaxAggInfo *info;
+ MinMaxAggInfo *mminfo;
ListCell *l;
Assert(aggref->agglevelsup == 0);
if (list_length(aggref->args) != 1 || aggref->aggorder != NIL)
return true; /* it couldn't be MIN/MAX */
/* note: we do not care if DISTINCT is mentioned ... */
+ curTarget = (TargetEntry *) linitial(aggref->args);
aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
if (!OidIsValid(aggsortop))
return true; /* not a MIN/MAX aggregate */
+ if (contain_mutable_functions((Node *) curTarget->expr))
+ return true; /* not potentially indexable */
+
+ if (type_is_rowtype(exprType((Node *) curTarget->expr)))
+ return true; /* IS NOT NULL would have weird semantics */
+
/*
* Check whether it's already in the list, and add it if not.
*/
- curTarget = (TargetEntry *) linitial(aggref->args);
foreach(l, *context)
{
- info = (MinMaxAggInfo *) lfirst(l);
- if (info->aggfnoid == aggref->aggfnoid &&
- equal(info->target, curTarget->expr))
+ mminfo = (MinMaxAggInfo *) lfirst(l);
+ if (mminfo->aggfnoid == aggref->aggfnoid &&
+ equal(mminfo->target, curTarget->expr))
return false;
}
- info = (MinMaxAggInfo *) palloc0(sizeof(MinMaxAggInfo));
- info->aggfnoid = aggref->aggfnoid;
- info->aggsortop = aggsortop;
- info->target = curTarget->expr;
+ mminfo = makeNode(MinMaxAggInfo);
+ mminfo->aggfnoid = aggref->aggfnoid;
+ mminfo->aggsortop = aggsortop;
+ mminfo->target = curTarget->expr;
+ mminfo->pathkeys = NIL; /* don't compute pathkeys yet */
- *context = lappend(*context, info);
+ *context = lappend(*context, mminfo);
/*
* We need not recurse into the argument, since it can't contain any
}
/*
- * build_minmax_path
- * Given a MIN/MAX aggregate, try to find an index it can be optimized
- * with. Build a Path describing the best such index path.
- *
- * Returns TRUE if successful, FALSE if not. In the TRUE case, info->path
- * is filled in.
+ * find_minmax_path
+ * Given a MIN/MAX aggregate, try to find an ordered Path it can be
+ * optimized with.
*
- * XXX look at sharing more code with indxpath.c.
- *
- * Note: check_partial_indexes() must have been run previously.
+ * If successful, build and return a PrivateMMAggInfo struct. Otherwise,
+ * return NULL.
*/
-static bool
-build_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *info)
+static PrivateMMAggInfo *
+find_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *mminfo)
{
- IndexPath *best_path = NULL;
+ PrivateMMAggInfo *info;
+ Path *best_path = NULL;
Cost best_cost = 0;
- bool best_nulls_first = false;
- NullTest *ntest;
- List *allquals;
- ListCell *l;
-
- /* Build "target IS NOT NULL" expression for use below */
- ntest = makeNode(NullTest);
- ntest->nulltesttype = IS_NOT_NULL;
- ntest->arg = copyObject(info->target);
- ntest->argisrow = type_is_rowtype(exprType((Node *) ntest->arg));
- if (ntest->argisrow)
- return false; /* punt on composites */
- info->notnulltest = ntest;
+ double path_fraction;
+ PathKey *mmpathkey;
+ ListCell *lc;
/*
- * Build list of existing restriction clauses plus the notnull test. We
- * cheat a bit by not bothering with a RestrictInfo node for the notnull
- * test --- predicate_implied_by() won't care.
+ * Punt if the aggregate's pathkey turned out to be redundant, ie its
+ * pathkeys list is now empty. This would happen with something like
+ * "SELECT max(x) ... WHERE x = constant". There's no need to try to
+ * optimize such a case, because if there is an index that would help,
+ * it should already have been used with the WHERE clause.
*/
- allquals = list_concat(list_make1(ntest), rel->baserestrictinfo);
+ if (mminfo->pathkeys == NIL)
+ return NULL;
- foreach(l, rel->indexlist)
- {
- IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
- ScanDirection indexscandir = NoMovementScanDirection;
- int indexcol;
- int prevcol;
- List *restrictclauses;
- IndexPath *new_path;
- Cost new_cost;
- bool found_clause;
+ /*
+ * Search the paths that were generated for the rel to see if there are
+ * any with the desired ordering. There could be multiple such paths,
+ * in which case take the cheapest (as measured according to how fast it
+ * will be to fetch the first row).
+ *
+ * We can't use pathkeys_contained_in() to check the ordering, because we
+ * would like to match pathkeys regardless of the nulls_first setting.
+ * However, we know that MIN/MAX aggregates will have at most one item in
+ * their pathkeys, so it's not too complicated to match by brute force.
+ *
+ * Note: this test ignores the possible costs associated with skipping
+ * NULL tuples. We assume that adding the not-null criterion to the
+ * indexqual doesn't really cost anything.
+ */
+ if (rel->rows > 1.0)
+ path_fraction = 1.0 / rel->rows;
+ else
+ path_fraction = 1.0;
- /* Ignore non-btree indexes */
- if (index->relam != BTREE_AM_OID)
- continue;
+ Assert(list_length(mminfo->pathkeys) == 1);
+ mmpathkey = (PathKey *) linitial(mminfo->pathkeys);
- /*
- * Ignore partial indexes that do not match the query --- unless their
- * predicates can be proven from the baserestrict list plus the IS NOT
- * NULL test. In that case we can use them.
- */
- if (index->indpred != NIL && !index->predOK &&
- !predicate_implied_by(index->indpred, allquals))
- continue;
+ foreach(lc, rel->pathlist)
+ {
+ Path *path = (Path *) lfirst(lc);
+ PathKey *pathkey;
+ Cost path_cost;
- /*
- * Look for a match to one of the index columns. (In a stupidly
- * designed index, there could be multiple matches, but we only care
- * about the first one.)
- */
- for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
- {
- indexscandir = match_agg_to_index_col(info, index, indexcol);
- if (!ScanDirectionIsNoMovement(indexscandir))
- break;
- }
- if (ScanDirectionIsNoMovement(indexscandir))
- continue;
+ if (path->pathkeys == NIL)
+ continue; /* unordered path */
+ pathkey = (PathKey *) linitial(path->pathkeys);
- /*
- * If the match is not at the first index column, we have to verify
- * that there are "x = something" restrictions on all the earlier
- * index columns. Since we'll need the restrictclauses list anyway to
- * build the path, it's convenient to extract that first and then look
- * through it for the equality restrictions.
- */
- restrictclauses = group_clauses_by_indexkey(index,
- index->rel->baserestrictinfo,
- NIL,
- NULL,
- SAOP_FORBID,
- &found_clause);
-
- if (list_length(restrictclauses) < indexcol)
- continue; /* definitely haven't got enough */
- for (prevcol = 0; prevcol < indexcol; prevcol++)
+ if (mmpathkey->pk_eclass == pathkey->pk_eclass &&
+ mmpathkey->pk_opfamily == pathkey->pk_opfamily &&
+ mmpathkey->pk_strategy == pathkey->pk_strategy)
{
- List *rinfos = (List *) list_nth(restrictclauses, prevcol);
- ListCell *ll;
-
- foreach(ll, rinfos)
+ /*
+ * OK, it has the right ordering; is it acceptable otherwise?
+ * (We test in this order because the pathkey check is cheap.)
+ */
+ if (path_usable_for_agg(path))
{
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(ll);
- int strategy;
-
- /* Could be an IS_NULL test, if so ignore */
- if (!is_opclause(rinfo->clause))
- continue;
- strategy =
- get_op_opfamily_strategy(((OpExpr *) rinfo->clause)->opno,
- index->opfamily[prevcol]);
- if (strategy == BTEqualStrategyNumber)
- break;
+ /*
+ * It'll work; but is it the cheapest?
+ *
+ * Note: cost calculation here should match
+ * compare_fractional_path_costs().
+ */
+ path_cost = path->startup_cost +
+ path_fraction * (path->total_cost - path->startup_cost);
+
+ if (best_path == NULL || path_cost < best_cost)
+ {
+ best_path = path;
+ best_cost = path_cost;
+ }
}
- if (ll == NULL)
- break; /* none are Equal for this index col */
- }
- if (prevcol < indexcol)
- continue; /* didn't find all Equal clauses */
-
- /*
- * Build the access path. We don't bother marking it with pathkeys.
- */
- new_path = create_index_path(root, index,
- restrictclauses,
- NIL,
- indexscandir,
- NULL);
-
- /*
- * Estimate actual cost of fetching just one row.
- */
- if (new_path->rows > 1.0)
- new_cost = new_path->path.startup_cost +
- (new_path->path.total_cost - new_path->path.startup_cost)
- * 1.0 / new_path->rows;
- else
- new_cost = new_path->path.total_cost;
-
- /*
- * Keep if first or if cheaper than previous best.
- */
- if (best_path == NULL || new_cost < best_cost)
- {
- best_path = new_path;
- best_cost = new_cost;
- if (ScanDirectionIsForward(indexscandir))
- best_nulls_first = index->nulls_first[indexcol];
- else
- best_nulls_first = !index->nulls_first[indexcol];
}
}
+ /* Fail if no suitable path */
+ if (best_path == NULL)
+ return NULL;
+
+ /* Construct private state for further processing */
+ info = (PrivateMMAggInfo *) palloc(sizeof(PrivateMMAggInfo));
+ info->mminfo = mminfo;
info->path = best_path;
info->pathcost = best_cost;
- info->nulls_first = best_nulls_first;
- return (best_path != NULL);
+ info->param = NULL; /* will be set later */
+
+ return info;
}
/*
- * match_agg_to_index_col
- * Does an aggregate match an index column?
- *
- * It matches if its argument is equal to the index column's data and its
- * sortop is either the forward or reverse sort operator for the column.
- *
- * We return ForwardScanDirection if match the forward sort operator,
- * BackwardScanDirection if match the reverse sort operator,
- * and NoMovementScanDirection if there's no match.
+ * To be usable, a Path needs to be an IndexPath on a btree index, or be a
+ * MergeAppendPath of such IndexPaths. This restriction is mainly because
+ * we need to be sure the index can handle an added NOT NULL constraint at
+ * minimal additional cost. If you wish to relax it, you'll need to improve
+ * add_notnull_qual() too.
*/
-static ScanDirection
-match_agg_to_index_col(MinMaxAggInfo *info, IndexOptInfo *index, int indexcol)
+static bool
+path_usable_for_agg(Path *path)
{
- ScanDirection result;
-
- /* Check for operator match first (cheaper) */
- if (info->aggsortop == index->fwdsortop[indexcol])
- result = ForwardScanDirection;
- else if (info->aggsortop == index->revsortop[indexcol])
- result = BackwardScanDirection;
- else
- return NoMovementScanDirection;
+ if (IsA(path, IndexPath))
+ {
+ IndexPath *ipath = (IndexPath *) path;
- /* Check for data match */
- if (!match_index_to_operand((Node *) info->target, indexcol, index))
- return NoMovementScanDirection;
+ /* OK if it's a btree index */
+ if (ipath->indexinfo->relam == BTREE_AM_OID)
+ return true;
+ }
+ else if (IsA(path, MergeAppendPath))
+ {
+ MergeAppendPath *mpath = (MergeAppendPath *) path;
+ ListCell *lc;
- return result;
+ foreach(lc, mpath->subpaths)
+ {
+ if (!path_usable_for_agg((Path *) lfirst(lc)))
+ return false;
+ }
+ return true;
+ }
+ return false;
}
/*
* Construct a suitable plan for a converted aggregate query
*/
static void
-make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
+make_agg_subplan(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info)
{
PlannerInfo subroot;
Query *subparse;
Plan *plan;
- IndexScan *iplan;
TargetEntry *tle;
- SortGroupClause *sortcl;
/*
* Generate a suitably modified query. Much of the work here is probably
subparse->groupClause = NIL;
subparse->havingQual = NULL;
subparse->distinctClause = NIL;
+ subparse->sortClause = NIL;
subroot.hasHavingQual = false;
/* single tlist entry that is the aggregate target */
- tle = makeTargetEntry(copyObject(info->target),
+ tle = makeTargetEntry(copyObject(info->mminfo->target),
1,
pstrdup("agg_target"),
false);
subparse->targetList = list_make1(tle);
- /* set up the appropriate ORDER BY entry */
- sortcl = makeNode(SortGroupClause);
- sortcl->tleSortGroupRef = assignSortGroupRef(tle, subparse->targetList);
- sortcl->eqop = get_equality_op_for_ordering_op(info->aggsortop, NULL);
- if (!OidIsValid(sortcl->eqop)) /* shouldn't happen */
- elog(ERROR, "could not find equality operator for ordering operator %u",
- info->aggsortop);
- sortcl->sortop = info->aggsortop;
- sortcl->nulls_first = info->nulls_first;
- sortcl->hashable = false; /* no need to make this accurate */
- subparse->sortClause = list_make1(sortcl);
-
- /* set up LIMIT 1 */
+ /* set up expressions for LIMIT 1 */
subparse->limitOffset = NULL;
subparse->limitCount = (Node *) makeConst(INT8OID, -1, sizeof(int64),
Int64GetDatum(1), false,
FLOAT8PASSBYVAL);
/*
- * Generate the plan for the subquery. We already have a Path for the
- * basic indexscan, but we have to convert it to a Plan and attach a LIMIT
- * node above it.
- *
- * Also we must add a "WHERE target IS NOT NULL" restriction to the
- * indexscan, to be sure we don't return a NULL, which'd be contrary to
- * the standard behavior of MIN/MAX.
- *
- * The NOT NULL qual has to go on the actual indexscan; create_plan might
- * have stuck a gating Result atop that, if there were any pseudoconstant
- * quals.
+ * Modify the ordered Path to add an indexed "target IS NOT NULL"
+ * condition to each scan. We need this to ensure we don't return a NULL,
+ * which'd be contrary to the standard behavior of MIN/MAX. We insist on
+ * it being indexed, else the Path might not be as cheap as we thought.
*/
- plan = create_plan(&subroot, (Path *) info->path);
-
- plan->targetlist = copyObject(subparse->targetList);
+ add_notnull_qual(root, rel, info, info->path);
- if (IsA(plan, Result))
- iplan = (IndexScan *) plan->lefttree;
- else
- iplan = (IndexScan *) plan;
- if (!IsA(iplan, IndexScan))
- elog(ERROR, "result of create_plan(IndexPath) isn't an IndexScan");
+ /*
+ * Generate the plan for the subquery. We already have a Path, but we have
+ * to convert it to a Plan and attach a LIMIT node above it.
+ */
+ plan = create_plan(&subroot, info->path);
- attach_notnull_index_qual(info, iplan);
+ plan->targetlist = subparse->targetList;
plan = (Plan *) make_limit(plan,
subparse->limitOffset,
}
/*
- * Add "target IS NOT NULL" to the quals of the given indexscan.
- *
- * This is trickier than it sounds because the new qual has to be added at an
- * appropriate place in the qual list, to preserve the list's ordering by
- * index column position.
+ * Attach a suitable NOT NULL qual to the IndexPath, or each of the member
+ * IndexPaths. Note we assume we can modify the paths in-place.
*/
static void
-attach_notnull_index_qual(MinMaxAggInfo *info, IndexScan *iplan)
+add_notnull_qual(PlannerInfo *root, RelOptInfo *rel, PrivateMMAggInfo *info,
+ Path *path)
{
- NullTest *ntest;
- List *newindexqual;
- List *newindexqualorig;
- bool done;
- ListCell *lc1;
- ListCell *lc2;
- Expr *leftop;
- AttrNumber targetattno;
-
- /*
- * We can skip adding the NOT NULL qual if it duplicates either an
- * already-given WHERE condition, or a clause of the index predicate.
- */
- if (list_member(iplan->indexqualorig, info->notnulltest) ||
- list_member(info->path->indexinfo->indpred, info->notnulltest))
- return;
-
- /* Need a "fixed" copy as well as the original */
- ntest = copyObject(info->notnulltest);
- ntest->arg = (Expr *) fix_indexqual_operand((Node *) ntest->arg,
- info->path->indexinfo);
-
- /* Identify the target index column from the "fixed" copy */
- leftop = ntest->arg;
-
- if (leftop && IsA(leftop, RelabelType))
- leftop = ((RelabelType *) leftop)->arg;
-
- Assert(leftop != NULL);
-
- if (!IsA(leftop, Var))
- elog(ERROR, "NullTest indexqual has wrong key");
-
- targetattno = ((Var *) leftop)->varattno;
-
- /*
- * list.c doesn't expose a primitive to insert a list cell at an arbitrary
- * position, so our strategy is to copy the lists and insert the null test
- * when we reach an appropriate spot.
- */
- newindexqual = newindexqualorig = NIL;
- done = false;
-
- forboth(lc1, iplan->indexqual, lc2, iplan->indexqualorig)
+ if (IsA(path, IndexPath))
{
- Expr *qual = (Expr *) lfirst(lc1);
- Expr *qualorig = (Expr *) lfirst(lc2);
- AttrNumber varattno;
+ IndexPath *ipath = (IndexPath *) path;
+ Expr *target;
+ NullTest *ntest;
+ RestrictInfo *rinfo;
+ List *newquals;
+ bool found_clause;
/*
- * Identify which index column this qual is for. This code should
- * match the qual disassembly code in ExecIndexBuildScanKeys.
+ * If we are looking at a child of the original rel, we have to adjust
+ * the agg target expression to match the child.
*/
- if (IsA(qual, OpExpr))
+ if (ipath->path.parent != rel)
{
- /* indexkey op expression */
- leftop = (Expr *) get_leftop(qual);
-
- if (leftop && IsA(leftop, RelabelType))
- leftop = ((RelabelType *) leftop)->arg;
+ AppendRelInfo *appinfo = NULL;
+ ListCell *lc;
- Assert(leftop != NULL);
-
- if (!IsA(leftop, Var))
- elog(ERROR, "indexqual doesn't have key on left side");
-
- varattno = ((Var *) leftop)->varattno;
+ /* Search for the appropriate AppendRelInfo */
+ foreach(lc, root->append_rel_list)
+ {
+ appinfo = (AppendRelInfo *) lfirst(lc);
+ if (appinfo->parent_relid == rel->relid &&
+ appinfo->child_relid == ipath->path.parent->relid)
+ break;
+ appinfo = NULL;
+ }
+ if (!appinfo)
+ elog(ERROR, "failed to find AppendRelInfo for child rel");
+ target = (Expr *)
+ adjust_appendrel_attrs((Node *) info->mminfo->target,
+ appinfo);
}
- else if (IsA(qual, RowCompareExpr))
+ else
{
- /* (indexkey, indexkey, ...) op (expression, expression, ...) */
- RowCompareExpr *rc = (RowCompareExpr *) qual;
-
- /*
- * Examine just the first column of the rowcompare, which is what
- * determines its placement in the overall qual list.
- */
- leftop = (Expr *) linitial(rc->largs);
-
- if (leftop && IsA(leftop, RelabelType))
- leftop = ((RelabelType *) leftop)->arg;
-
- Assert(leftop != NULL);
-
- if (!IsA(leftop, Var))
- elog(ERROR, "indexqual doesn't have key on left side");
-
- varattno = ((Var *) leftop)->varattno;
+ /* Otherwise, just make a copy (may not be necessary) */
+ target = copyObject(info->mminfo->target);
}
- else if (IsA(qual, ScalarArrayOpExpr))
- {
- /* indexkey op ANY (array-expression) */
- ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) qual;
- leftop = (Expr *) linitial(saop->args);
+ /* Build "target IS NOT NULL" expression */
+ ntest = makeNode(NullTest);
+ ntest->nulltesttype = IS_NOT_NULL;
+ ntest->arg = target;
+ /* we checked it wasn't a rowtype in find_minmax_aggs_walker */
+ ntest->argisrow = false;
- if (leftop && IsA(leftop, RelabelType))
- leftop = ((RelabelType *) leftop)->arg;
-
- Assert(leftop != NULL);
-
- if (!IsA(leftop, Var))
- elog(ERROR, "indexqual doesn't have key on left side");
+ /*
+ * We can skip adding the NOT NULL qual if it duplicates either an
+ * already-given index condition, or a clause of the index predicate.
+ */
+ if (list_member(get_actual_clauses(ipath->indexquals), ntest) ||
+ list_member(ipath->indexinfo->indpred, ntest))
+ return;
- varattno = ((Var *) leftop)->varattno;
- }
- else if (IsA(qual, NullTest))
- {
- /* indexkey IS NULL or indexkey IS NOT NULL */
- NullTest *ntest = (NullTest *) qual;
+ /* Wrap it in a RestrictInfo and prepend to existing indexquals */
+ rinfo = make_restrictinfo((Expr *) ntest,
+ true,
+ false,
+ false,
+ NULL,
+ NULL);
- leftop = ntest->arg;
+ newquals = list_concat(list_make1(rinfo), ipath->indexquals);
- if (leftop && IsA(leftop, RelabelType))
- leftop = ((RelabelType *) leftop)->arg;
+ /*
+ * We can't just stick the IS NOT NULL at the front of the list,
+ * though. It has to go in the right position corresponding to its
+ * index column, which might not be the first one. Easiest way to fix
+ * this is to run the quals through group_clauses_by_indexkey again.
+ */
+ newquals = group_clauses_by_indexkey(ipath->indexinfo,
+ newquals,
+ NIL,
+ NULL,
+ SAOP_FORBID,
+ &found_clause);
- Assert(leftop != NULL);
+ newquals = flatten_clausegroups_list(newquals);
- if (!IsA(leftop, Var))
- elog(ERROR, "NullTest indexqual has wrong key");
+ /* Trouble if we lost any quals */
+ if (list_length(newquals) != list_length(ipath->indexquals) + 1)
+ elog(ERROR, "add_notnull_qual failed to add NOT NULL qual");
- varattno = ((Var *) leftop)->varattno;
- }
- else
- {
- elog(ERROR, "unsupported indexqual type: %d",
- (int) nodeTag(qual));
- varattno = 0; /* keep compiler quiet */
- }
+ /*
+ * And update the path's indexquals. Note we don't bother adding
+ * to indexclauses, which is OK since this is like a generated
+ * index qual.
+ */
+ ipath->indexquals = newquals;
+ }
+ else if (IsA(path, MergeAppendPath))
+ {
+ MergeAppendPath *mpath = (MergeAppendPath *) path;
+ ListCell *lc;
- /* Insert the null test at the first place it can legally go */
- if (!done && targetattno <= varattno)
+ foreach(lc, mpath->subpaths)
{
- newindexqual = lappend(newindexqual, ntest);
- newindexqualorig = lappend(newindexqualorig, info->notnulltest);
- done = true;
+ add_notnull_qual(root, rel, info, (Path *) lfirst(lc));
}
-
- newindexqual = lappend(newindexqual, qual);
- newindexqualorig = lappend(newindexqualorig, qualorig);
}
-
- /* Add the null test at the end if it must follow all existing quals */
- if (!done)
+ else
{
- newindexqual = lappend(newindexqual, ntest);
- newindexqualorig = lappend(newindexqualorig, info->notnulltest);
+ /* shouldn't get here, because of path_usable_for_agg checks */
+ elog(ERROR, "add_notnull_qual failed");
}
-
- iplan->indexqual = newindexqual;
- iplan->indexqualorig = newindexqualorig;
}
/*
foreach(l, *context)
{
- MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);
+ PrivateMMAggInfo *info = (PrivateMMAggInfo *) lfirst(l);
- if (info->aggfnoid == aggref->aggfnoid &&
- equal(info->target, curTarget->expr))
+ if (info->mminfo->aggfnoid == aggref->aggfnoid &&
+ equal(info->mminfo->target, curTarget->expr))
return (Node *) info->param;
}
- elog(ERROR, "failed to re-find aggregate info record");
+ elog(ERROR, "failed to re-find PrivateMMAggInfo record");
}
Assert(!IsA(node, SubLink));
return expression_tree_mutator(node, replace_aggs_with_params_mutator,
(1 row)
--
--- Test several cases that should be optimized into indexscans instead of
--- the generic aggregate implementation. We can't actually verify that they
--- are done as indexscans, but we can check that the results are correct.
+-- Test cases that should be optimized into indexscans instead of
+-- the generic aggregate implementation.
--
+analyze tenk1; -- ensure we get consistent plans here
-- Basic cases
+explain (costs off)
+ select min(unique1) from tenk1;
+ QUERY PLAN
+-------------------------------------------------------
+ Result
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Index Scan using tenk1_unique1 on tenk1
+ Index Cond: (unique1 IS NOT NULL)
+(5 rows)
+
+select min(unique1) from tenk1;
+ min
+-----
+ 0
+(1 row)
+
+explain (costs off)
+ select max(unique1) from tenk1;
+ QUERY PLAN
+----------------------------------------------------------------
+ Result
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Index Scan Backward using tenk1_unique1 on tenk1
+ Index Cond: (unique1 IS NOT NULL)
+(5 rows)
+
select max(unique1) from tenk1;
max
------
9999
(1 row)
+explain (costs off)
+ select max(unique1) from tenk1 where unique1 < 42;
+ QUERY PLAN
+------------------------------------------------------------------------
+ Result
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Index Scan Backward using tenk1_unique1 on tenk1
+ Index Cond: ((unique1 IS NOT NULL) AND (unique1 < 42))
+(5 rows)
+
select max(unique1) from tenk1 where unique1 < 42;
max
-----
41
(1 row)
+explain (costs off)
+ select max(unique1) from tenk1 where unique1 > 42;
+ QUERY PLAN
+------------------------------------------------------------------------
+ Result
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Index Scan Backward using tenk1_unique1 on tenk1
+ Index Cond: ((unique1 IS NOT NULL) AND (unique1 > 42))
+(5 rows)
+
select max(unique1) from tenk1 where unique1 > 42;
max
------
9999
(1 row)
+explain (costs off)
+ select max(unique1) from tenk1 where unique1 > 42000;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Result
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Index Scan Backward using tenk1_unique1 on tenk1
+ Index Cond: ((unique1 IS NOT NULL) AND (unique1 > 42000))
+(5 rows)
+
select max(unique1) from tenk1 where unique1 > 42000;
max
-----
(1 row)
-- multi-column index (uses tenk1_thous_tenthous)
+explain (costs off)
+ select max(tenthous) from tenk1 where thousand = 33;
+ QUERY PLAN
+--------------------------------------------------------------------------
+ Result
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Index Scan Backward using tenk1_thous_tenthous on tenk1
+ Index Cond: ((thousand = 33) AND (tenthous IS NOT NULL))
+(5 rows)
+
select max(tenthous) from tenk1 where thousand = 33;
max
------
9033
(1 row)
+explain (costs off)
+ select min(tenthous) from tenk1 where thousand = 33;
+ QUERY PLAN
+--------------------------------------------------------------------------
+ Result
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Index Scan using tenk1_thous_tenthous on tenk1
+ Index Cond: ((thousand = 33) AND (tenthous IS NOT NULL))
+(5 rows)
+
select min(tenthous) from tenk1 where thousand = 33;
min
-----
(1 row)
-- check parameter propagation into an indexscan subquery
+explain (costs off)
+ select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
+ from int4_tbl;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------
+ Seq Scan on int4_tbl
+ SubPlan 2
+ -> Result
+ InitPlan 1 (returns $1)
+ -> Limit
+ -> Index Scan using tenk1_unique1 on tenk1
+ Index Cond: ((unique1 IS NOT NULL) AND (unique1 > int4_tbl.f1))
+(7 rows)
+
select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt
-from int4_tbl;
+ from int4_tbl;
f1 | gt
-------------+----
0 | 1
(5 rows)
-- check some cases that were handled incorrectly in 8.3.0
+explain (costs off)
+ select distinct max(unique2) from tenk1;
+ QUERY PLAN
+----------------------------------------------------------------
+ HashAggregate
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Index Scan Backward using tenk1_unique2 on tenk1
+ Index Cond: (unique2 IS NOT NULL)
+ -> Result
+(6 rows)
+
select distinct max(unique2) from tenk1;
max
------
9999
(1 row)
+explain (costs off)
+ select max(unique2) from tenk1 order by 1;
+ QUERY PLAN
+----------------------------------------------------------------
+ Sort
+ Sort Key: ($0)
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Index Scan Backward using tenk1_unique2 on tenk1
+ Index Cond: (unique2 IS NOT NULL)
+ -> Result
+(7 rows)
+
select max(unique2) from tenk1 order by 1;
max
------
9999
(1 row)
+explain (costs off)
+ select max(unique2) from tenk1 order by max(unique2);
+ QUERY PLAN
+----------------------------------------------------------------
+ Sort
+ Sort Key: ($0)
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Index Scan Backward using tenk1_unique2 on tenk1
+ Index Cond: (unique2 IS NOT NULL)
+ -> Result
+(7 rows)
+
select max(unique2) from tenk1 order by max(unique2);
max
------
9999
(1 row)
+explain (costs off)
+ select max(unique2) from tenk1 order by max(unique2)+1;
+ QUERY PLAN
+----------------------------------------------------------------
+ Sort
+ Sort Key: (($0 + 1))
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Index Scan Backward using tenk1_unique2 on tenk1
+ Index Cond: (unique2 IS NOT NULL)
+ -> Result
+(7 rows)
+
select max(unique2) from tenk1 order by max(unique2)+1;
max
------
9999
(1 row)
+explain (costs off)
+ select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
+ QUERY PLAN
+----------------------------------------------------------------
+ Sort
+ Sort Key: (generate_series(1, 3))
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Index Scan Backward using tenk1_unique2 on tenk1
+ Index Cond: (unique2 IS NOT NULL)
+ -> Result
+(7 rows)
+
select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
max | g
------+---
9999 | 1
(3 rows)
+-- this is an interesting special case as of 9.1
+explain (costs off)
+ select min(unique2) from tenk1 where unique2 = 42;
+ QUERY PLAN
+-----------------------------------------------
+ Aggregate
+ -> Index Scan using tenk1_unique2 on tenk1
+ Index Cond: (unique2 = 42)
+(3 rows)
+
+select min(unique2) from tenk1 where unique2 = 42;
+ min
+-----
+ 42
+(1 row)
+
+-- try it on an inheritance tree
+create table minmaxtest(f1 int);
+create table minmaxtest1() inherits (minmaxtest);
+create table minmaxtest2() inherits (minmaxtest);
+create index minmaxtesti on minmaxtest(f1);
+create index minmaxtest1i on minmaxtest1(f1);
+create index minmaxtest2i on minmaxtest2(f1 desc);
+insert into minmaxtest values(11), (12);
+insert into minmaxtest1 values(13), (14);
+insert into minmaxtest2 values(15), (16);
+explain (costs off)
+ select min(f1), max(f1) from minmaxtest;
+ QUERY PLAN
+--------------------------------------------------------------------------------------
+ Result
+ InitPlan 1 (returns $0)
+ -> Limit
+ -> Merge Append
+ Sort Key: public.minmaxtest.f1
+ -> Index Scan using minmaxtesti on minmaxtest
+ Index Cond: (f1 IS NOT NULL)
+ -> Index Scan using minmaxtest1i on minmaxtest1 minmaxtest
+ Index Cond: (f1 IS NOT NULL)
+ -> Index Scan Backward using minmaxtest2i on minmaxtest2 minmaxtest
+ Index Cond: (f1 IS NOT NULL)
+ InitPlan 2 (returns $1)
+ -> Limit
+ -> Merge Append
+ Sort Key: public.minmaxtest.f1
+ -> Index Scan Backward using minmaxtesti on minmaxtest
+ Index Cond: (f1 IS NOT NULL)
+ -> Index Scan Backward using minmaxtest1i on minmaxtest1 minmaxtest
+ Index Cond: (f1 IS NOT NULL)
+ -> Index Scan using minmaxtest2i on minmaxtest2 minmaxtest
+ Index Cond: (f1 IS NOT NULL)
+(21 rows)
+
+select min(f1), max(f1) from minmaxtest;
+ min | max
+-----+-----
+ 11 | 16
+(1 row)
+
+drop table minmaxtest cascade;
+NOTICE: drop cascades to 2 other objects
+DETAIL: drop cascades to table minmaxtest1
+drop cascades to table minmaxtest2
--
-- Test combinations of DISTINCT and/or ORDER BY
--