*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.122 2005/06/05 22:32:56 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.123 2005/07/15 17:09:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "utils/syscache.h"
-static bool is_distinct_query(Query *query);
+static List *translate_sub_tlist(List *tlist, int relid);
+static bool query_is_distinct_for(Query *query, List *colnos);
static bool hash_safe_tlist(List *tlist);
pathnode->subpath = subpath;
/*
- * If the input is a subquery whose output must be unique already, we
- * don't need to do anything.
+ * Try to identify the targetlist that will actually be unique-ified.
+ * In current usage, this routine is only used for sub-selects of IN
+ * clauses, so we should be able to find the tlist in in_info_list.
+ */
+ sub_targetlist = NIL;
+ foreach(l, root->in_info_list)
+ {
+ InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
+
+ if (bms_equal(ininfo->righthand, rel->relids))
+ {
+ sub_targetlist = ininfo->sub_targetlist;
+ break;
+ }
+ }
+
+ /*
+ * If the input is a subquery whose output must be unique already,
+ * then we don't need to do anything. The test for uniqueness has
+ * to consider exactly which columns we are extracting; for example
+ * "SELECT DISTINCT x,y" doesn't guarantee that x alone is distinct.
+ * So we cannot check for this optimization unless we found our own
+ * targetlist above, and it consists only of simple Vars referencing
+ * subquery outputs. (Possibly we could do something with expressions
+ * in the subquery outputs, too, but for now keep it simple.)
*/
- if (rel->rtekind == RTE_SUBQUERY)
+ if (sub_targetlist && rel->rtekind == RTE_SUBQUERY)
{
RangeTblEntry *rte = rt_fetch(rel->relid, root->parse->rtable);
+ List *sub_tlist_colnos;
+
+ sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid);
- if (is_distinct_query(rte->subquery))
+ if (sub_tlist_colnos &&
+ query_is_distinct_for(rte->subquery, sub_tlist_colnos))
{
pathnode->umethod = UNIQUE_PATH_NOOP;
pathnode->rows = rel->rows;
}
}
- /*
- * Try to identify the targetlist that will actually be unique-ified.
- * In current usage, this routine is only used for sub-selects of IN
- * clauses, so we should be able to find the tlist in in_info_list.
- */
- sub_targetlist = NIL;
- foreach(l, root->in_info_list)
- {
- InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-
- if (bms_equal(ininfo->righthand, rel->relids))
- {
- sub_targetlist = ininfo->sub_targetlist;
- break;
- }
- }
-
/*
* If we know the targetlist, try to estimate number of result rows;
* otherwise punt.
}
/*
- * is_distinct_query - does query never return duplicate rows?
+ * translate_sub_tlist - get subquery column numbers represented by tlist
+ *
+ * The given targetlist should contain only Vars referencing the given relid.
+ * Extract their varattnos (ie, the column numbers of the subquery) and return
+ * as an integer List.
+ *
+ * If any of the tlist items is not a simple Var, we cannot determine whether
+ * the subquery's uniqueness condition (if any) matches ours, so punt and
+ * return NIL.
*/
-static bool
-is_distinct_query(Query *query)
+static List *
+translate_sub_tlist(List *tlist, int relid)
{
- /* DISTINCT (but not DISTINCT ON) guarantees uniqueness */
- if (has_distinct_clause(query))
- return true;
+ List *result = NIL;
+ ListCell *l;
- /* UNION, INTERSECT, EXCEPT guarantee uniqueness, except with ALL */
- if (query->setOperations)
+ foreach(l, tlist)
{
- SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
+ Var *var = (Var *) lfirst(l);
- Assert(IsA(topop, SetOperationStmt));
- Assert(topop->op != SETOP_NONE);
+ if (!var || !IsA(var, Var) ||
+ var->varno != relid)
+ return NIL; /* punt */
- if (!topop->all)
+ result = lappend_int(result, var->varattno);
+ }
+ return result;
+}
+
+/*
+ * query_is_distinct_for - does query never return duplicates of the
+ * specified columns?
+ *
+ * colnos is an integer list of output column numbers (resno's). We are
+ * interested in whether rows consisting of just these columns are certain
+ * to be distinct.
+ */
+static bool
+query_is_distinct_for(Query *query, List *colnos)
+{
+ ListCell *l;
+
+ /*
+ * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
+ * columns in the DISTINCT clause appear in colnos.
+ */
+ if (query->distinctClause)
+ {
+ foreach(l, query->distinctClause)
+ {
+ SortClause *scl = (SortClause *) lfirst(l);
+ TargetEntry *tle = get_sortgroupclause_tle(scl,
+ query->targetList);
+
+ if (!list_member_int(colnos, tle->resno))
+ break; /* exit early if no match */
+ }
+ if (l == NULL) /* had matches for all? */
return true;
}
/*
- * GROUP BY guarantees uniqueness if all the grouped columns appear in
- * the output. In our implementation this means checking they are non
- * resjunk columns.
+ * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
+ * appear in colnos.
*/
if (query->groupClause)
{
- ListCell *gl;
-
- foreach(gl, query->groupClause)
+ foreach(l, query->groupClause)
{
- GroupClause *grpcl = (GroupClause *) lfirst(gl);
+ GroupClause *grpcl = (GroupClause *) lfirst(l);
TargetEntry *tle = get_sortgroupclause_tle(grpcl,
query->targetList);
- if (tle->resjunk)
- break;
+ if (!list_member_int(colnos, tle->resno))
+ break; /* exit early if no match */
}
- if (!gl) /* got to the end? */
+ if (l == NULL) /* had matches for all? */
return true;
}
else
return true;
}
+ /*
+ * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
+ * except with ALL
+ */
+ if (query->setOperations)
+ {
+ SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
+
+ Assert(IsA(topop, SetOperationStmt));
+ Assert(topop->op != SETOP_NONE);
+
+ if (!topop->all)
+ {
+ /* We're good if all the nonjunk output columns are in colnos */
+ foreach(l, query->targetList)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+
+ if (!tle->resjunk &&
+ !list_member_int(colnos, tle->resno))
+ break; /* exit early if no match */
+ }
+ if (l == NULL) /* had matches for all? */
+ return true;
+ }
+ }
+
/*
* XXX Are there any other cases in which we can easily see the result
* must be distinct?
10
(1 row)
+--
+-- Test cases to check for overenthusiastic optimization of
+-- "IN (SELECT DISTINCT ...)" and related cases. Per example from
+-- Luca Pireddu and Michael Fuhr.
+--
+CREATE TEMP TABLE foo (id integer);
+CREATE TEMP TABLE bar (id1 integer, id2 integer);
+INSERT INTO foo VALUES (1);
+INSERT INTO bar VALUES (1, 1);
+INSERT INTO bar VALUES (2, 2);
+INSERT INTO bar VALUES (3, 1);
+-- These cases require an extra level of distinct-ing above subquery s
+SELECT * FROM foo WHERE id IN
+ (SELECT id2 FROM (SELECT DISTINCT id1, id2 FROM bar) AS s);
+ id
+----
+ 1
+(1 row)
+
+SELECT * FROM foo WHERE id IN
+ (SELECT id2 FROM (SELECT id1,id2 FROM bar GROUP BY id1,id2) AS s);
+ id
+----
+ 1
+(1 row)
+
+SELECT * FROM foo WHERE id IN
+ (SELECT id2 FROM (SELECT id1, id2 FROM bar UNION
+ SELECT id1, id2 FROM bar) AS s);
+ id
+----
+ 1
+(1 row)
+
+-- These cases do not
+SELECT * FROM foo WHERE id IN
+ (SELECT id2 FROM (SELECT DISTINCT ON (id2) id1, id2 FROM bar) AS s);
+ id
+----
+ 1
+(1 row)
+
+SELECT * FROM foo WHERE id IN
+ (SELECT id2 FROM (SELECT id2 FROM bar GROUP BY id2) AS s);
+ id
+----
+ 1
+(1 row)
+
+SELECT * FROM foo WHERE id IN
+ (SELECT id2 FROM (SELECT id2 FROM bar UNION
+ SELECT id2 FROM bar) AS s);
+ id
+----
+ 1
+(1 row)
+
--
-- Test case to catch problems with multiply nested sub-SELECTs not getting
-- recalculated properly. Per bug report from Didier Moens.
(select ten from tenk1 a
where unique1 IN (select distinct hundred from tenk1 b)) ss;
+--
+-- Test cases to check for overenthusiastic optimization of
+-- "IN (SELECT DISTINCT ...)" and related cases. Per example from
+-- Luca Pireddu and Michael Fuhr.
+--
+
+CREATE TEMP TABLE foo (id integer);
+CREATE TEMP TABLE bar (id1 integer, id2 integer);
+
+INSERT INTO foo VALUES (1);
+
+INSERT INTO bar VALUES (1, 1);
+INSERT INTO bar VALUES (2, 2);
+INSERT INTO bar VALUES (3, 1);
+
+-- These cases require an extra level of distinct-ing above subquery s
+SELECT * FROM foo WHERE id IN
+ (SELECT id2 FROM (SELECT DISTINCT id1, id2 FROM bar) AS s);
+SELECT * FROM foo WHERE id IN
+ (SELECT id2 FROM (SELECT id1,id2 FROM bar GROUP BY id1,id2) AS s);
+SELECT * FROM foo WHERE id IN
+ (SELECT id2 FROM (SELECT id1, id2 FROM bar UNION
+ SELECT id1, id2 FROM bar) AS s);
+
+-- These cases do not
+SELECT * FROM foo WHERE id IN
+ (SELECT id2 FROM (SELECT DISTINCT ON (id2) id1, id2 FROM bar) AS s);
+SELECT * FROM foo WHERE id IN
+ (SELECT id2 FROM (SELECT id2 FROM bar GROUP BY id2) AS s);
+SELECT * FROM foo WHERE id IN
+ (SELECT id2 FROM (SELECT id2 FROM bar UNION
+ SELECT id2 FROM bar) AS s);
+
--
-- Test case to catch problems with multiply nested sub-SELECTs not getting
-- recalculated properly. Per bug report from Didier Moens.