diff options
| author | Tom Lane | 2008-08-02 21:32:01 +0000 |
|---|---|---|
| committer | Tom Lane | 2008-08-02 21:32:01 +0000 |
| commit | 951130475221562b44b0da575fac8470adb5b555 (patch) | |
| tree | 5dc2ac24a5381a2ff1c2f25e1d14430b66558c02 /src/include | |
| parent | 49f001d81ed635747c4439f38ae1fc6090887452 (diff) | |
Rearrange the querytree representation of ORDER BY/GROUP BY/DISTINCT items
as per my recent proposal:
1. Fold SortClause and GroupClause into a single node type SortGroupClause.
We were already relying on them to be struct-equivalent, so using two node
tags wasn't accomplishing much except to get in the way of comparing items
with equal().
2. Add an "eqop" field to SortGroupClause to carry the associated equality
operator. This is cheap for the parser to get at the same time it's looking
up the sort operator, and storing it eliminates the need for repeated
not-so-cheap lookups during planning. In future this will also let us
represent GROUP/DISTINCT operations on datatypes that have hash opclasses
but no btree opclasses (ie, they have equality but no natural sort order).
The previous representation simply didn't work for that, since its only
indicator of comparison semantics was a sort operator.
3. Add a hasDistinctOn boolean to struct Query to explicitly record whether
the distinctClause came from DISTINCT or DISTINCT ON. This allows removing
some complicated and not 100% bulletproof code that attempted to figure
that out from the distinctClause alone.
This patch doesn't in itself create any new capability, but it's necessary
infrastructure for future attempts to use hash-based grouping for DISTINCT
and UNION/INTERSECT/EXCEPT.
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/catalog/catversion.h | 4 | ||||
| -rw-r--r-- | src/include/nodes/nodes.h | 5 | ||||
| -rw-r--r-- | src/include/nodes/parsenodes.h | 88 | ||||
| -rw-r--r-- | src/include/nodes/primnodes.h | 18 | ||||
| -rw-r--r-- | src/include/optimizer/clauses.h | 5 | ||||
| -rw-r--r-- | src/include/optimizer/tlist.h | 8 | ||||
| -rw-r--r-- | src/include/parser/parse_clause.h | 13 | ||||
| -rw-r--r-- | src/include/parser/parse_oper.h | 13 | ||||
| -rw-r--r-- | src/include/utils/lsyscache.h | 6 |
9 files changed, 83 insertions, 77 deletions
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 264183af56a..49d2a32431d 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.473 2008/07/30 19:35:13 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.474 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200807301 +#define CATALOG_VERSION_NO 200808011 #endif diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 79d679b5be4..e35356ab55f 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.206 2008/03/20 21:42:48 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.207 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -336,8 +336,7 @@ typedef enum NodeTag T_Constraint, T_DefElem, T_RangeTblEntry, - T_SortClause, - T_GroupClause, + T_SortGroupClause, T_FkConstraint, T_PrivGrantee, T_FuncWithArgs, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index fa78b4b188b..f1a1e828fc4 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.369 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.370 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -108,6 +108,7 @@ typedef struct Query bool hasAggs; /* has aggregates in tlist or havingQual */ bool hasSubLinks; /* has subquery SubLink */ + bool hasDistinctOn; /* distinctClause is from DISTINCT ON */ List *rtable; /* list of range table entries */ FromExpr *jointree; /* table join tree (FROM and WHERE clauses) */ @@ -116,13 +117,13 @@ typedef struct Query List *returningList; /* return-values list (of TargetEntry) */ - List *groupClause; /* a list of GroupClause's */ + List *groupClause; /* a list of SortGroupClause's */ Node *havingQual; /* qualifications applied to groups */ - List *distinctClause; /* a list of SortClause's */ + List *distinctClause; /* a list of SortGroupClause's */ - List *sortClause; /* a list of SortClause's */ + List *sortClause; /* a list of SortGroupClause's */ Node *limitOffset; /* # of result tuples to skip (int8 expr) */ Node *limitCount; /* # of result tuples to return (int8 expr) */ @@ -607,47 +608,58 @@ typedef struct RangeTblEntry } RangeTblEntry; /* - * SortClause - - * representation of ORDER BY clauses + * SortGroupClause - + * representation of ORDER BY, GROUP BY, DISTINCT, DISTINCT ON items + * + * You might think that ORDER BY is only interested in defining ordering, + * and GROUP/DISTINCT are only interested in defining equality. However, + * one way to implement grouping is to sort and then apply a "uniq"-like + * filter. So it's also interesting to keep track of possible sort operators + * for GROUP/DISTINCT, and in particular to try to sort for the grouping + * in a way that will also yield a requested ORDER BY ordering. So we need + * to be able to compare ORDER BY and GROUP/DISTINCT lists, which motivates + * the decision to give them the same representation. * * tleSortGroupRef must match ressortgroupref of exactly one entry of the - * associated targetlist; that is the expression to be sorted (or grouped) by. - * sortop is the OID of the ordering operator (a "<" or ">" operator). - * nulls_first means about what you'd expect. + * query's targetlist; that is the expression to be sorted or grouped by. + * eqop is the OID of the equality operator. + * sortop is the OID of the ordering operator (a "<" or ">" operator), + * or InvalidOid if not available. + * nulls_first means about what you'd expect. If sortop is InvalidOid + * then nulls_first is meaningless and should be set to false. + * + * In an ORDER BY item, all fields must be valid. (The eqop isn't essential + * here, but it's cheap to get it along with the sortop, and requiring it + * to be valid eases comparisons to grouping items.) + * + * In a grouping item, eqop must be valid. If the eqop is a btree equality + * operator, then sortop should be set to a compatible ordering operator. + * We prefer to set eqop/sortop/nulls_first to match any ORDER BY item that + * the query presents for the same tlist item. If there is none, we just + * use the default ordering op for the datatype. * - * SortClauses are also used to identify targets that we will do a "Unique" - * filter step on (for SELECT DISTINCT and SELECT DISTINCT ON). The - * distinctClause list is a list of SortClauses for the expressions to be - * unique-ified. (As per comment for GroupClause, this overspecifies the - * semantics.) In SELECT DISTINCT, the distinctClause list is typically - * longer than the ORDER BY list, while in SELECT DISTINCT ON it's typically - * shorter. The two lists must match up to the end of the shorter one --- - * the parser rearranges the distinctClause if necessary to make this true. - * (This restriction ensures that only one sort step is needed to both - * satisfy the ORDER BY and set up for the Unique step. This is semantically - * necessary for DISTINCT ON, and offers no real drawback for DISTINCT.) + * If the tlist item's type has a hash opclass but no btree opclass, then + * we will set eqop to the hash equality operator, sortop to InvalidOid, + * and nulls_first to false. A grouping item of this kind can only be + * implemented by hashing, and of course it'll never match an ORDER BY item. + * + * A query might have both ORDER BY and DISTINCT (or DISTINCT ON) clauses. + * In SELECT DISTINCT, the distinctClause list is as long or longer than the + * sortClause list, while in SELECT DISTINCT ON it's typically shorter. + * The two lists must match up to the end of the shorter one --- the parser + * rearranges the distinctClause if necessary to make this true. (This + * restriction ensures that only one sort step is needed to both satisfy the + * ORDER BY and set up for the Unique step. This is semantically necessary + * for DISTINCT ON, and presents no real drawback for DISTINCT.) */ -typedef struct SortClause +typedef struct SortGroupClause { NodeTag type; Index tleSortGroupRef; /* reference into targetlist */ - Oid sortop; /* the ordering operator ('<' op) */ - bool nulls_first; /* do NULLs come before normal values? */ -} SortClause; - -/* - * GroupClause - - * representation of GROUP BY clauses - * - * GroupClause is exactly like SortClause except for the nodetag value. - * We have routines that operate interchangeably on both. - * - * XXX SortClause overspecifies the semantics so far as GROUP BY is concerned - * (ditto for DISTINCT). It'd be better to specify an equality operator not - * an ordering operator. However, the two implementations are tightly entwined - * at the moment ... breaking them apart is work for another day. - */ -typedef SortClause GroupClause; + Oid eqop; /* the equality operator ('=' op) */ + Oid sortop; /* the ordering operator ('<' op), or 0 */ + bool nulls_first; /* do NULLs come before normal values? */ +} SortGroupClause; /* * RowMarkClause - diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 59d7af50443..f0c55112cbc 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -10,7 +10,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.137 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.138 2008/08/02 21:32:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -969,14 +969,14 @@ typedef struct CurrentOfExpr * a specific system-generated name (such as "ctid") or NULL; anything else * risks confusing ExecGetJunkAttribute! * - * ressortgroupref is used in the representation of ORDER BY and - * GROUP BY items. Targetlist entries with ressortgroupref=0 are not - * sort/group items. If ressortgroupref>0, then this item is an ORDER BY or - * GROUP BY value. No two entries in a targetlist may have the same nonzero - * ressortgroupref --- but there is no particular meaning to the nonzero - * values, except as tags. (For example, one must not assume that lower - * ressortgroupref means a more significant sort key.) The order of the - * associated SortClause or GroupClause lists determine the semantics. + * ressortgroupref is used in the representation of ORDER BY, GROUP BY, and + * DISTINCT items. Targetlist entries with ressortgroupref=0 are not + * sort/group items. If ressortgroupref>0, then this item is an ORDER BY, + * GROUP BY, and/or DISTINCT target value. No two entries in a targetlist + * may have the same nonzero ressortgroupref --- but there is no particular + * meaning to the nonzero values, except as tags. (For example, one must + * not assume that lower ressortgroupref means a more significant sort key.) + * The order of the associated SortGroupClause lists determine the semantics. * * resorigtbl/resorigcol identify the source of the column, if it is a * simple reference to a column of a base table (or view). If it is not diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index dce10d65066..69a4f4c774c 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.90 2008/04/01 00:48:33 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.91 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -63,9 +63,6 @@ extern Relids find_nonnullable_rels(Node *clause); extern bool is_pseudo_constant_clause(Node *clause); extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids); -extern bool has_distinct_clause(Query *query); -extern bool has_distinct_on_clause(Query *query); - extern int NumRelids(Node *clause); extern void CommuteOpExpr(OpExpr *clause); diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index e953d665209..8a8966213b1 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.49 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.50 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,11 +25,11 @@ extern List *add_to_flat_tlist(List *tlist, List *vars); extern TargetEntry *get_sortgroupref_tle(Index sortref, List *targetList); -extern TargetEntry *get_sortgroupclause_tle(SortClause *sortClause, +extern TargetEntry *get_sortgroupclause_tle(SortGroupClause *sgClause, List *targetList); -extern Node *get_sortgroupclause_expr(SortClause *sortClause, +extern Node *get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList); -extern List *get_sortgrouplist_exprs(List *sortClauses, +extern List *get_sortgrouplist_exprs(List *sgClauses, List *targetList); extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK); diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index 6fc357522e7..357a2947cb5 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.50 2008/07/31 22:47:56 tgl Exp $ + * $PostgreSQL: pgsql/src/include/parser/parse_clause.h,v 1.51 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,13 +30,14 @@ extern List *transformGroupClause(ParseState *pstate, List *grouplist, List **targetlist, List *sortClause); extern List *transformSortClause(ParseState *pstate, List *orderlist, List **targetlist, bool resolveUnknown); -extern List *transformDistinctClause(ParseState *pstate, List *distinctlist, +extern List *transformDistinctClause(ParseState *pstate, + List **targetlist, List *sortClause); +extern List *transformDistinctOnClause(ParseState *pstate, List *distinctlist, List **targetlist, List *sortClause); -extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, - List *sortlist, List *targetlist, - SortByDir sortby_dir, SortByNulls sortby_nulls, - List *sortby_opname, bool resolveUnknown); +extern List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle, + List *grouplist, List *targetlist, + bool requireSortOp, bool resolveUnknown); extern Index assignSortGroupRef(TargetEntry *tle, List *tlist); extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList); diff --git a/src/include/parser/parse_oper.h b/src/include/parser/parse_oper.h index f0ec2d1f088..ad0d65b3be2 100644 --- a/src/include/parser/parse_oper.h +++ b/src/include/parser/parse_oper.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/parse_oper.h,v 1.42 2008/01/01 19:45:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/parser/parse_oper.h,v 1.43 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -45,16 +45,13 @@ extern Operator compatible_oper(ParseState *pstate, List *op, /* currently no need for compatible_left_oper/compatible_right_oper */ -/* Routines for identifying "=", "<", ">" operators for a type */ -extern Operator equality_oper(Oid argtype, bool noError); -extern Operator ordering_oper(Oid argtype, bool noError); -extern Operator reverse_ordering_oper(Oid argtype, bool noError); +/* Routines for identifying "<", "=", ">" operators for a type */ +extern void get_sort_group_operators(Oid argtype, + bool needLT, bool needEQ, bool needGT, + Oid *ltOpr, Oid *eqOpr, Oid *gtOpr); /* Convenience routines for common calls on the above */ extern Oid compatible_oper_opid(List *op, Oid arg1, Oid arg2, bool noError); -extern Oid equality_oper_funcid(Oid argtype); -extern Oid ordering_oper_opid(Oid argtype); -extern Oid reverse_ordering_oper_opid(Oid argtype); /* Extract operator OID or underlying-function OID from an Operator tuple */ extern Oid oprid(Operator op); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 66e035cffae..0538e0ff569 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.124 2008/07/30 17:05:05 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/lsyscache.h,v 1.125 2008/08/02 21:32:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -38,7 +38,7 @@ extern bool get_ordering_op_properties(Oid opno, Oid *opfamily, Oid *opcintype, int16 *strategy); extern bool get_compare_function_for_ordering_op(Oid opno, Oid *cmpfunc, bool *reverse); -extern Oid get_equality_op_for_ordering_op(Oid opno); +extern Oid get_equality_op_for_ordering_op(Oid opno, bool *reverse); extern Oid get_ordering_op_for_equality_op(Oid opno, bool use_lhs_type); extern List *get_mergejoin_opfamilies(Oid opno); extern bool get_compatible_hash_operators(Oid opno, @@ -47,7 +47,7 @@ extern bool get_op_hash_functions(Oid opno, RegProcedure *lhs_procno, RegProcedure *rhs_procno); extern void get_op_btree_interpretation(Oid opno, List **opfamilies, List **opstrats); -extern bool ops_in_same_btree_opfamily(Oid opno1, Oid opno2); +extern bool equality_ops_are_compatible(Oid opno1, Oid opno2); extern Oid get_opfamily_proc(Oid opfamily, Oid lefttype, Oid righttype, int16 procnum); extern char *get_attname(Oid relid, AttrNumber attnum); |
