15 files changed, 938 insertions, 226 deletions
diff --git a/src/backend/parser/Makefile b/src/backend/parser/Makefile
index ee92a8ca463..665250b9e76 100644
--- a/src/backend/parser/Makefile
+++ b/src/backend/parser/Makefile
@@ -13,9 +13,9 @@ include $(top_builddir)/src/Makefile.global
 override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS)
 
 OBJS= analyze.o gram.o keywords.o kwlookup.o parser.o \
-      parse_agg.o parse_clause.o parse_coerce.o parse_cte.o parse_expr.o \
-      parse_func.o parse_node.o parse_oper.o parse_param.o parse_relation.o \
-      parse_target.o parse_type.o parse_utilcmd.o scansup.o
+      parse_agg.o parse_clause.o parse_coerce.o parse_collate.o parse_cte.o \
+      parse_expr.o parse_func.o parse_node.o parse_oper.o parse_param.o \
+      parse_relation.o parse_target.o parse_type.o parse_utilcmd.o scansup.o
 
 FLEXFLAGS = -CF
 
diff --git a/src/backend/parser/README b/src/backend/parser/README
index 59cc32fef3a..08625e427d2 100644
--- a/src/backend/parser/README
+++ b/src/backend/parser/README
@@ -17,6 +17,7 @@ analyze.c	top level of parse analysis for optimizable queries
 parse_agg.c	handle aggregates, like SUM(col1),  AVG(col2), ...
 parse_clause.c	handle clauses like WHERE, ORDER BY, GROUP BY, ...
 parse_coerce.c	handle coercing expressions to different data types
+parse_collate.c	assign collation information in completed expressions
 parse_cte.c	handle Common Table Expressions (WITH clauses)
 parse_expr.c	handle expressions like col, col + 3, x = 3 or x = 4
 parse_func.c	handle functions, table.column and column identifiers
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 85f231da9c5..315f067b17a 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -33,6 +33,7 @@
 #include "parser/parse_agg.h"
 #include "parser/parse_clause.h"
 #include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
 #include "parser/parse_cte.h"
 #include "parser/parse_oper.h"
 #include "parser/parse_param.h"
@@ -323,6 +324,8 @@ transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt)
 	if (pstate->p_hasWindowFuncs)
 		parseCheckWindowFuncs(pstate, qry);
 
+	assign_query_collations(pstate, qry);
+
 	return qry;
 }
 
@@ -566,6 +569,14 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
 										 stmt->cols,
 										 icolumns, attrnos);
 
+			/*
+			 * We must assign collations now because assign_query_collations
+			 * doesn't process rangetable entries.  We just assign all the
+			 * collations independently in each row, and don't worry about
+			 * whether they are consistent vertically either.
+			 */
+			assign_list_collations(pstate, sublist);
+
 			exprsLists = lappend(exprsLists, sublist);
 		}
 
@@ -705,6 +716,8 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
 				 parser_errposition(pstate,
 									locate_windowfunc((Node *) qry))));
 
+	assign_query_collations(pstate, qry);
+
 	return qry;
 }
 
@@ -960,6 +973,8 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
 							   (LockingClause *) lfirst(l), false);
 	}
 
+	assign_query_collations(pstate, qry);
+
 	return qry;
 }
 
@@ -1082,6 +1097,14 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt)
 			i++;
 		}
 
+		/*
+		 * We must assign collations now because assign_query_collations
+		 * doesn't process rangetable entries.  We just assign all the
+		 * collations independently in each row, and don't worry about
+		 * whether they are consistent vertically either.
+		 */
+		assign_list_collations(pstate, newsublist);
+
 		newExprsLists = lappend(newExprsLists, newsublist);
 	}
 
@@ -1176,6 +1199,8 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt)
 				 parser_errposition(pstate,
 								locate_windowfunc((Node *) newExprsLists))));
 
+	assign_query_collations(pstate, qry);
+
 	return qry;
 }
 
@@ -1417,6 +1442,8 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
 							   (LockingClause *) lfirst(l), false);
 	}
 
+	assign_query_collations(pstate, qry);
+
 	return qry;
 }
 
@@ -1634,12 +1661,6 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
 				rescoltypmod = lcoltypmod;
 			else
 				rescoltypmod = -1;
-			/* Select common collation.  A common collation is
-			 * required for all set operators except UNION ALL; see
-			 * SQL:2008-2 7.13 SR 15c. */
-			rescolcoll = select_common_collation(pstate,
-												 list_make2(lcolnode, rcolnode),
-												 (op->op == SETOP_UNION && op->all));
 
 			/*
 			 * Verify the coercions are actually possible.	If not, we'd fail
@@ -1662,26 +1683,46 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
 			 * output type of the child query and the resolved target type.
 			 * Such a discrepancy would disable optimization in the planner.
 			 *
-			 * If it's some other UNKNOWN-type node, eg a Var, we do nothing.
-			 * The planner is sometimes able to fold an UNKNOWN Var to a
-			 * constant before it has to coerce the type, so failing now would
-			 * just break cases that might work.
+			 * If it's some other UNKNOWN-type node, eg a Var, we do nothing
+			 * (knowing that coerce_to_common_type would fail).  The planner
+			 * is sometimes able to fold an UNKNOWN Var to a constant before
+			 * it has to coerce the type, so failing now would just break
+			 * cases that might work.
 			 */
 			if (lcoltype != UNKNOWNOID)
-				(void) coerce_to_common_type(pstate, lcolnode,
-											 rescoltype, context);
-			else if (IsA(lcolnode, Const) ||IsA(lcolnode, Param))
-				ltle->expr = (Expr *)
-					coerce_to_common_type(pstate, lcolnode,
-										  rescoltype, context);
+				lcolnode = coerce_to_common_type(pstate, lcolnode,
+												 rescoltype, context);
+			else if (IsA(lcolnode, Const) ||
+					 IsA(lcolnode, Param))
+			{
+				lcolnode = coerce_to_common_type(pstate, lcolnode,
+												 rescoltype, context);
+				ltle->expr = (Expr *) lcolnode;
+			}
 
 			if (rcoltype != UNKNOWNOID)
-				(void) coerce_to_common_type(pstate, rcolnode,
-											 rescoltype, context);
-			else if (IsA(rcolnode, Const) ||IsA(rcolnode, Param))
-				rtle->expr = (Expr *)
-					coerce_to_common_type(pstate, rcolnode,
-										  rescoltype, context);
+				rcolnode = coerce_to_common_type(pstate, rcolnode,
+												 rescoltype, context);
+			else if (IsA(rcolnode, Const) ||
+					 IsA(rcolnode, Param))
+			{
+				rcolnode = coerce_to_common_type(pstate, rcolnode,
+												 rescoltype, context);
+				rtle->expr = (Expr *) rcolnode;
+			}
+
+			/*
+			 * Select common collation.  A common collation is required for
+			 * all set operators except UNION ALL; see SQL:2008 7.13 <query
+			 * expression> Syntax Rule 15c.  (If we fail to identify a common
+			 * collation for a UNION ALL column, the curCollations element
+			 * will be set to InvalidOid, which may result in a runtime error
+			 * if something at a higher query level wants to use the column's
+			 * collation.)
+			 */
+			rescolcoll = select_common_collation(pstate,
+												 list_make2(lcolnode, rcolnode),
+												 (op->op == SETOP_UNION && op->all));
 
 			/* emit results */
 			op->colTypes = lappend_oid(op->colTypes, rescoltype);
@@ -1734,7 +1775,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
 
 				rescolnode->typeId = rescoltype;
 				rescolnode->typeMod = rescoltypmod;
-				rescolnode->collid = rescolcoll;
+				rescolnode->collation = rescolcoll;
 				rescolnode->location = bestlocation;
 				restle = makeTargetEntry((Expr *) rescolnode,
 										 0,			/* no need to set resno */
@@ -1966,6 +2007,8 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt)
 	if (origTargetList != NULL)
 		elog(ERROR, "UPDATE target count mismatch --- internal error");
 
+	assign_query_collations(pstate, qry);
+
 	return qry;
 }
 
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 8267627c42f..523d6e6989a 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -709,6 +709,7 @@ check_ungrouped_columns_walker(Node *node,
  * agg_input_types, agg_state_type, agg_result_type identify the input,
  * transition, and result types of the aggregate.  These should all be
  * resolved to actual types (ie, none should ever be ANYELEMENT etc).
+ * agg_input_collation is the aggregate function's input collation.
  *
  * transfn_oid and finalfn_oid identify the funcs to be called; the latter
  * may be InvalidOid.
@@ -721,9 +722,9 @@ build_aggregate_fnexprs(Oid *agg_input_types,
 						int agg_num_inputs,
 						Oid agg_state_type,
 						Oid agg_result_type,
+						Oid agg_input_collation,
 						Oid transfn_oid,
 						Oid finalfn_oid,
-						Oid collation,
 						Expr **transfnexpr,
 						Expr **finalfnexpr)
 {
@@ -742,7 +743,7 @@ build_aggregate_fnexprs(Oid *agg_input_types,
 	argp->paramid = -1;
 	argp->paramtype = agg_state_type;
 	argp->paramtypmod = -1;
-	argp->paramcollation = collation;
+	argp->paramcollid = agg_input_collation;
 	argp->location = -1;
 
 	args = list_make1(argp);
@@ -754,7 +755,7 @@ build_aggregate_fnexprs(Oid *agg_input_types,
 		argp->paramid = -1;
 		argp->paramtype = agg_input_types[i];
 		argp->paramtypmod = -1;
-		argp->paramcollation = collation;
+		argp->paramcollid = agg_input_collation;
 		argp->location = -1;
 		args = lappend(args, argp);
 	}
@@ -762,7 +763,8 @@ build_aggregate_fnexprs(Oid *agg_input_types,
 	*transfnexpr = (Expr *) makeFuncExpr(transfn_oid,
 										 agg_state_type,
 										 args,
-										 collation,
+										 InvalidOid,
+										 agg_input_collation,
 										 COERCE_DONTCARE);
 
 	/* see if we have a final function */
@@ -780,13 +782,14 @@ build_aggregate_fnexprs(Oid *agg_input_types,
 	argp->paramid = -1;
 	argp->paramtype = agg_state_type;
 	argp->paramtypmod = -1;
-	argp->paramcollation = collation;
+	argp->paramcollid = agg_input_collation;
 	argp->location = -1;
 	args = list_make1(argp);
 
 	*finalfnexpr = (Expr *) makeFuncExpr(finalfn_oid,
 										 agg_result_type,
 										 args,
-										 collation,
+										 InvalidOid,
+										 agg_input_collation,
 										 COERCE_DONTCARE);
 }
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 4c5a6fe0b01..6c0a78474cd 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -28,6 +28,7 @@
 #include "parser/parsetree.h"
 #include "parser/parse_clause.h"
 #include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
 #include "parser/parse_expr.h"
 #include "parser/parse_oper.h"
 #include "parser/parse_relation.h"
@@ -558,6 +559,11 @@ transformRangeFunction(ParseState *pstate, RangeFunction *r)
 	funcexpr = transformExpr(pstate, r->funccallnode);
 
 	/*
+	 * We must assign collations now so that we can fill funccolcollations.
+	 */
+	assign_expr_collations(pstate, funcexpr);
+
+	/*
 	 * The function parameters cannot make use of any variables from other
 	 * FROM items.	(Compare to transformRangeSubselect(); the coding is
 	 * different though because we didn't parse as a sub-select with its own
@@ -1072,6 +1078,7 @@ buildMergedJoinVar(ParseState *pstate, JoinType jointype,
 	else if (l_colvar->vartypmod != outcoltypmod)
 		l_node = (Node *) makeRelabelType((Expr *) l_colvar,
 										  outcoltype, outcoltypmod,
+										  InvalidOid,		/* fixed below */
 										  COERCE_IMPLICIT_CAST);
 	else
 		l_node = (Node *) l_colvar;
@@ -1083,6 +1090,7 @@ buildMergedJoinVar(ParseState *pstate, JoinType jointype,
 	else if (r_colvar->vartypmod != outcoltypmod)
 		r_node = (Node *) makeRelabelType((Expr *) r_colvar,
 										  outcoltype, outcoltypmod,
+										  InvalidOid,		/* fixed below */
 										  COERCE_IMPLICIT_CAST);
 	else
 		r_node = (Node *) r_colvar;
@@ -1121,6 +1129,7 @@ buildMergedJoinVar(ParseState *pstate, JoinType jointype,
 				CoalesceExpr *c = makeNode(CoalesceExpr);
 
 				c->coalescetype = outcoltype;
+				/* coalescecollid will get set below */
 				c->args = list_make2(l_node, r_node);
 				c->location = -1;
 				res_node = (Node *) c;
@@ -1132,6 +1141,13 @@ buildMergedJoinVar(ParseState *pstate, JoinType jointype,
 			break;
 	}
 
+	/*
+	 * Apply assign_expr_collations to fix up the collation info in the
+	 * coercion and CoalesceExpr nodes, if we made any.  This must be done
+	 * now so that the join node's alias vars show correct collation info.
+	 */
+	assign_expr_collations(pstate, res_node);
+
 	return res_node;
 }
 
@@ -1936,7 +1952,6 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle,
 					bool resolveUnknown)
 {
 	Oid			restype = exprType((Node *) tle->expr);
-	Oid			rescollation = exprCollation((Node *) tle->expr);
 	Oid			sortop;
 	Oid			eqop;
 	bool		hashable;
@@ -2020,12 +2035,6 @@ addTargetToSortList(ParseState *pstate, TargetEntry *tle,
 			break;
 	}
 
-	if (type_is_collatable(restype) && !OidIsValid(rescollation))
-		ereport(ERROR,
-				(errcode(ERRCODE_INDETERMINATE_COLLATION),
-				 errmsg("no collation was derived for the sort expression"),
-				 errhint("Use the COLLATE clause to set the collation explicitly.")));
-
 	cancel_parser_errposition_callback(&pcbstate);
 
 	/* avoid making duplicate sortlist entries */
diff --git a/src/backend/parser/parse_coerce.c b/src/backend/parser/parse_coerce.c
index 6aff34dd90d..9b59b032976 100644
--- a/src/backend/parser/parse_coerce.c
+++ b/src/backend/parser/parse_coerce.c
@@ -16,7 +16,6 @@
 
 #include "catalog/pg_cast.h"
 #include "catalog/pg_class.h"
-#include "catalog/pg_collation.h"
 #include "catalog/pg_inherits_fn.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_type.h"
@@ -123,6 +122,9 @@ coerce_to_target_type(ParseState *pstate, Node *expr, Oid exprtype,
  * pstate is only used in the case that we are able to resolve the type of
  * a previously UNKNOWN Param.	It is okay to pass pstate = NULL if the
  * caller does not want type information updated for Params.
+ *
+ * Note: this function must not modify the given expression tree, only add
+ * decoration on top of it.  See transformSetOperationTree, for example.
  */
 Node *
 coerce_type(ParseState *pstate, Node *node,
@@ -282,16 +284,21 @@ coerce_type(ParseState *pstate, Node *node,
 	if (IsA(node, CollateExpr))
 	{
 		/*
-		 * XXX very ugly kluge to push the coercion underneath the CollateExpr.
-		 * This needs to be rethought, as it almost certainly doesn't cover
-		 * all cases.
+		 * If we have a COLLATE clause, we have to push the coercion
+		 * underneath the COLLATE.  This is really ugly, but there is little
+		 * choice because the above hacks on Consts and Params wouldn't happen
+		 * otherwise.
 		 */
-		CollateExpr *cc = (CollateExpr *) node;
-
-		cc->arg = (Expr *) coerce_type(pstate, (Node *) cc->arg,
-									   inputTypeId, targetTypeId, targetTypeMod,
-									   ccontext, cformat, location);
-		return (Node *) cc;
+		CollateExpr *coll = (CollateExpr *) node;
+		CollateExpr *newcoll = makeNode(CollateExpr);
+
+		newcoll->arg = (Expr *)
+			coerce_type(pstate, (Node *) coll->arg,
+						inputTypeId, targetTypeId, targetTypeMod,
+						ccontext, cformat, location);
+		newcoll->collOid = coll->collOid;
+		newcoll->location = coll->location;
+		return (Node *) newcoll;
 	}
 	pathtype = find_coercion_pathway(targetTypeId, inputTypeId, ccontext,
 									 &funcId);
@@ -352,6 +359,7 @@ coerce_type(ParseState *pstate, Node *node,
 				 */
 				RelabelType *r = makeRelabelType((Expr *) result,
 												 targetTypeId, -1,
+												 InvalidOid,
 												 cformat);
 
 				r->location = location;
@@ -591,6 +599,7 @@ coerce_to_domain(Node *arg, Oid baseTypeId, int32 baseTypeMod, Oid typeId,
 	result->arg = (Expr *) arg;
 	result->resulttype = typeId;
 	result->resulttypmod = -1;	/* currently, always -1 for domains */
+	/* resultcollid will be set by parse_collate.c */
 	result->coercionformat = cformat;
 	result->location = location;
 
@@ -734,7 +743,6 @@ build_coercion_expression(Node *node,
 		FuncExpr   *fexpr;
 		List	   *args;
 		Const	   *cons;
-		Oid			collation;
 
 		Assert(OidIsValid(funcId));
 
@@ -766,9 +774,8 @@ build_coercion_expression(Node *node,
 			args = lappend(args, cons);
 		}
 
-		collation = coercion_expression_result_collation(targetTypeId, node);
-
-		fexpr = makeFuncExpr(funcId, targetTypeId, args, collation, cformat);
+		fexpr = makeFuncExpr(funcId, targetTypeId, args,
+							 InvalidOid, InvalidOid, cformat);
 		fexpr->location = location;
 		return (Node *) fexpr;
 	}
@@ -2100,120 +2107,3 @@ typeIsOfTypedTable(Oid reltypeId, Oid reloftypeId)
 
 	return result;
 }
-
-
-/*
- * select_common_collation() -- determine one collation to apply for
- * an expression node, for evaluating the expression itself or to
- * label the result of the expression node.
- *
- * none_ok means that it is permitted to return "no" collation.  It is
- * then not possible to sort the result value of whatever expression
- * is applying this.  none_ok = true reflects the rules of SQL
- * standard clause "Result of data type combinations", none_ok = false
- * reflects the rules of clause "Collation determination" (in some
- * cases invoked via "Grouping operations").
- */
-Oid
-select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
-{
-	ListCell   *lc;
-
-	/*
-	 * Check if there are any explicit collation derivations.  If so,
-	 * they must all be the same.
-	 */
-	foreach(lc, exprs)
-	{
-		Node	   *pexpr = (Node *) lfirst(lc);
-		Oid			pcoll = exprCollation(pexpr);
-		bool		pexplicit = IsA(pexpr, CollateExpr);
-
-		if (pcoll && pexplicit)
-		{
-			ListCell	*lc2;
-			for_each_cell(lc2, lnext(lc))
-			{
-				Node	   *nexpr = (Node *) lfirst(lc2);
-				Oid			ncoll = exprCollation(nexpr);
-				bool		nexplicit = IsA(nexpr, CollateExpr);
-
-				if (!ncoll || !nexplicit)
-					continue;
-
-				if (ncoll != pcoll)
-					ereport(ERROR,
-							(errcode(ERRCODE_COLLATION_MISMATCH),
-							 errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"",
-									get_collation_name(pcoll),
-									get_collation_name(ncoll)),
-							 parser_errposition(pstate, exprLocation(nexpr))));
-			}
-
-			return pcoll;
-		}
-	}
-
-	/*
-	 * Check if there are any implicit collation derivations.
-	 */
-	foreach(lc, exprs)
-	{
-		Node	   *pexpr = (Node *) lfirst(lc);
-		Oid			pcoll = exprCollation(pexpr);
-
-		if (pcoll && pcoll != DEFAULT_COLLATION_OID)
-		{
-			ListCell	*lc2;
-			for_each_cell(lc2, lnext(lc))
-			{
-				Node	   *nexpr = (Node *) lfirst(lc2);
-				Oid			ncoll = exprCollation(nexpr);
-
-				if (!ncoll || ncoll == DEFAULT_COLLATION_OID)
-					continue;
-
-				if (ncoll != pcoll)
-				{
-					if (none_ok)
-						return InvalidOid;
-					ereport(ERROR,
-							(errcode(ERRCODE_COLLATION_MISMATCH),
-							 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
-									get_collation_name(pcoll),
-									get_collation_name(ncoll)),
-							 errhint("You can override the collation by applying the COLLATE clause to one or both expressions."),
-							 parser_errposition(pstate, exprLocation(nexpr))));
-				}
-			}
-
-			return pcoll;
-		}
-	}
-
-	foreach(lc, exprs)
-	{
-		Node	   *pexpr = (Node *) lfirst(lc);
-		Oid			pcoll = exprCollation(pexpr);
-
-		if (pcoll == DEFAULT_COLLATION_OID)
-		{
-			ListCell	*lc2;
-			for_each_cell(lc2, lnext(lc))
-			{
-				Node	   *nexpr = (Node *) lfirst(lc2);
-				Oid			ncoll = exprCollation(nexpr);
-
-				if (ncoll != pcoll)
-					break;
-			}
-
-			return pcoll;
-		}
-	}
-
-	/*
-	 * Else use default
-	 */
-	return InvalidOid;
-}
diff --git a/src/backend/parser/parse_collate.c b/src/backend/parser/parse_collate.c
new file mode 100644
index 00000000000..0b77e3ea2b7
--- /dev/null
+++ b/src/backend/parser/parse_collate.c
@@ -0,0 +1,763 @@
+/*-------------------------------------------------------------------------
+ *
+ * parse_collate.c
+ *		Routines for assigning collation information.
+ *
+ * We choose to handle collation analysis in a post-pass over the output
+ * of expression parse analysis.  This is because we need more state to
+ * perform this processing than is needed in the finished tree.  If we
+ * did it on-the-fly while building the tree, all that state would have
+ * to be kept in expression node trees permanently.  This way, the extra
+ * storage is just local variables in this recursive routine.
+ *
+ * The info that is actually saved in the finished tree is:
+ * 1. The output collation of each expression node, or InvalidOid if it
+ * returns a noncollatable data type.  This can also be InvalidOid if the
+ * result type is collatable but the collation is indeterminate.
+ * 2. The collation to be used in executing each function.  InvalidOid means
+ * that there are no collatable inputs or their collation is indeterminate.
+ * This value is only stored in node types that might call collation-using
+ * functions.
+ *
+ * You might think we could get away with storing only one collation per
+ * node, but the two concepts really need to be kept distinct.  Otherwise
+ * it's too confusing when a function produces a collatable output type but
+ * has no collatable inputs or produces noncollatable output from collatable
+ * inputs.
+ *
+ * Cases with indeterminate collation might result in an error being thrown
+ * at runtime.  If we knew exactly which functions require collation
+ * information, we could throw those errors at parse time instead.
+ *
+ * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/parser/parse_collate.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "nodes/nodeFuncs.h"
+#include "parser/parse_collate.h"
+#include "utils/lsyscache.h"
+
+
+/*
+ * Collation strength (the SQL standard calls this "derivation").  Order is
+ * chosen to allow comparisons to work usefully.  Note: the standard doesn't
+ * seem to distinguish between NONE and CONFLICT.
+ */
+typedef enum
+{
+	COLLATE_NONE,				/* expression is of a noncollatable datatype */
+	COLLATE_IMPLICIT,			/* collation was derived implicitly */
+	COLLATE_CONFLICT,			/* we had a conflict of implicit collations */
+	COLLATE_EXPLICIT			/* collation was derived explicitly */
+} CollateStrength;
+
+typedef struct
+{
+	ParseState *pstate;			/* parse state (for error reporting) */
+	Oid			collation;		/* OID of current collation, if any */
+	CollateStrength strength;	/* strength of current collation choice */
+	int			location;		/* location of expr that set collation */
+	/* Remaining fields are only valid when strength == COLLATE_CONFLICT */
+	Oid			collation2;		/* OID of conflicting collation */
+	int			location2;		/* location of expr that set collation2 */
+} assign_collations_context;
+
+static bool assign_query_collations_walker(Node *node, ParseState *pstate);
+static bool assign_collations_walker(Node *node,
+									 assign_collations_context *context);
+
+
+/*
+ * assign_query_collations()
+ *		Mark all expressions in the given Query with collation information.
+ *
+ * This should be applied to each Query after completion of parse analysis
+ * for expressions.  Note that we do not recurse into sub-Queries, since
+ * those should have been processed when built.
+ */
+void
+assign_query_collations(ParseState *pstate, Query *query)
+{
+	/*
+	 * Visit every contained expression via query_tree_walker().  The flags
+	 * skip the rangetable and CTE subqueries: RTEs and subqueries had better
+	 * have been processed already (else Vars referring to them would not get
+	 * created with the right collation).  The walker's result is ignored.
+	 */
+	(void) query_tree_walker(query,
+							 assign_query_collations_walker,
+							 (void *) pstate,
+							 QTW_IGNORE_RANGE_TABLE |
+							 QTW_IGNORE_CTE_SUBQUERIES);
+}
+
+/*
+ * Walker for assign_query_collations
+ *
+ * Each expression found by query_tree_walker is processed independently.
+ * Note that query_tree_walker may pass us a whole List, such as the
+ * targetlist, in which case each subexpression must be processed
+ * independently --- we don't want to bleat if two different targetentries
+ * have different collations.
+ */
+static bool
+assign_query_collations_walker(Node *node, ParseState *pstate)
+{
+	/* Nothing to do for an empty (NULL) subexpression */
+	if (node == NULL)
+		return false;
+
+	/*
+	 * We don't want to recurse into a set-operations tree; it's already
+	 * been fully processed in transformSetOperationStmt.
+	 */
+	if (IsA(node, SetOperationStmt))
+		return false;
+
+	if (IsA(node, List))		/* e.g. a whole targetlist */
+		assign_list_collations(pstate, (List *) node);
+	else						/* a single expression tree */
+		assign_expr_collations(pstate, node);
+
+	return false;
+}
+
+/*
+ * assign_list_collations()
+ *		Mark all nodes in the list of expressions with collation information.
+ *
+ * The list member expressions are processed independently; they do not have
+ * to share a common collation.
+ */
+void
+assign_list_collations(ParseState *pstate, List *exprs)
+{
+	ListCell   *lc;
+
+	foreach(lc, exprs)
+	{
+		Node   *node = (Node *) lfirst(lc);
+
+		assign_expr_collations(pstate, node);	/* fresh context per member */
+	}
+}
+
+/*
+ * assign_expr_collations()
+ *		Mark all nodes in the given expression tree with collation information.
+ *
+ * This is exported for the benefit of various utility commands that process
+ * expressions without building a complete Query.  It should be applied after
+ * calling transformExpr() plus any expression-modifying operations such as
+ * coerce_to_boolean().
+ */
+void
+assign_expr_collations(ParseState *pstate, Node *expr)
+{
+	assign_collations_context context;
+
+	/* start with no collation known; collation2/location2 are left unset */
+	context.pstate = pstate;
+	context.collation = InvalidOid;
+	context.strength = COLLATE_NONE;
+	context.location = -1;
+
+	/* walk the tree to mark its nodes; the boolean result is not needed */
+	(void) assign_collations_walker(expr, &context);
+}
+
+/*
+ * select_common_collation()
+ *		Identify a common collation for a list of expressions.
+ *
+ * The expressions should all return the same datatype, else this is not
+ * terribly meaningful.
+ *
+ * none_ok means that it is permitted to return InvalidOid, indicating that
+ * no common collation could be identified, even for collatable datatypes.
+ * Otherwise, an error is thrown for conflict of implicit collations.
+ *
+ * In theory, none_ok = true reflects the rules of SQL standard clause "Result
+ * of data type combinations", none_ok = false reflects the rules of clause
+ * "Collation determination" (in some cases invoked via "Grouping
+ * operations").
+ */
+Oid
+select_common_collation(ParseState *pstate, List *exprs, bool none_ok)
+{
+	assign_collations_context context;
+
+	/* start with no collation known; collation2/location2 are left unset */
+	context.pstate = pstate;
+	context.collation = InvalidOid;
+	context.strength = COLLATE_NONE;
+	context.location = -1;
+
+	/* walk the List as one unit so a common collation is inferred for it */
+	(void) assign_collations_walker((Node *) exprs, &context);
+
+	/* implicit collations conflicted: return InvalidOid or report an error */
+	if (context.strength == COLLATE_CONFLICT)
+	{
+		if (none_ok)
+			return InvalidOid;
+		ereport(ERROR,
+				(errcode(ERRCODE_COLLATION_MISMATCH),
+				 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
+						get_collation_name(context.collation),
+						get_collation_name(context.collation2)),
+				 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
+				 parser_errposition(context.pstate, context.location2)));
+	}
+
+	/*
+	 * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but
+	 * that's okay because it must mean none of the expressions returned
+	 * collatable datatypes.
+	 */
+	return context.collation;
+}
+
+/*
+ * assign_collations_walker()
+ *		Recursive guts of collation processing.
+ *
+ * Nodes with no children (eg, Vars, Consts, Params) must have been marked
+ * when built.  All upper-level nodes are marked here.
+ *
+ * Note: if this is invoked directly on a List, it will attempt to infer a
+ * common collation for all the list members.  In particular, it will throw
+ * error if there are conflicting explicit collations for different members.
+ */
+static bool
+assign_collations_walker(Node *node, assign_collations_context *context)
+{
+	assign_collations_context loccontext;
+	Oid			collation;
+	CollateStrength strength;
+	int			location;
+
+	/* Need do nothing for empty subexpressions */
+	if (node == NULL)
+		return false;
+
+	/*
+	 * Prepare for recursion.  For most node types, though not all, the
+	 * first thing we do is recurse to process all nodes below this one.
+	 * Each level of the tree has its own local context.
+	 */
+	loccontext.pstate = context->pstate;
+	loccontext.collation = InvalidOid;
+	loccontext.strength = COLLATE_NONE;
+	loccontext.location = -1;
+
+	/*
+	 * Recurse if appropriate, then determine the collation for this node.
+	 *
+	 * Note: the general cases are at the bottom of the switch, after various
+	 * special cases.
+	 */
+	switch (nodeTag(node))
+	{
+		case T_CollateExpr:
+			{
+				/*
+				 * COLLATE sets an explicitly derived collation, regardless of
+				 * what the child state is.  But we must recurse to set up
+				 * collation info below here.
+				 */
+				CollateExpr *expr = (CollateExpr *) node;
+
+				(void) expression_tree_walker(node,
+											  assign_collations_walker,
+											  (void *) &loccontext);
+
+				collation = expr->collOid;
+				Assert(OidIsValid(collation));
+				strength = COLLATE_EXPLICIT;
+				location = expr->location;
+			}
+			break;
+		case T_FieldSelect:
+			{
+				/*
+				 * FieldSelect is a special case because the field may have
+				 * a non-default collation, in which case we should use that.
+				 * The field's collation was already looked up and saved
+				 * in the node.
+				 */
+				FieldSelect *expr = (FieldSelect *) node;
+
+				/* ... but first, recurse */
+				(void) expression_tree_walker(node,
+											  assign_collations_walker,
+											  (void *) &loccontext);
+
+				if (OidIsValid(expr->resultcollid))
+				{
+					/* Node's result type is collatable. */
+					if (expr->resultcollid == DEFAULT_COLLATION_OID)
+					{
+						/*
+						 * The immediate input node necessarily yields a
+						 * composite type, so it will have no exposed
+						 * collation.  However, if we are selecting a field
+						 * from a function returning composite, see if we
+						 * can bubble up a collation from the function's
+						 * input.  XXX this is a bit of a hack, rethink ...
+						 */
+						if (IsA(expr->arg, FuncExpr))
+						{
+							FuncExpr *fexpr = (FuncExpr *) expr->arg;
+
+							if (OidIsValid(fexpr->inputcollid))
+								expr->resultcollid = fexpr->inputcollid;
+						}
+					}
+					/* Pass up field's collation as an implicit choice. */
+					collation = expr->resultcollid;
+					strength = COLLATE_IMPLICIT;
+					location = exprLocation(node);
+				}
+				else
+				{
+					/* Node's result type isn't collatable. */
+					collation = InvalidOid;
+					strength = COLLATE_NONE;
+					location = -1;		/* won't be used */
+				}
+			}
+			break;
+		case T_CaseExpr:
+			{
+				/*
+				 * CaseExpr is a special case because we do not want to
+				 * recurse into the test expression (if any).  It was
+				 * already marked with collations during transformCaseExpr,
+				 * and furthermore its collation is not relevant to the
+				 * result of the CASE --- only the output expressions are.
+				 * So we can't use expression_tree_walker here.
+				 */
+				CaseExpr   *expr = (CaseExpr *) node;
+				Oid			typcollation;
+				ListCell   *lc;
+
+				foreach(lc, expr->args)
+				{
+					CaseWhen   *when = (CaseWhen *) lfirst(lc);
+
+					Assert(IsA(when, CaseWhen));
+					/*
+					 * The condition expressions mustn't affect the CASE's
+					 * result collation either; but since they are known to
+					 * yield boolean, it's safe to recurse directly on them
+					 * --- they won't change loccontext.
+					 */
+					(void) assign_collations_walker((Node *) when->expr,
+													&loccontext);
+					(void) assign_collations_walker((Node *) when->result,
+													&loccontext);
+				}
+				(void) assign_collations_walker((Node *) expr->defresult,
+												&loccontext);
+
+				/*
+				 * Now determine the CASE's output collation.  This is the
+				 * same as the general case below.
+				 */
+				typcollation = get_typcollation(exprType(node));
+				if (OidIsValid(typcollation))
+				{
+					/* Node's result is collatable; what about its input? */
+					if (loccontext.strength > COLLATE_NONE)
+					{
+						/* Collation state bubbles up from children. */
+						collation = loccontext.collation;
+						strength = loccontext.strength;
+						location = loccontext.location;
+					}
+					else
+					{
+						/*
+						 * Collatable output produced without any collatable
+						 * input.  Use the type's collation (which is usually
+						 * DEFAULT_COLLATION_OID, but might be different for a
+						 * domain).
+						 */
+						collation = typcollation;
+						strength = COLLATE_IMPLICIT;
+						location = exprLocation(node);
+					}
+				}
+				else
+				{
+					/* Node's result type isn't collatable. */
+					collation = InvalidOid;
+					strength = COLLATE_NONE;
+					location = -1;		/* won't be used */
+				}
+
+				/*
+				 * Save the state into the expression node.  We know it
+				 * doesn't care about input collation.
+				 */
+				if (strength == COLLATE_CONFLICT)
+					exprSetCollation(node, InvalidOid);
+				else
+					exprSetCollation(node, collation);
+			}
+			break;
+		case T_RowExpr:
+			{
+				/*
+				 * RowExpr is a special case because the subexpressions
+				 * are independent: we don't want to complain if some of
+				 * them have incompatible explicit collations.
+				 */
+				RowExpr *expr = (RowExpr *) node;
+
+				assign_list_collations(context->pstate, expr->args);
+
+				/*
+				 * Since the result is always composite and therefore never
+				 * has a collation, we can just stop here: this node has no
+				 * impact on the collation of its parent.
+				 */
+				return false;			/* done */
+			}
+		case T_RowCompareExpr:
+			{
+				/*
+				 * For RowCompare, we have to find the common collation of
+				 * each pair of input columns and build a list.  If we can't
+				 * find a common collation, we just put InvalidOid into the
+				 * list, which may or may not cause an error at runtime.
+				 */
+				RowCompareExpr *expr = (RowCompareExpr *) node;
+				List	   *colls = NIL;
+				ListCell   *l;
+				ListCell   *r;
+
+				forboth(l, expr->largs, r, expr->rargs)
+				{
+					Node  *le = (Node *) lfirst(l);
+					Node  *re = (Node *) lfirst(r);
+					Oid		coll;
+
+					coll = select_common_collation(context->pstate,
+												   list_make2(le, re),
+												   true);
+					colls = lappend_oid(colls, coll);
+				}
+				expr->inputcollids = colls;
+
+				/*
+				 * Since the result is always boolean and therefore never
+				 * has a collation, we can just stop here: this node has no
+				 * impact on the collation of its parent.
+				 */
+				return false;			/* done */
+			}
+		case T_CoerceToDomain:
+			{
+				/*
+				 * If the domain declaration included a non-default COLLATE
+				 * spec, then use that collation as the output collation of
+				 * the coercion.  Otherwise allow the input collation to
+				 * bubble up.  (The input should be of the domain's base
+				 * type, therefore we don't need to worry about it not being
+				 * collatable when the domain is.)
+				 */
+				CoerceToDomain *expr = (CoerceToDomain *) node;
+				Oid		typcollation = get_typcollation(expr->resulttype);
+
+				/* ... but first, recurse */
+				(void) expression_tree_walker(node,
+											  assign_collations_walker,
+											  (void *) &loccontext);
+
+				if (OidIsValid(typcollation))
+				{
+					/* Node's result type is collatable. */
+					if (typcollation == DEFAULT_COLLATION_OID)
+					{
+						/* Collation state bubbles up from child. */
+						collation = loccontext.collation;
+						strength = loccontext.strength;
+						location = loccontext.location;
+					}
+					else
+					{
+						/* Use domain's collation as an implicit choice. */
+						collation = typcollation;
+						strength = COLLATE_IMPLICIT;
+						location = exprLocation(node);
+					}
+				}
+				else
+				{
+					/* Node's result type isn't collatable. */
+					collation = InvalidOid;
+					strength = COLLATE_NONE;
+					location = -1;		/* won't be used */
+				}
+
+				/*
+				 * Save the state into the expression node.  We know it
+				 * doesn't care about input collation.
+				 */
+				if (strength == COLLATE_CONFLICT)
+					exprSetCollation(node, InvalidOid);
+				else
+					exprSetCollation(node, collation);
+			}
+			break;
+		case T_TargetEntry:
+			(void) expression_tree_walker(node,
+										  assign_collations_walker,
+										  (void *) &loccontext);
+
+			/*
+			 * TargetEntry can have only one child, and should bubble that
+			 * state up to its parent.  We can't use the general-case code
+			 * below because exprType and friends don't work on TargetEntry.
+			 */
+			collation = loccontext.collation;
+			strength = loccontext.strength;
+			location = loccontext.location;
+			break;
+		case T_RangeTblRef:
+		case T_JoinExpr:
+		case T_FromExpr:
+		case T_SortGroupClause:
+			(void) expression_tree_walker(node,
+										  assign_collations_walker,
+										  (void *) &loccontext);
+			/*
+			 * When we're invoked on a query's jointree, we don't need to do
+			 * anything with join nodes except recurse through them to process
+			 * WHERE/ON expressions.  So just stop here.  Likewise, we don't
+			 * need to do anything when invoked on sort/group lists.
+			 */
+			return false;
+		case T_Query:
+			{
+				/*
+				 * We get here when we're invoked on the Query belonging to a
+				 * SubLink.  Act as though the Query returns its first output
+				 * column, which indeed is what it does for EXPR_SUBLINK and
+				 * ARRAY_SUBLINK cases.  In the cases where the SubLink
+				 * returns boolean, this info will be ignored.
+				 *
+				 * We needn't recurse, since the Query is already processed.
+				 */
+				Query	   *qtree = (Query *) node;
+				TargetEntry *tent;
+
+				tent = (TargetEntry *) linitial(qtree->targetList);
+				Assert(IsA(tent, TargetEntry));
+				Assert(!tent->resjunk);
+				collation = exprCollation((Node *) tent->expr);
+				/* collation doesn't change if it's converted to array */
+				strength = COLLATE_IMPLICIT;
+				location = exprLocation((Node *) tent->expr);
+			}
+			break;
+		case T_List:
+			(void) expression_tree_walker(node,
+										  assign_collations_walker,
+										  (void *) &loccontext);
+
+			/*
+			 * When processing a list, collation state just bubbles up from
+			 * the list elements.
+			 */
+			collation = loccontext.collation;
+			strength = loccontext.strength;
+			location = loccontext.location;
+			break;
+
+		case T_Var:
+		case T_Const:
+		case T_Param:
+		case T_CoerceToDomainValue:
+		case T_CaseTestExpr:
+		case T_SetToDefault:
+		case T_CurrentOfExpr:
+			/*
+			 * General case for childless expression nodes.  These should
+			 * already have a collation assigned; it is not this function's
+			 * responsibility to look into the catalogs for base-case
+			 * information.
+			 */
+			collation = exprCollation(node);
+
+			/*
+			 * Note: in most cases, there will be an assigned collation
+			 * whenever type_is_collatable(exprType(node)); but an exception
+			 * occurs for a Var referencing a subquery output column for
+			 * which a unique collation was not determinable.  That may lead
+			 * to a runtime failure if a collation-sensitive function is
+			 * applied to the Var.
+			 */
+
+			if (OidIsValid(collation))
+				strength = COLLATE_IMPLICIT;
+			else
+				strength = COLLATE_NONE;
+			location = exprLocation(node);
+			break;
+
+		default:
+			{
+				/*
+				 * General case for most expression nodes with children.
+				 * First recurse, then figure out what to assign here.
+				 */
+				Oid		typcollation;
+
+				(void) expression_tree_walker(node,
+											  assign_collations_walker,
+											  (void *) &loccontext);
+
+				typcollation = get_typcollation(exprType(node));
+				if (OidIsValid(typcollation))
+				{
+					/* Node's result is collatable; what about its input? */
+					if (loccontext.strength > COLLATE_NONE)
+					{
+						/* Collation state bubbles up from children. */
+						collation = loccontext.collation;
+						strength = loccontext.strength;
+						location = loccontext.location;
+					}
+					else
+					{
+						/*
+						 * Collatable output produced without any collatable
+						 * input.  Use the type's collation (which is usually
+						 * DEFAULT_COLLATION_OID, but might be different for a
+						 * domain).
+						 */
+						collation = typcollation;
+						strength = COLLATE_IMPLICIT;
+						location = exprLocation(node);
+					}
+				}
+				else
+				{
+					/* Node's result type isn't collatable. */
+					collation = InvalidOid;
+					strength = COLLATE_NONE;
+					location = -1;		/* won't be used */
+				}
+
+				/*
+				 * Save the result collation into the expression node.
+				 * If the state is COLLATE_CONFLICT, we'll set the collation
+				 * to InvalidOid, which might result in an error at runtime.
+				 */
+				if (strength == COLLATE_CONFLICT)
+					exprSetCollation(node, InvalidOid);
+				else
+					exprSetCollation(node, collation);
+
+				/*
+				 * Likewise save the input collation, which is the one that
+				 * any function called by this node should use.
+				 */
+				if (loccontext.strength == COLLATE_CONFLICT)
+					exprSetInputCollation(node, InvalidOid);
+				else
+					exprSetInputCollation(node, loccontext.collation);
+			}
+			break;
+	}
+
+	/*
+	 * Now, merge my information into my parent's state.  If the collation
+	 * strength for this node is different from what's already in *context,
+	 * then this node either dominates or is dominated by earlier siblings.
+	 */
+	if (strength > context->strength)
+	{
+		/* Override previous parent state */
+		context->collation = collation;
+		context->strength = strength;
+		context->location = location;
+		/* Bubble up error info if applicable */
+		if (strength == COLLATE_CONFLICT)
+		{
+			context->collation2 = loccontext.collation2;
+			context->location2 = loccontext.location2;
+		}
+	}
+	else if (strength == context->strength)
+	{
+		/* Merge, or detect error if there's a collation conflict */
+		switch (strength)
+		{
+			case COLLATE_NONE:
+				/* Nothing + nothing is still nothing */
+				break;
+			case COLLATE_IMPLICIT:
+				if (collation != context->collation)
+				{
+					/*
+					 * Non-default implicit collation always beats default.
+					 */
+					if (context->collation == DEFAULT_COLLATION_OID)
+					{
+						/* Override previous parent state */
+						context->collation = collation;
+						context->strength = strength;
+						context->location = location;
+					}
+					else if (collation != DEFAULT_COLLATION_OID)
+					{
+						/*
+						 * Oops, we have a conflict.  We cannot throw an error
+						 * here, since the conflict could be resolved by a
+						 * later sibling CollateExpr, or the parent might not
+						 * care about collation anyway.  Return enough info to
+						 * throw the error later, if needed.
+						 */
+						context->strength = COLLATE_CONFLICT;
+						context->collation2 = collation;
+						context->location2 = location;
+					}
+				}
+				break;
+			case COLLATE_CONFLICT:
+				/* We're still conflicted ... */
+				break;
+			case COLLATE_EXPLICIT:
+				if (collation != context->collation)
+				{
+					/*
+					 * Oops, we have a conflict of explicit COLLATE clauses.
+					 * Here we choose to throw error immediately; that is what
+					 * the SQL standard says to do, and there's no good reason
+					 * to be less strict.
+					 */
+					ereport(ERROR,
+							(errcode(ERRCODE_COLLATION_MISMATCH),
+							 errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"",
+									get_collation_name(context->collation),
+									get_collation_name(collation)),
+							 parser_errposition(context->pstate, location)));
+				}
+				break;
+		}
+	}
+
+	return false;
+}
diff --git a/src/backend/parser/parse_cte.c b/src/backend/parser/parse_cte.c
index 23b72b245b2..c527f7589e2 100644
--- a/src/backend/parser/parse_cte.c
+++ b/src/backend/parser/parse_cte.c
@@ -405,12 +405,16 @@ analyzeCTETargetList(ParseState *pstate, CommonTableExpr *cte, List *tlist)
 		 * might see "unknown" as a result of an untyped literal in the
 		 * non-recursive term's select list, and if we don't convert to text
 		 * then we'll have a mismatch against the UNION result.
+		 *
+		 * The column might contain 'foo' COLLATE "bar", so don't override
+		 * collation if it's already set.
 		 */
 		if (cte->cterecursive && coltype == UNKNOWNOID)
 		{
 			coltype = TEXTOID;
 			coltypmod = -1;		/* should be -1 already, but be sure */
-			colcoll = DEFAULT_COLLATION_OID;
+			if (!OidIsValid(colcoll))
+				colcoll = DEFAULT_COLLATION_OID;
 		}
 		cte->ctecoltypes = lappend_oid(cte->ctecoltypes, coltype);
 		cte->ctecoltypmods = lappend_int(cte->ctecoltypmods, coltypmod);
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 17bd2bf50ae..4986e0e5fab 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -23,6 +23,7 @@
 #include "optimizer/var.h"
 #include "parser/analyze.h"
 #include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
 #include "parser/parse_expr.h"
 #include "parser/parse_func.h"
 #include "parser/parse_oper.h"
@@ -309,8 +310,8 @@ transformExpr(ParseState *pstate, Node *expr)
 		case T_FuncExpr:
 		case T_OpExpr:
 		case T_DistinctExpr:
-		case T_ScalarArrayOpExpr:
 		case T_NullIfExpr:
+		case T_ScalarArrayOpExpr:
 		case T_BoolExpr:
 		case T_FieldSelect:
 		case T_FieldStore:
@@ -429,7 +430,6 @@ transformIndirection(ParseState *pstate, Node *basenode, List *indirection)
 														   exprType(result),
 														   InvalidOid,
 														   exprTypmod(result),
-														   exprCollation(result),
 														   subscripts,
 														   NULL);
 			subscripts = NIL;
@@ -451,7 +451,6 @@ transformIndirection(ParseState *pstate, Node *basenode, List *indirection)
 												   exprType(result),
 												   InvalidOid,
 												   exprTypmod(result),
-												   exprCollation(result),
 												   subscripts,
 												   NULL);
 
@@ -1001,25 +1000,34 @@ transformAExprNullIf(ParseState *pstate, A_Expr *a)
 {
 	Node	   *lexpr = transformExpr(pstate, a->lexpr);
 	Node	   *rexpr = transformExpr(pstate, a->rexpr);
-	Node	   *result;
+	OpExpr	   *result;
 
-	result = (Node *) make_op(pstate,
-							  a->name,
-							  lexpr,
-							  rexpr,
-							  a->location);
-	if (((OpExpr *) result)->opresulttype != BOOLOID)
+	result = (OpExpr *) make_op(pstate,
+								a->name,
+								lexpr,
+								rexpr,
+								a->location);
+
+	/*
+	 * The comparison operator itself should yield boolean ...
+	 */
+	if (result->opresulttype != BOOLOID)
 		ereport(ERROR,
 				(errcode(ERRCODE_DATATYPE_MISMATCH),
 				 errmsg("NULLIF requires = operator to yield boolean"),
 				 parser_errposition(pstate, a->location)));
 
 	/*
+	 * ... but the NullIfExpr will yield the first operand's type.
+	 */
+	result->opresulttype = exprType((Node *) linitial(result->args));
+
+	/*
 	 * We rely on NullIfExpr and OpExpr being the same struct
 	 */
 	NodeSetTag(result, T_NullIfExpr);
 
-	return result;
+	return (Node *) result;
 }
 
 static Node *
@@ -1153,6 +1161,7 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
 			}
 			newa = makeNode(ArrayExpr);
 			newa->array_typeid = array_type;
+			/* array_collid will be set by parse_collate.c */
 			newa->element_typeid = scalar_type;
 			newa->elements = aexprs;
 			newa->multidims = false;
@@ -1272,6 +1281,14 @@ transformCaseExpr(ParseState *pstate, CaseExpr *c)
 		if (exprType(arg) == UNKNOWNOID)
 			arg = coerce_to_common_type(pstate, arg, TEXTOID, "CASE");
 
+		/*
+		 * Run collation assignment on the test expression so that we know
+		 * what collation to mark the placeholder with.  In principle we
+		 * could leave it to parse_collate.c to do that later, but propagating
+		 * the result to the CaseTestExpr would be unnecessarily complicated.
+		 */
+		assign_expr_collations(pstate, arg);
+
 		placeholder = makeNode(CaseTestExpr);
 		placeholder->typeId = exprType(arg);
 		placeholder->typeMod = exprTypmod(arg);
@@ -1340,6 +1357,7 @@ transformCaseExpr(ParseState *pstate, CaseExpr *c)
 	ptype = select_common_type(pstate, resultexprs, "CASE", NULL);
 	Assert(OidIsValid(ptype));
 	newc->casetype = ptype;
+	/* casecollid will be set by parse_collate.c */
 
 	/* Convert default result clause, if necessary */
 	newc->defresult = (Expr *)
@@ -1360,8 +1378,6 @@ transformCaseExpr(ParseState *pstate, CaseExpr *c)
 								  "CASE/WHEN");
 	}
 
-	newc->casecollation = select_common_collation(pstate, resultexprs, true);
-
 	newc->location = c->location;
 
 	return (Node *) newc;
@@ -1472,7 +1488,7 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
 			param->paramid = tent->resno;
 			param->paramtype = exprType((Node *) tent->expr);
 			param->paramtypmod = exprTypmod((Node *) tent->expr);
-			param->paramcollation = exprCollation((Node *) tent->expr);
+			param->paramcollid = exprCollation((Node *) tent->expr);
 			param->location = -1;
 
 			right_list = lappend(right_list, param);
@@ -1660,6 +1676,7 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
 	}
 
 	newa->array_typeid = array_type;
+	/* array_collid will be set by parse_collate.c */
 	newa->element_typeid = element_type;
 	newa->elements = newcoercedelems;
 	newa->location = a->location;
@@ -1702,6 +1719,7 @@ transformCoalesceExpr(ParseState *pstate, CoalesceExpr *c)
 	}
 
 	newc->coalescetype = select_common_type(pstate, newargs, "COALESCE", NULL);
+	/* coalescecollid will be set by parse_collate.c */
 
 	/* Convert arguments if necessary */
 	foreach(args, newargs)
@@ -1716,7 +1734,6 @@ transformCoalesceExpr(ParseState *pstate, CoalesceExpr *c)
 	}
 
 	newc->args = newcoercedargs;
-	newc->coalescecollation = select_common_collation(pstate, newcoercedargs, true);
 	newc->location = c->location;
 	return (Node *) newc;
 }
@@ -1741,7 +1758,7 @@ transformMinMaxExpr(ParseState *pstate, MinMaxExpr *m)
 	}
 
 	newm->minmaxtype = select_common_type(pstate, newargs, funcname, NULL);
-	newm->collid = select_common_collation(pstate, newargs, false);
+	/* minmaxcollid and inputcollid will be set by parse_collate.c */
 
 	/* Convert arguments if necessary */
 	foreach(args, newargs)
@@ -2149,7 +2166,6 @@ make_row_comparison_op(ParseState *pstate, List *opname,
 	List	   *opexprs;
 	List	   *opnos;
 	List	   *opfamilies;
-	List	   *collids;
 	ListCell   *l,
 			   *r;
 	List	  **opfamily_lists;
@@ -2320,7 +2336,6 @@ make_row_comparison_op(ParseState *pstate, List *opname,
 	 * possibility that make_op inserted coercion operations.
 	 */
 	opnos = NIL;
-	collids = NIL;
 	largs = NIL;
 	rargs = NIL;
 	foreach(l, opexprs)
@@ -2328,7 +2343,6 @@ make_row_comparison_op(ParseState *pstate, List *opname,
 		OpExpr	   *cmp = (OpExpr *) lfirst(l);
 
 		opnos = lappend_oid(opnos, cmp->opno);
-		collids = lappend_oid(collids, cmp->collid);
 		largs = lappend(largs, linitial(cmp->args));
 		rargs = lappend(rargs, lsecond(cmp->args));
 	}
@@ -2337,7 +2351,7 @@ make_row_comparison_op(ParseState *pstate, List *opname,
 	rcexpr->rctype = rctype;
 	rcexpr->opnos = opnos;
 	rcexpr->opfamilies = opfamilies;
-	rcexpr->collids = collids;
+	rcexpr->inputcollids = NIL;	/* assign_expr_collations will fix this */
 	rcexpr->largs = largs;
 	rcexpr->rargs = rargs;
 
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index a2d6c598104..a187287e283 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -78,7 +78,6 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
 	bool		retset;
 	int			nvargs;
 	FuncDetailCode fdresult;
-	Oid			funccollid;
 
 	/*
 	 * Most of the rest of the parser just assumes that functions do not have
@@ -344,12 +343,6 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
 	/* perform the necessary typecasting of arguments */
 	make_fn_arguments(pstate, fargs, actual_arg_types, declared_arg_types);
 
-	/* XXX: If we knew which functions required collation information,
-	 * we could selectively set the last argument to true here. */
-	funccollid = select_common_collation(pstate, fargs, false);
-	if (!OidIsValid(funccollid))
-		funccollid = get_typcollation(rettype);
-
 	/*
 	 * If it's a variadic function call, transform the last nvargs arguments
 	 * into an array --- unless it's an "any" variadic.
@@ -374,6 +367,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
 					 errmsg("could not find array type for data type %s",
 							format_type_be(newa->element_typeid)),
 				  parser_errposition(pstate, exprLocation((Node *) vargs))));
+		/* array_collid will be set by parse_collate.c */
 		newa->multidims = false;
 		newa->location = exprLocation((Node *) vargs);
 
@@ -389,8 +383,8 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
 		funcexpr->funcresulttype = rettype;
 		funcexpr->funcretset = retset;
 		funcexpr->funcformat = COERCE_EXPLICIT_CALL;
+		/* funccollid and inputcollid will be set by parse_collate.c */
 		funcexpr->args = fargs;
-		funcexpr->collid = funccollid;
 		funcexpr->location = location;
 
 		retval = (Node *) funcexpr;
@@ -402,9 +396,9 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
 
 		aggref->aggfnoid = funcid;
 		aggref->aggtype = rettype;
+		/* aggcollid and inputcollid will be set by parse_collate.c */
 		/* args, aggorder, aggdistinct will be set by transformAggregateCall */
 		aggref->aggstar = agg_star;
-		aggref->collid = funccollid;
 		/* agglevelsup will be set by transformAggregateCall */
 		aggref->location = location;
 
@@ -458,11 +452,11 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
 
 		wfunc->winfnoid = funcid;
 		wfunc->wintype = rettype;
+		/* wincollid and inputcollid will be set by parse_collate.c */
 		wfunc->args = fargs;
 		/* winref will be set by transformWindowFuncCall */
 		wfunc->winstar = agg_star;
 		wfunc->winagg = (fdresult == FUNCDETAIL_AGGREGATE);
-		wfunc->collid = funccollid;
 		wfunc->location = location;
 
 		/*
@@ -1390,7 +1384,8 @@ ParseComplexProjection(ParseState *pstate, char *funcname, Node *first_arg,
 			fselect->fieldnum = i + 1;
 			fselect->resulttype = att->atttypid;
 			fselect->resulttypmod = att->atttypmod;
-			fselect->resultcollation = att->attcollation;
+			/* resultcollid may get overridden by parse_collate.c */
+			fselect->resultcollid = att->attcollation;
 			return (Node *) fselect;
 		}
 	}
diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c
index 163fc891799..2c76c557ecc 100644
--- a/src/backend/parser/parse_node.c
+++ b/src/backend/parser/parse_node.c
@@ -270,7 +270,6 @@ transformArrayType(Oid *arrayType, int32 *arrayTypmod)
  * elementType	OID of array's element type (fetch with transformArrayType,
  *				or pass InvalidOid to do it here)
  * arrayTypMod	typmod for the array (which is also typmod for the elements)
- * arrayColl	OID of collation of array and array's elements
  * indirection	Untransformed list of subscripts (must not be NIL)
  * assignFrom	NULL for array fetch, else transformed expression for source.
  */
@@ -280,7 +279,6 @@ transformArraySubscripts(ParseState *pstate,
 						 Oid arrayType,
 						 Oid elementType,
 						 int32 arrayTypMod,
-						 Oid arrayColl,
 						 List *indirection,
 						 Node *assignFrom)
 {
@@ -407,7 +405,7 @@ transformArraySubscripts(ParseState *pstate,
 	aref->refarraytype = arrayType;
 	aref->refelemtype = elementType;
 	aref->reftypmod = arrayTypMod;
-	aref->refcollid = arrayColl;
+	/* refcollid will be set by parse_collate.c */
 	aref->refupperindexpr = upperIndexpr;
 	aref->reflowerindexpr = lowerIndexpr;
 	aref->refexpr = (Expr *) arrayBase;
diff --git a/src/backend/parser/parse_oper.c b/src/backend/parser/parse_oper.c
index cad41d46f09..822e0a0a628 100644
--- a/src/backend/parser/parse_oper.c
+++ b/src/backend/parser/parse_oper.c
@@ -782,7 +782,6 @@ make_op(ParseState *pstate, List *opname, Node *ltree, Node *rtree,
 	List	   *args;
 	Oid			rettype;
 	OpExpr	   *result;
-	Oid			opcollid;
 
 	/* Select the operator */
 	if (rtree == NULL)
@@ -862,20 +861,14 @@ make_op(ParseState *pstate, List *opname, Node *ltree, Node *rtree,
 	/* perform the necessary typecasting of arguments */
 	make_fn_arguments(pstate, args, actual_arg_types, declared_arg_types);
 
-	/* XXX: If we knew which functions required collation information,
-	 * we could selectively set the last argument to true here. */
-	opcollid = select_common_collation(pstate, args, false);
-	if (!OidIsValid(opcollid))
-		opcollid = get_typcollation(rettype);
-
 	/* and build the expression node */
 	result = makeNode(OpExpr);
 	result->opno = oprid(tup);
 	result->opfuncid = opform->oprcode;
 	result->opresulttype = rettype;
 	result->opretset = get_func_retset(opform->oprcode);
+	/* opcollid and inputcollid will be set by parse_collate.c */
 	result->args = args;
-	result->collid = opcollid;
 	result->location = location;
 
 	ReleaseSysCache(tup);
@@ -904,7 +897,6 @@ make_scalar_array_op(ParseState *pstate, List *opname,
 	List	   *args;
 	Oid			rettype;
 	ScalarArrayOpExpr *result;
-	Oid			opcollid;
 
 	ltypeId = exprType(ltree);
 	atypeId = exprType(rtree);
@@ -999,19 +991,13 @@ make_scalar_array_op(ParseState *pstate, List *opname,
 	/* perform the necessary typecasting of arguments */
 	make_fn_arguments(pstate, args, actual_arg_types, declared_arg_types);
 
-	/* XXX: If we knew which functions required collation information,
-	 * we could selectively set the last argument to true here. */
-	opcollid = select_common_collation(pstate, args, false);
-	if (!OidIsValid(opcollid))
-		opcollid = get_typcollation(rettype);
-
 	/* and build the expression node */
 	result = makeNode(ScalarArrayOpExpr);
 	result->opno = oprid(tup);
 	result->opfuncid = opform->oprcode;
 	result->useOr = useOr;
+	/* inputcollid will be set by parse_collate.c */
 	result->args = args;
-	result->collid = opcollid;
 	result->location = location;
 
 	ReleaseSysCache(tup);
diff --git a/src/backend/parser/parse_param.c b/src/backend/parser/parse_param.c
index 9e9f2e3ca0b..1cf255669ac 100644
--- a/src/backend/parser/parse_param.c
+++ b/src/backend/parser/parse_param.c
@@ -114,7 +114,7 @@ fixed_paramref_hook(ParseState *pstate, ParamRef *pref)
 	param->paramid = paramno;
 	param->paramtype = parstate->paramTypes[paramno - 1];
 	param->paramtypmod = -1;
-	param->paramcollation = get_typcollation(param->paramtype);
+	param->paramcollid = get_typcollation(param->paramtype);
 	param->location = pref->location;
 
 	return (Node *) param;
@@ -167,7 +167,7 @@ variable_paramref_hook(ParseState *pstate, ParamRef *pref)
 	param->paramid = paramno;
 	param->paramtype = *pptype;
 	param->paramtypmod = -1;
-	param->paramcollation = get_typcollation(param->paramtype);
+	param->paramcollid = get_typcollation(param->paramtype);
 	param->location = pref->location;
 
 	return (Node *) param;
@@ -231,6 +231,8 @@ variable_coerce_param_hook(ParseState *pstate, Param *param,
 		 */
 		param->paramtypmod = -1;
 
+		param->paramcollid = get_typcollation(param->paramtype);
+
 		/* Use the leftmost of the param's and coercion's locations */
 		if (location >= 0 &&
 			(param->location < 0 || location < param->location))
diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c
index fd1529fb3f9..550783547e8 100644
--- a/src/backend/parser/parse_target.c
+++ b/src/backend/parser/parse_target.c
@@ -398,7 +398,7 @@ transformAssignedExpr(ParseState *pstate,
 
 		def->typeId = attrtype;
 		def->typeMod = attrtypmod;
-		def->collid = attrcollation;
+		def->collation = attrcollation;
 		if (indirection)
 		{
 			if (IsA(linitial(indirection), A_Indices))
@@ -785,7 +785,6 @@ transformAssignmentSubscripts(ParseState *pstate,
 											   arrayType,
 											   elementTypeId,
 											   arrayTypMod,
-											   InvalidOid,
 											   subscripts,
 											   rhs);
 
@@ -1267,7 +1266,8 @@ ExpandRowReference(ParseState *pstate, Node *expr,
 		fselect->fieldnum = i + 1;
 		fselect->resulttype = att->atttypid;
 		fselect->resulttypmod = att->atttypmod;
-		fselect->resultcollation = att->attcollation;
+		/* resultcollid may get overridden by parse_collate.c */
+		fselect->resultcollid = att->attcollation;
 
 		if (targetlist)
 		{
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index 06baf89886a..3dffcded4ac 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -46,6 +46,7 @@
 #include "nodes/nodeFuncs.h"
 #include "parser/analyze.h"
 #include "parser/parse_clause.h"
+#include "parser/parse_collate.h"
 #include "parser/parse_expr.h"
 #include "parser/parse_relation.h"
 #include "parser/parse_target.h"
@@ -1798,6 +1799,9 @@ transformIndexStmt(IndexStmt *stmt, const char *queryString)
 			/* Now do parse transformation of the expression */
 			ielem->expr = transformExpr(pstate, ielem->expr);
 
+			/* We have to fix its collations too */
+			assign_expr_collations(pstate, ielem->expr);
+
 			/*
 			 * We check only that the result type is legitimate; this is for
 			 * consistency with what transformWhereClause() checks for the