Diffstat (limited to 'contrib')

 -rw-r--r--  contrib/Makefile                    |   1
 -rw-r--r--  contrib/hashtest/Makefile           |  18
 -rw-r--r--  contrib/hashtest/hashtest--1.0.sql  |  52
 -rw-r--r--  contrib/hashtest/hashtest.c         | 527
 -rw-r--r--  contrib/hashtest/hashtest.control   |   4
 -rw-r--r--  contrib/pg_upgrade/check.c          | 216
 -rw-r--r--  contrib/pg_upgrade/controldata.c    |  34
 -rw-r--r--  contrib/pg_upgrade/info.c           |  14
 -rw-r--r--  contrib/pg_upgrade/pg_upgrade.h     |   8

9 files changed, 697 insertions(+), 177 deletions(-)
diff --git a/contrib/Makefile b/contrib/Makefile
index b37d0dd2c3..0b91ac10ee 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -20,6 +20,7 @@ SUBDIRS = \
 		earthdistance	\
 		file_fdw	\
 		fuzzystrmatch	\
+		hashtest	\
 		hstore		\
 		intagg		\
 		intarray	\
diff --git a/contrib/hashtest/Makefile b/contrib/hashtest/Makefile
new file mode 100644
index 0000000000..3ee42f87d8
--- /dev/null
+++ b/contrib/hashtest/Makefile
@@ -0,0 +1,18 @@
+# contrib/hashtest/Makefile
+
+MODULE_big = hashtest
+OBJS = hashtest.o
+
+EXTENSION = hashtest
+DATA = hashtest--1.0.sql
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/hashtest
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/hashtest/hashtest--1.0.sql b/contrib/hashtest/hashtest--1.0.sql
new file mode 100644
index 0000000000..e271baff0f
--- /dev/null
+++ b/contrib/hashtest/hashtest--1.0.sql
@@ -0,0 +1,52 @@
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION hashtest" to load this file. \quit
+
+CREATE FUNCTION chash_insert_test()
+RETURNS void
+AS 'MODULE_PATHNAME', 'chash_insert_test'
+LANGUAGE C;
+
+CREATE FUNCTION chash_search_test()
+RETURNS void
+AS 'MODULE_PATHNAME', 'chash_search_test'
+LANGUAGE C;
+
+CREATE FUNCTION chash_delete_test()
+RETURNS void
+AS 'MODULE_PATHNAME', 'chash_delete_test'
+LANGUAGE C;
+
+CREATE FUNCTION chash_concurrent_test()
+RETURNS void
+AS 'MODULE_PATHNAME', 'chash_concurrent_test'
+LANGUAGE C;
+
+CREATE FUNCTION chash_collision_test()
+RETURNS void
+AS 'MODULE_PATHNAME', 'chash_collision_test'
+LANGUAGE C;
+
+CREATE FUNCTION dynahash_insert_test()
+RETURNS void
+AS 'MODULE_PATHNAME', 'dynahash_insert_test'
+LANGUAGE C;
+
+CREATE FUNCTION dynahash_search_test()
+RETURNS void
+AS 'MODULE_PATHNAME', 'dynahash_search_test'
+LANGUAGE C;
+
+CREATE FUNCTION dynahash_delete_test()
+RETURNS void
+AS 'MODULE_PATHNAME', 'dynahash_delete_test'
+LANGUAGE C;
+
+CREATE FUNCTION dynahash_concurrent_test()
+RETURNS void
+AS 'MODULE_PATHNAME', 'dynahash_concurrent_test'
+LANGUAGE C;
+
+CREATE FUNCTION dynahash_collision_test()
+RETURNS void
+AS 'MODULE_PATHNAME', 'dynahash_collision_test'
+LANGUAGE C;
diff --git a/contrib/hashtest/hashtest.c b/contrib/hashtest/hashtest.c
new file mode 100644
index 0000000000..172a5bb156
--- /dev/null
+++ b/contrib/hashtest/hashtest.c
@@ -0,0 +1,527 @@
+/*-------------------------------------------------------------------------
+ * hashtest.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "libpq/auth.h"
+#include "lib/stringinfo.h"
+#include "miscadmin.h"
+#include "portability/instr_time.h"
+#include "storage/ipc.h"
+#include "utils/chash.h"
+
+PG_MODULE_MAGIC;
+
+void		_PG_init(void);
+Datum		chash_insert_test(PG_FUNCTION_ARGS);
+Datum		chash_search_test(PG_FUNCTION_ARGS);
+Datum		chash_delete_test(PG_FUNCTION_ARGS);
+Datum		chash_concurrent_test(PG_FUNCTION_ARGS);
+Datum		chash_collision_test(PG_FUNCTION_ARGS);
+Datum		dynahash_insert_test(PG_FUNCTION_ARGS);
+Datum		dynahash_search_test(PG_FUNCTION_ARGS);
+Datum		dynahash_delete_test(PG_FUNCTION_ARGS);
+Datum		dynahash_concurrent_test(PG_FUNCTION_ARGS);
+Datum		dynahash_collision_test(PG_FUNCTION_ARGS);
+static void hashtest_shmem_startup(void);
+
+PG_FUNCTION_INFO_V1(chash_insert_test);
+PG_FUNCTION_INFO_V1(chash_search_test);
+PG_FUNCTION_INFO_V1(chash_delete_test);
+PG_FUNCTION_INFO_V1(chash_concurrent_test);
+PG_FUNCTION_INFO_V1(chash_collision_test);
+PG_FUNCTION_INFO_V1(dynahash_insert_test);
+PG_FUNCTION_INFO_V1(dynahash_search_test);
+PG_FUNCTION_INFO_V1(dynahash_delete_test);
+PG_FUNCTION_INFO_V1(dynahash_concurrent_test);
+PG_FUNCTION_INFO_V1(dynahash_collision_test);
+
+typedef struct
+{
+	uint32		key;
+	uint32		val;
+} hentry;
+
+static CHashDescriptor cdesc = {
+	"hashtest-chash",		/* name */
+	1048576,				/* capacity */
+	sizeof(hentry),			/* element size */
+	sizeof(uint32)			/* key size */
+};
+
+#define DYNAHASH_PARTITIONS		16
+
+static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
+static CHashTable chash;
+static HTAB *dynahash;
+static LWLockId dynahash_lock[DYNAHASH_PARTITIONS];
+static ClientAuthentication_hook_type original_client_auth_hook = NULL;
+
+static void hashtest_client_auth_hook(Port *port, int status);
+static void chash_write_stats_to_log(int code, Datum dummy);
+
+#define dynahash_get_lock(hashcode) \
+	(dynahash_lock[(hashcode) % DYNAHASH_PARTITIONS])
+
+void
+_PG_init(void)
+{
+	Size		cs;
+	Size		ds;
+
+	if (!process_shared_preload_libraries_in_progress)
+		return;
+	prev_shmem_startup_hook = shmem_startup_hook;
+	shmem_startup_hook = hashtest_shmem_startup;
+	chash = CHashBootstrap(&cdesc);
+	cs = CHashEstimateSize(chash);
+	RequestAddinShmemSpace(cs);
+	ds = hash_estimate_size(cdesc.capacity, cdesc.element_size);
+	RequestAddinShmemSpace(ds);
+	elog(LOG, "chash: %u bytes; dynahash: %u bytes", (unsigned) cs,
+		 (unsigned) ds);
+	RequestAddinLWLocks(DYNAHASH_PARTITIONS);
+	original_client_auth_hook = ClientAuthentication_hook;
+	ClientAuthentication_hook = hashtest_client_auth_hook;
+
+}
+
+static void
+hashtest_client_auth_hook(Port *port, int status)
+{
+	if (original_client_auth_hook)
+		original_client_auth_hook(port, status);
+	on_proc_exit(chash_write_stats_to_log, (Datum) 0);
+}
+
+static void
+chash_write_stats_to_log(int code, Datum dummy)
+{
+	uint64		stats[CHS_NumberOfStatistics];
+	CHashStatisticsType i;
+	StringInfoData buf;
+
+	CHashStatistics(chash, stats);
+	initStringInfo(&buf);
+
+	for (i = 0; i < CHS_NumberOfStatistics; ++i)
+	{
+		if (stats[i] == 0)
+			continue;
+		appendStringInfo(&buf, UINT64_FORMAT " %s; ", stats[i],
+						 CHashStatisticsNames[i]);
+	}
+
+	if (buf.len > 1)
+	{
+		buf.data[buf.len-2] = '\0';
+		elog(LOG, "chash statistics: %s", buf.data);
+	}
+}
+
+static void
+hashtest_shmem_startup(void)
+{
+	HASHCTL		info;
+	uint32		i;
+
+	if (prev_shmem_startup_hook)
+		prev_shmem_startup_hook();
+
+	/* Initialize concurrent hash table. */
+	chash = CHashInitialize(chash, &cdesc);
+
+	/* Initialize shared dynahash table. */
+	info.keysize = cdesc.key_size;
+	info.entrysize = cdesc.element_size;
+	info.hash = tag_hash;
+	info.num_partitions = DYNAHASH_PARTITIONS;
+
+	dynahash = ShmemInitHash("hashtest-dynahash",
+							 cdesc.capacity, cdesc.capacity,
+							 &info,
+							 HASH_ELEM | HASH_FUNCTION | HASH_PARTITION);
+
+	for (i = 0; i < DYNAHASH_PARTITIONS; ++i)
+		dynahash_lock[i] = LWLockAssign();
+}
+
+Datum
+chash_insert_test(PG_FUNCTION_ARGS)
+{
+	uint32		i;
+	hentry		e;
+
+	for (i = 0; i < 1000000; ++i)
+	{
+		bool		ok;
+
+		e.key = i;
+		e.val = i * 31;
+		ok = CHashInsert(chash, &e);
+		if (!ok)
+			elog(LOG, "insert %u: failed", i);
+		ok = CHashInsert(chash, &e);
+		if (ok)
+			elog(LOG, "insert %u: worked twice", i);
+	}
+
+	PG_RETURN_VOID();
+}
+
+Datum
+chash_search_test(PG_FUNCTION_ARGS)
+{
+	uint32		i;
+	hentry		e;
+
+	for (i = 0; i < 1000000; ++i)
+	{
+		bool		ok;
+
+		e.key = i;
+		ok = CHashSearch(chash, &e);
+		if (!ok)
+			elog(LOG, "search %u: not found", i);
+		else if (e.val != e.key * 31)
+			elog(LOG, "search %u: found %u", i, e.val);
+	}
+
+	PG_RETURN_VOID();
+}
+
+Datum
+chash_delete_test(PG_FUNCTION_ARGS)
+{
+	uint32		i;
+	hentry		e;
+
+	for (i = 0; i < 1000000; ++i)
+	{
+		bool		ok;
+
+		e.key = i;
+		ok = CHashDelete(chash, &e);
+		if (!ok)
+			elog(LOG, "delete %u: not found", i);
+		ok = CHashDelete(chash, &e);
+		if (ok)
+			elog(LOG, "delete %u: found twice", i);
+	}
+
+	PG_RETURN_VOID();
+}
+
+Datum
+chash_concurrent_test(PG_FUNCTION_ARGS)
+{
+	uint32		i;
+	hentry		e;
+	uint32		seed = MyProcPid << 16;
+
+	for (i = 0; i < 10000; ++i)
+	{
+		bool		ok;
+
+		e.key = seed | i;
+		e.val = MyProcPid;
+		ok = CHashInsert(chash, &e);
+		if (!ok)
+			elog(LOG, "insert %u: found", i);
+	}
+
+	for (i = 0; i < 10000; ++i)
+	{
+		bool		ok;
+
+		e.key = seed | i;
+		e.val = 0;
+		ok = CHashSearch(chash, &e);
+		if (!ok)
+		{
+			uint64		retry = 1;
+			elog(LOG, "search %u: not found", i);
+			while (!CHashSearch(chash, &e))
+				++retry;
+			elog(LOG, "search %u: eventually found it after "
+				 UINT64_FORMAT " retries", i, retry);
+		}
+		if (e.val != MyProcPid)
+			elog(LOG, "search %u: expected %u found %u", i, (unsigned) MyProcPid, e.val);
+	}
+
+	for (i = 0; i < 10000; ++i)
+	{
+		bool		ok;
+
+		e.key = seed | i;
+		ok = CHashDelete(chash, &e);
+		if (!ok)
+		{
+			uint64		retry = 1;
+			elog(LOG, "delete %u: not found", i);
+			while (!CHashDelete(chash, &e))
+				++retry;
+			elog(LOG, "delete %u: eventually deleted it after "
+				 UINT64_FORMAT " retries", i, retry);
+		}
+	}
+
+	PG_RETURN_VOID();
+}
+
+Datum
+chash_collision_test(PG_FUNCTION_ARGS)
+{
+	uint32		i;
+	hentry		e;
+
+	/* Don't stack-allocate this. */
+	static bool mine[10000];
+
+	memset(mine, 0, 10000 * sizeof(bool));
+
+	for (i = 0; i < 10000; ++i)
+	{
+		bool		ok;
+
+		e.key = i;
+		e.val = MyProcPid;
+		ok = CHashInsert(chash, &e);
+		if (ok)
+			mine[i] = true;
+	}
+
+	for (i = 0; i < 10000; ++i)
+	{
+		bool		ok;
+
+		if (!mine[i])
+			continue;
+		e.key = i;
+		ok = CHashSearch(chash, &e);
+		if (!ok)
+			elog(LOG, "search %u: not found", i);
+		else if (e.val != MyProcPid)
+			elog(LOG, "search %u: expected %u found %u",
+				 i, (unsigned) MyProcPid, e.val);
+		ok = CHashDelete(chash, &e);
+		if (!ok)
+			elog(LOG, "delete %u: not found", i);
+	}
+
+	PG_RETURN_VOID();
+}
+
+static bool
+dynahash_insert(uint32 key, uint32 val)
+{
+	bool		found;
+	uint32		hashcode;
+	hentry	   *e;
+	LWLockId	lockid;
+
+	hashcode = get_hash_value(dynahash, (void *) &key);
+	lockid = dynahash_get_lock(hashcode);
+	LWLockAcquire(lockid, LW_EXCLUSIVE);
+	e = hash_search_with_hash_value(dynahash, (void *) &key,
+									hashcode, HASH_ENTER, &found);
+	if (!found)
+		e->val = val;
+	LWLockRelease(lockid);
+
+	return !found;
+}
+
+static bool
+dynahash_search(uint32 key, uint32 *val)
+{
+	uint32		hashcode;
+	hentry	   *e;
+	LWLockId	lockid;
+
+	hashcode = get_hash_value(dynahash, (void *) &key);
+	lockid = dynahash_get_lock(hashcode);
+	LWLockAcquire(lockid, LW_SHARED);
+	e = hash_search_with_hash_value(dynahash, (void *) &key,
+									hashcode, HASH_FIND, NULL);
+	if (e)
+		*val = e->val;
+	LWLockRelease(lockid);
+
+	return e != NULL;
+}
+
+static bool
+dynahash_delete(uint32 key)
+{
+	uint32		hashcode;
+	hentry	   *e;
+	LWLockId	lockid;
+
+	hashcode = get_hash_value(dynahash, (void *) &key);
+	lockid = dynahash_get_lock(hashcode);
+	LWLockAcquire(lockid, LW_EXCLUSIVE);
+	e = hash_search_with_hash_value(dynahash, (void *) &key,
+									hashcode, HASH_REMOVE, NULL);
+	LWLockRelease(lockid);
+
+	return e != NULL;
+}
+
+Datum
+dynahash_insert_test(PG_FUNCTION_ARGS)
+{
+	uint32		i;
+
+	for (i = 0; i < 1000000; ++i)
+	{
+		bool		ok;
+
+		ok = dynahash_insert(i, i * 31);
+		if (!ok)
+			elog(LOG, "insert %u: failed", i);
+		ok = dynahash_insert(i, i * 31);
+		if (ok)
+			elog(LOG, "insert %u: worked twice", i);
+	}
+
+	PG_RETURN_VOID();
+}
+
+Datum
+dynahash_search_test(PG_FUNCTION_ARGS)
+{
+	uint32		i;
+
+	for (i = 0; i < 1000000; ++i)
+	{
+		bool		ok;
+		uint32		val;
+
+		ok = dynahash_search(i, &val);
+		if (!ok)
+			elog(LOG, "search %u: not found", i);
+		else if (val != i * 31)
+			elog(LOG, "search %u: found %u", i, val);
+	}
+
+	PG_RETURN_VOID();
+}
+
+Datum
+dynahash_delete_test(PG_FUNCTION_ARGS)
+{
+	uint32		i;
+
+	for (i = 0; i < 1000000; ++i)
+	{
+		bool		ok;
+
+		ok = dynahash_delete(i);
+		if (!ok)
+			elog(LOG, "delete %u: not found", i);
+		ok = dynahash_delete(i);
+		if (ok)
+			elog(LOG, "delete %u: found twice", i);
+	}
+
+	PG_RETURN_VOID();
+}
+
+Datum
+dynahash_concurrent_test(PG_FUNCTION_ARGS)
+{
+	uint32		i;
+	uint32		val;
+	uint32		seed = MyProcPid << 16;
+
+	for (i = 0; i < 10000; ++i)
+	{
+		bool		ok;
+
+		ok = dynahash_insert(seed | i, MyProcPid);
+		if (!ok)
+			elog(LOG, "insert %u: found", i);
+	}
+
+	for (i = 0; i < 10000; ++i)
+	{
+		bool		ok;
+
+		ok = dynahash_search(seed | i, &val);
+		if (!ok)
+		{
+			uint64		retry = 1;
+			elog(LOG, "search %u: not found", i);
+			while (!dynahash_search(seed | i, &val))
+				++retry;
+			elog(LOG, "search %u: eventually found it after "
+				 UINT64_FORMAT " retries", i, retry);
+		}
+		if (val != MyProcPid)
+			elog(LOG, "search %u: expected %u found %u",
+				 i, (unsigned) MyProcPid, val);
+	}
+
+	for (i = 0; i < 10000; ++i)
+	{
+		bool		ok;
+
+		ok = dynahash_delete(seed | i);
+		if (!ok)
+		{
+			uint64		retry = 1;
+			elog(LOG, "delete %u: not found", i);
+			while (!dynahash_delete(seed | i))
+				++retry;
+			elog(LOG, "delete %u: eventually deleted it after "
+				 UINT64_FORMAT " retries", i, retry);
+		}
+	}
+
+	PG_RETURN_VOID();
+}
+
+Datum
+dynahash_collision_test(PG_FUNCTION_ARGS)
+{
+	uint32		i;
+	uint32		val;
+
+	/* Don't stack-allocate this. */
+	static bool mine[10000];
+
+	memset(mine, 0, 10000 * sizeof(bool));
+
+	for (i = 0; i < 10000; ++i)
+	{
+		bool		ok;
+
+		ok = dynahash_insert(i, MyProcPid);
+		if (ok)
+			mine[i] = true;
+	}
+
+	for (i = 0; i < 10000; ++i)
+	{
+		bool		ok;
+
+		if (!mine[i])
+			continue;
+		ok = dynahash_search(i, &val);
+		if (!ok)
+			elog(LOG, "search %u: not found", i);
+		else if (val != MyProcPid)
+			elog(LOG, "search %u: expected %u found %u",
+				 i, (unsigned) MyProcPid, val);
+		ok = dynahash_delete(i);
+		if (!ok)
+			elog(LOG, "delete %u: not found", i);
+	}
+
+	PG_RETURN_VOID();
+}
diff --git a/contrib/hashtest/hashtest.control b/contrib/hashtest/hashtest.control
new file mode 100644
index 0000000000..b8e0f01346
--- /dev/null
+++ b/contrib/hashtest/hashtest.control
@@ -0,0 +1,4 @@
+comment = 'hash testing code'
+default_version = '1.0'
+module_pathname = '$libdir/hashtest'
+relocatable = true
diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
index bbfcab71ce..56db0dd654 100644
--- a/contrib/pg_upgrade/check.c
+++ b/contrib/pg_upgrade/check.c
@@ -14,12 +14,10 @@
 
 #include "pg_upgrade.h"
 
-static void set_locale_and_encoding(ClusterInfo *cluster);
 static void check_new_cluster_is_empty(void);
-static void check_locale_and_encoding(ControlData *oldctrl,
-						  ControlData *newctrl);
-static bool equivalent_locale(const char *loca, const char *locb);
-static bool equivalent_encoding(const char *chara, const char *charb);
+static void check_databases_are_compatible(void);
+static void check_locale_and_encoding(DbInfo *olddb, DbInfo *newdb);
+static bool equivalent_locale(int category, const char *loca, const char *locb);
 static void check_is_install_user(ClusterInfo *cluster);
 static void check_for_prepared_transactions(ClusterInfo *cluster);
 static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
@@ -81,8 +79,6 @@ check_and_dump_old_cluster(bool live_check)
 	if (!live_check)
 		start_postmaster(&old_cluster, true);
 
-	set_locale_and_encoding(&old_cluster);
-
 	get_pg_database_relfilenode(&old_cluster);
 
 	/* Extract a list of databases and tables from the old cluster */
@@ -127,13 +123,10 @@
 void
 check_new_cluster(void)
 {
-	set_locale_and_encoding(&new_cluster);
-
-	check_locale_and_encoding(&old_cluster.controldata, &new_cluster.controldata);
-
 	get_db_and_rel_infos(&new_cluster);
 
 	check_new_cluster_is_empty();
+	check_databases_are_compatible();
 
 	check_loadable_libraries();
@@ -279,93 +272,25 @@ check_cluster_compatibility(bool live_check)
 /*
- * set_locale_and_encoding()
- *
- * query the database to get the template0 locale
- */
-static void
-set_locale_and_encoding(ClusterInfo *cluster)
-{
-	ControlData *ctrl = &cluster->controldata;
-	PGconn	   *conn;
-	PGresult   *res;
-	int			i_encoding;
-	int			cluster_version = cluster->major_version;
-
-	conn = connectToServer(cluster, "template1");
-
-	/* for pg < 80400, we got the values from pg_controldata */
-	if (cluster_version >= 80400)
-	{
-		int			i_datcollate;
-		int			i_datctype;
-
-		res = executeQueryOrDie(conn,
-								"SELECT datcollate, datctype "
-								"FROM pg_catalog.pg_database "
-								"WHERE datname = 'template0' ");
-		assert(PQntuples(res) == 1);
-
-		i_datcollate = PQfnumber(res, "datcollate");
-		i_datctype = PQfnumber(res, "datctype");
-
-		if (GET_MAJOR_VERSION(cluster->major_version) < 902)
-		{
-			/*
-			 * Pre-9.2 did not canonicalize the supplied locale names to match
-			 * what the system returns, while 9.2+ does, so convert pre-9.2 to
-			 * match.
-			 */
-			ctrl->lc_collate = get_canonical_locale_name(LC_COLLATE,
-						   pg_strdup(PQgetvalue(res, 0, i_datcollate)));
-			ctrl->lc_ctype = get_canonical_locale_name(LC_CTYPE,
-							 pg_strdup(PQgetvalue(res, 0, i_datctype)));
-		}
-		else
-		{
-			ctrl->lc_collate = pg_strdup(PQgetvalue(res, 0, i_datcollate));
-			ctrl->lc_ctype = pg_strdup(PQgetvalue(res, 0, i_datctype));
-		}
-
-		PQclear(res);
-	}
-
-	res = executeQueryOrDie(conn,
-							"SELECT pg_catalog.pg_encoding_to_char(encoding) "
-							"FROM pg_catalog.pg_database "
-							"WHERE datname = 'template0' ");
-	assert(PQntuples(res) == 1);
-
-	i_encoding = PQfnumber(res, "pg_encoding_to_char");
-	ctrl->encoding = pg_strdup(PQgetvalue(res, 0, i_encoding));
-
-	PQclear(res);
-
-	PQfinish(conn);
-}
-
-
-/*
  * check_locale_and_encoding()
  *
- * Check that old and new locale and encoding match. Even though the backend
- * tries to canonicalize stored locale names, the platform often doesn't
- * cooperate, so it's entirely possible that one DB thinks its locale is
- * "en_US.UTF-8" while the other says "en_US.utf8". Try to be forgiving.
+ * Check that locale and encoding of a database in the old and new clusters
+ * are compatible.
 */
 static void
-check_locale_and_encoding(ControlData *oldctrl,
-						  ControlData *newctrl)
+check_locale_and_encoding(DbInfo *olddb, DbInfo *newdb)
 {
-	if (!equivalent_locale(oldctrl->lc_collate, newctrl->lc_collate))
-		pg_fatal("lc_collate cluster values do not match: old \"%s\", new \"%s\"\n",
-				 oldctrl->lc_collate, newctrl->lc_collate);
-	if (!equivalent_locale(oldctrl->lc_ctype, newctrl->lc_ctype))
-		pg_fatal("lc_ctype cluster values do not match: old \"%s\", new \"%s\"\n",
-				 oldctrl->lc_ctype, newctrl->lc_ctype);
-	if (!equivalent_encoding(oldctrl->encoding, newctrl->encoding))
-		pg_fatal("encoding cluster values do not match: old \"%s\", new \"%s\"\n",
-				 oldctrl->encoding, newctrl->encoding);
+	if (olddb->db_encoding != newdb->db_encoding)
+		pg_fatal("encodings for database \"%s\" do not match: old \"%s\", new \"%s\"\n",
+				 olddb->db_name,
+				 pg_encoding_to_char(olddb->db_encoding),
+				 pg_encoding_to_char(newdb->db_encoding));
+	if (!equivalent_locale(LC_COLLATE, olddb->db_collate, newdb->db_collate))
+		pg_fatal("lc_collate values for database \"%s\" do not match: old \"%s\", new \"%s\"\n",
+				 olddb->db_name, olddb->db_collate, newdb->db_collate);
+	if (!equivalent_locale(LC_CTYPE, olddb->db_ctype, newdb->db_ctype))
+		pg_fatal("lc_ctype values for database \"%s\" do not match: old \"%s\", new \"%s\"\n",
+				 olddb->db_name, olddb->db_ctype, newdb->db_ctype);
 }
 
 /*
@@ -373,61 +298,46 @@ check_locale_and_encoding(ControlData *oldctrl,
 *
 * Best effort locale-name comparison.  Return false if we are not 100% sure
 * the locales are equivalent.
+ *
+ * Note: The encoding parts of the names are ignored. This function is
+ * currently used to compare locale names stored in pg_database, and
+ * pg_database contains a separate encoding field. That's compared directly
+ * in check_locale_and_encoding().
 */
 static bool
-equivalent_locale(const char *loca, const char *locb)
+equivalent_locale(int category, const char *loca, const char *locb)
 {
-	const char *chara = strrchr(loca, '.');
-	const char *charb = strrchr(locb, '.');
-	int			lencmp;
-
-	/* If they don't both contain an encoding part, just do strcasecmp(). */
-	if (!chara || !charb)
-		return (pg_strcasecmp(loca, locb) == 0);
+	const char *chara;
+	const char *charb;
+	char	   *canona;
+	char	   *canonb;
+	int			lena;
+	int			lenb;
 
 	/*
-	 * Compare the encoding parts. Windows tends to use code page numbers for
-	 * the encoding part, which equivalent_encoding() won't like, so accept if
-	 * the strings are case-insensitive equal; otherwise use
-	 * equivalent_encoding() to compare.
+	 * If the names are equal, the locales are equivalent. Checking this
+	 * first avoids calling setlocale() in the common case that the names
+	 * are equal. That's a good thing, if setlocale() is buggy, for example.
	 */
-	if (pg_strcasecmp(chara + 1, charb + 1) != 0 &&
-		!equivalent_encoding(chara + 1, charb + 1))
-		return false;
+	if (pg_strcasecmp(loca, locb) == 0)
+		return true;
 
 	/*
-	 * OK, compare the locale identifiers (e.g. en_US part of en_US.utf8).
-	 *
-	 * It's tempting to ignore non-alphanumeric chars here, but for now it's
-	 * not clear that that's necessary; just do case-insensitive comparison.
+	 * Not identical. Canonicalize both names, remove the encoding parts,
+	 * and try again.
	 */
-	lencmp = chara - loca;
-	if (lencmp != charb - locb)
-		return false;
+	canona = get_canonical_locale_name(category, loca);
+	chara = strrchr(canona, '.');
+	lena = chara ? (chara - canona) : strlen(canona);
 
-	return (pg_strncasecmp(loca, locb, lencmp) == 0);
-}
+	canonb = get_canonical_locale_name(category, locb);
+	charb = strrchr(canonb, '.');
+	lenb = charb ? (charb - canonb) : strlen(canonb);
 
-/*
- * equivalent_encoding()
- *
- * Best effort encoding-name comparison.  Return true only if the encodings
- * are valid server-side encodings and known equivalent.
- *
- * Because the lookup in pg_valid_server_encoding() does case folding and
- * ignores non-alphanumeric characters, this will recognize many popular
- * variant spellings as equivalent, eg "utf8" and "UTF-8" will match.
- */
-static bool
-equivalent_encoding(const char *chara, const char *charb)
-{
-	int			enca = pg_valid_server_encoding(chara);
-	int			encb = pg_valid_server_encoding(charb);
+	if (lena == lenb && pg_strncasecmp(canona, canonb, lena) == 0)
+		return true;
 
-	if (enca < 0 || encb < 0)
-		return false;
-
-	return (enca == encb);
+	return false;
 }
@@ -450,7 +360,35 @@ check_new_cluster_is_empty(void)
 					 new_cluster.dbarr.dbs[dbnum].db_name);
 		}
 	}
+}
+
+/*
+ * Check that every database that already exists in the new cluster is
+ * compatible with the corresponding database in the old one.
+ */
+static void
+check_databases_are_compatible(void)
+{
+	int			newdbnum;
+	int			olddbnum;
+	DbInfo	   *newdbinfo;
+	DbInfo	   *olddbinfo;
+
+	for (newdbnum = 0; newdbnum < new_cluster.dbarr.ndbs; newdbnum++)
+	{
+		newdbinfo = &new_cluster.dbarr.dbs[newdbnum];
+
+		/* Find the corresponding database in the old cluster */
+		for (olddbnum = 0; olddbnum < old_cluster.dbarr.ndbs; olddbnum++)
+		{
+			olddbinfo = &old_cluster.dbarr.dbs[olddbnum];
+			if (strcmp(newdbinfo->db_name, olddbinfo->db_name) == 0)
+			{
+				check_locale_and_encoding(olddbinfo, newdbinfo);
+				break;
+			}
+		}
+	}
 }
@@ -470,7 +408,8 @@ create_script_for_cluster_analyze(char **analyze_script_file_name)
 	if (os_info.user_specified)
 		user_specification = psprintf("-U \"%s\" ", os_info.user);
 
-	*analyze_script_file_name = psprintf("analyze_new_cluster.%s", SCRIPT_EXT);
+	*analyze_script_file_name = psprintf("%sanalyze_new_cluster.%s",
+										 SCRIPT_PREFIX, SCRIPT_EXT);
 
 	if ((script = fopen_priv(*analyze_script_file_name, "w")) == NULL)
 		pg_fatal("Could not open file \"%s\": %s\n",
@@ -551,7 +490,8 @@ create_script_for_old_cluster_deletion(char **deletion_script_file_name)
 	int			tblnum;
 	char		old_cluster_pgdata[MAXPGPATH];
 
-	*deletion_script_file_name = psprintf("delete_old_cluster.%s", SCRIPT_EXT);
+	*deletion_script_file_name = psprintf("%sdelete_old_cluster.%s",
+										  SCRIPT_PREFIX, SCRIPT_EXT);
 
 	/*
 	 * Some users (oddly) create tablespaces inside the cluster data
diff --git a/contrib/pg_upgrade/controldata.c b/contrib/pg_upgrade/controldata.c
index 8379ebd71b..4e9d5948fa 100644
--- a/contrib/pg_upgrade/controldata.c
+++ b/contrib/pg_upgrade/controldata.c
@@ -122,10 +122,6 @@ get_control_data(ClusterInfo *cluster, bool live_check)
 		pg_fatal("Could not get control data using %s: %s\n",
 				 cmd, getErrorText(errno));
 
-	/* Only pre-8.4 has these so if they are not set below we will check later */
-	cluster->controldata.lc_collate = NULL;
-	cluster->controldata.lc_ctype = NULL;
-
 	/* Only in <= 9.2 */
 	if (GET_MAJOR_VERSION(cluster->major_version) <= 902)
 	{
@@ -404,36 +400,6 @@ get_control_data(ClusterInfo *cluster, bool live_check)
 			cluster->controldata.data_checksum_version = str2uint(p);
 			got_data_checksum_version = true;
 		}
-		/* In pre-8.4 only */
-		else if ((p = strstr(bufin, "LC_COLLATE:")) != NULL)
-		{
-			p = strchr(p, ':');
-
-			if (p == NULL || strlen(p) <= 1)
-				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
-
-			p++;				/* remove ':' char */
-			/* skip leading spaces and remove trailing newline */
-			p += strspn(p, " ");
-			if (strlen(p) > 0 && *(p + strlen(p) - 1) == '\n')
-				*(p + strlen(p) - 1) = '\0';
-			cluster->controldata.lc_collate = pg_strdup(p);
-		}
-		/* In pre-8.4 only */
-		else if ((p = strstr(bufin, "LC_CTYPE:")) != NULL)
-		{
-			p = strchr(p, ':');
-
-			if (p == NULL || strlen(p) <= 1)
-				pg_fatal("%d: controldata retrieval problem\n", __LINE__);
-
-			p++;				/* remove ':' char */
-			/* skip leading spaces and remove trailing newline */
-			p += strspn(p, " ");
-			if (strlen(p) > 0 && *(p + strlen(p) - 1) == '\n')
-				*(p + strlen(p) - 1) = '\0';
-			cluster->controldata.lc_ctype = pg_strdup(p);
-		}
 	}
 
 	if (output)
diff --git a/contrib/pg_upgrade/info.c b/contrib/pg_upgrade/info.c
index a1773aa8e5..c347dfc493 100644
--- a/contrib/pg_upgrade/info.c
+++ b/contrib/pg_upgrade/info.c
@@ -239,11 +239,15 @@ get_db_infos(ClusterInfo *cluster)
 	DbInfo	   *dbinfos;
 	int			i_datname,
 				i_oid,
+				i_encoding,
+				i_datcollate,
+				i_datctype,
 				i_spclocation;
 	char		query[QUERY_ALLOC];
 
 	snprintf(query, sizeof(query),
-			 "SELECT d.oid, d.datname, %s "
+			 "SELECT d.oid, d.datname, d.encoding, d.datcollate, d.datctype, "
+			 "%s AS spclocation "
 			 "FROM pg_catalog.pg_database d "
 			 " LEFT OUTER JOIN pg_catalog.pg_tablespace t "
 			 " ON d.dattablespace = t.oid "
@@ -252,12 +256,15 @@ get_db_infos(ClusterInfo *cluster)
 			 "ORDER BY 2",
 	/* 9.2 removed the spclocation column */
 			 (GET_MAJOR_VERSION(cluster->major_version) <= 901) ?
-	"t.spclocation" : "pg_catalog.pg_tablespace_location(t.oid) AS spclocation");
+	"t.spclocation" : "pg_catalog.pg_tablespace_location(t.oid)");
 
 	res = executeQueryOrDie(conn, "%s", query);
 
 	i_oid = PQfnumber(res, "oid");
 	i_datname = PQfnumber(res, "datname");
+	i_encoding = PQfnumber(res, "encoding");
+	i_datcollate = PQfnumber(res, "datcollate");
+	i_datctype = PQfnumber(res, "datctype");
 	i_spclocation = PQfnumber(res, "spclocation");
 
 	ntups = PQntuples(res);
@@ -267,6 +274,9 @@ get_db_infos(ClusterInfo *cluster)
 	{
 		dbinfos[tupnum].db_oid = atooid(PQgetvalue(res, tupnum, i_oid));
 		dbinfos[tupnum].db_name = pg_strdup(PQgetvalue(res, tupnum, i_datname));
+		dbinfos[tupnum].db_encoding = atoi(PQgetvalue(res, tupnum, i_encoding));
+		dbinfos[tupnum].db_collate = pg_strdup(PQgetvalue(res, tupnum, i_datcollate));
+		dbinfos[tupnum].db_ctype = pg_strdup(PQgetvalue(res, tupnum, i_datctype));
 		snprintf(dbinfos[tupnum].db_tablespace, sizeof(dbinfos[tupnum].db_tablespace), "%s",
 				 PQgetvalue(res, tupnum, i_spclocation));
 	}
diff --git a/contrib/pg_upgrade/pg_upgrade.h b/contrib/pg_upgrade/pg_upgrade.h
index 56a7505a96..c3b81e4a08 100644
--- a/contrib/pg_upgrade/pg_upgrade.h
+++ b/contrib/pg_upgrade/pg_upgrade.h
@@ -76,6 +76,7 @@ extern char *output_files[];
 #define PATH_SEPARATOR		'/'
 #define RM_CMD				"rm -f"
 #define RMDIR_CMD			"rm -rf"
+#define SCRIPT_PREFIX		"./"
 #define SCRIPT_EXT			"sh"
 #define ECHO_QUOTE			"'"
 #define ECHO_BLANK			""
@@ -86,6 +87,7 @@
 #define PATH_SEPARATOR		'\\'
 #define RM_CMD				"DEL /q"
 #define RMDIR_CMD			"RMDIR /s/q"
+#define SCRIPT_PREFIX		""
 #define SCRIPT_EXT			"bat"
 #define EXE_EXT				".exe"
 #define ECHO_QUOTE			""
@@ -180,6 +182,9 @@ typedef struct
 	char	   *db_name;		/* database name */
 	char		db_tablespace[MAXPGPATH];	/* database default tablespace
 											 * path */
+	char	   *db_collate;
+	char	   *db_ctype;
+	int			db_encoding;
 	RelInfoArr	rel_arr;		/* array of all user relinfos */
 } DbInfo;
@@ -218,9 +223,6 @@ typedef struct
 	bool		date_is_int;
 	bool		float8_pass_by_value;
 	bool		data_checksum_version;
-	char	   *lc_collate;
-	char	   *lc_ctype;
-	char	   *encoding;
 } ControlData;
 
 /*
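For reference, a minimal session exercising the new hashtest module could look like the sketch below. It assumes shared_preload_libraries = 'hashtest' has been set in postgresql.conf and the server restarted, since _PG_init() only reserves the shared memory and LWLocks while shared_preload_libraries is being processed; the *_concurrent_test() and *_collision_test() functions are meant to be run from several sessions at once, and per-backend chash statistics are written to the server log at backend exit.

    -- sketch only; requires the library to have been preloaded at server start
    CREATE EXTENSION hashtest;

    -- single-backend passes over the concurrent hash table
    SELECT chash_insert_test();
    SELECT chash_search_test();
    SELECT chash_delete_test();

    -- the same workload against the partitioned dynahash table
    SELECT dynahash_insert_test();
    SELECT dynahash_search_test();
    SELECT dynahash_delete_test();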