From bf00bbb0c4940b80b46b7e5b379cd64184f2262f Mon Sep 17 00:00:00 2001
From: Marc G. Fournier
Date: Fri, 24 Jul 1998 03:32:46 +0000
Subject: I really hope that I haven't missed anything in this one...

From: t-ishii@sra.co.jp

Attached are patches to enhance the multi-byte support.  (patches are
against 7/18 snapshot)

* determine encoding at initdb/createdb rather than compile time

Now initdb/createdb have an option to specify the encoding. Also, I
modified the syntax of CREATE DATABASE to accept an encoding option. See
README.mb for more details.

For this purpose I have added a new column "encoding" to pg_database.
Also pg_attribute and pg_class are changed to catch up with the
modification to pg_database.  Actually I have added pg_database_mb.h,
pg_attribute_mb.h and pg_class_mb.h. These are used only when MB is
enabled. The reason for having separate files is that I couldn't find a
way to use ifdef or whatever in those files. I have to admit it looks
ugly, but I found no other way.

* support for PGCLIENTENCODING when issuing COPY command

commands/copy.c modified.

* support for SQL92 syntax "SET NAMES"

See gram.y.

* support for LATIN2-5
* add UNICODE regression test case
* new test suite for MB

New directory test/mb added.

* clean up source files

The basic idea is to give MB its own subdirectories for easier
maintenance. These are include/mb and backend/utils/mb.
---
 src/include/catalog/pg_proc.h     |   5 +-
 src/include/commands/dbcommands.h |   6 +-
 src/include/mb/pg_wchar.h         | 122 ++++++++++++++++++++++++++++++++++++++
 src/include/miscadmin.h           |  11 +++-
 src/include/nodes/parsenodes.h    |   6 +-
 src/include/regex/pg_wchar.h      |  82 -------------------------
 src/include/regex/regex.h         |   2 +-
 7 files changed, 147 insertions(+), 87 deletions(-)
 create mode 100644 src/include/mb/pg_wchar.h
 delete mode 100644 src/include/regex/pg_wchar.h

(limited to 'src/include')

diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index dfbc7568249..bf825737511 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_proc.h,v 1.62 1998/07/18 18:34:17 momjian Exp $
+ * $Id: pg_proc.h,v 1.63 1998/07/24 03:32:16 scrappy Exp $
  *
  * NOTES
  *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -2077,12 +2077,15 @@ DESCR("trim both ends of string");
 DATA(insert OID =  885 (  btrim		   PGUID 14 f t f 1 f 25 "25" 100 0 0 100  "select btrim($1, \' \')" - ));
 DESCR("trim both ends of string");
 
+
 /* SEQUENCEs nextval & currval functions */
 DATA(insert OID =  1317 (  nextval	   PGUID 11 f t f 1 f 23 "25" 100 0 0 100  foo bar ));
 DESCR("sequence next value");
 DATA(insert OID =  1319 (  currval	   PGUID 11 f t f 1 f 23 "25" 100 0 0 100  foo bar ));
 DESCR("sequence current value");
 
+/* for multi-byte support */
+DATA(insert OID = 1039 (  getdatabaseencoding	   PGUID 11 f t f 0 f 19 "0" 100 0 0 100  foo bar ));
 
 /*
  * prototypes for functions pg_proc.c
diff --git a/src/include/commands/dbcommands.h b/src/include/commands/dbcommands.h
index 06a291070e9..8675afa5c71 100644
--- a/src/include/commands/dbcommands.h
+++ b/src/include/commands/dbcommands.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: dbcommands.h,v 1.2 1998/07/09 03:28:56 scrappy Exp $
+ * $Id: dbcommands.h,v 1.3 1998/07/24 03:32:19 scrappy Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,7 +19,11 @@
  */
 #define SIGKILLDAEMON1	SIGTERM
 
+#ifdef MB
+extern void createdb(char *dbname, char *dbpath, int encoding);
+#else
 extern void createdb(char *dbname, char *dbpath);
+#endif
 extern void destroydb(char *dbname);
 
 #endif							/* DBCOMMANDS_H */
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
new file mode 100644
index 00000000000..8453f38558d
--- /dev/null
+++ b/src/include/mb/pg_wchar.h
@@ -0,0 +1,122 @@
+/* $Id: pg_wchar.h,v 1.1 1998/07/24 03:32:24 scrappy Exp $ */
+
+#ifndef PG_WCHAR_H
+#define PG_WCHAR_H
+
+#include <sys/types.h>
+#include "postgres.h"
+#include "miscadmin.h"	/* for getdatabaseencoding() */
+
+#define EUC_JP 0	/* EUC for Japanese */
+#define EUC_CN 1	/* EUC for Chinese */
+#define EUC_KR 2	/* EUC for Korean */
+#define EUC_TW 3	/* EUC for Taiwan */
+#define UNICODE 4	/* Unicode UTF-8 */
+#define MULE_INTERNAL 5	/* Mule internal code */
+#define LATIN1 6	/* ISO-8859 Latin 1 */
+#define LATIN2 7	/* ISO-8859 Latin 2 */
+#define LATIN3 8	/* ISO-8859 Latin 3 */
+#define LATIN4 9	/* ISO-8859 Latin 4 */
+#define LATIN5 10	/* ISO-8859 Latin 5 */
+#define LATIN6 11	/* ISO-8859 Latin 6 */
+#define LATIN7 12	/* ISO-8859 Latin 7 */
+#define LATIN8 13	/* ISO-8859 Latin 8 */
+#define LATIN9 14	/* ISO-8859 Latin 9 */
+/* the following are for client encoding only */
+#define SJIS 32		/* Shift JIS */
+
+#ifdef MB
+typedef unsigned int pg_wchar;
+#else
+#define pg_wchar char
+#endif
+
+/*
+ * various definitions for EUC
+ */
+#define SS2 0x8e	/* single shift 2 */
+#define SS3 0x8f	/* single shift 3 */
+
+/*
+ * various definitions for mule internal code
+ */
+#define IS_LC1(c)	((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8f)
+#define IS_LCPRV1(c)	((unsigned char)(c) == 0x9a || (unsigned char)(c) == 0x9b)
+#define IS_LC2(c)	((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
+#define IS_LCPRV2(c)	((unsigned char)(c) == 0x9c || (unsigned char)(c) == 0x9d)
+
+/*
+ * leading characters
+ */
+#define	LC_ISO8859_1	0x81	/* ISO8859 Latin 1 */
+#define	LC_ISO8859_2	0x82	/* ISO8859 Latin 2 */
+#define	LC_ISO8859_3	0x83	/* ISO8859 Latin 3 */
+#define	LC_ISO8859_4	0x84	/* ISO8859 Latin 4 */
+#define	LC_ISO8859_5	0x8d	/* ISO8859 Latin 5 */
+#define	LC_JISX0201K	0x89	/* Japanese 1 byte kana */
+#define	LC_JISX0201R	0x90	/* Japanese 1 byte Roman */
+#define	LC_GB2312_80	0x91	/* Chinese */
+#define	LC_JISX0208	0x92	/* Japanese Kanji */
+#define	LC_KS5601	0x93	/* Korean */
+#define	LC_JISX0212	0x94	/* Japanese Kanji (JISX0212) */
+#define	LC_CNS11643_1	0x95	/* CNS 11643-1992 Plane 1 */
+#define	LC_CNS11643_2	0x96	/* CNS 11643-1992 Plane 2 */
+#define	LC_CNS11643_3	0xf6	/* CNS 11643-1992 Plane 3 */
+#define	LC_CNS11643_4	0xf7	/* CNS 11643-1992 Plane 4 */
+#define	LC_CNS11643_5	0xf8	/* CNS 11643-1992 Plane 5 */
+#define	LC_CNS11643_6	0xf9	/* CNS 11643-1992 Plane 6 */
+#define	LC_CNS11643_7	0xfa	/* CNS 11643-1992 Plane 7 */
+
+#ifdef MB
+typedef struct {
+  int encoding;		/* encoding symbol value */
+  char *name;		/* encoding name */
+  int is_client_only;	/* 0: server/client both supported
+			   1: client only */
+  void (*to_mic)();	/* client encoding to MIC */
+  void (*from_mic)();	/* MIC to client encoding */
+} pg_encoding_conv_tbl;
+
+extern pg_encoding_conv_tbl pg_conv_tbl[];
+
+typedef struct {
+  void	(*mb2wchar_with_len)();	/* convert a multi-byte string to a wchar */
+  int	(*mblen)();		/* returns the length of a multi-byte word */
+} pg_wchar_tbl;
+
+extern pg_wchar_tbl pg_wchar_table[];
+
+extern void pg_mb2wchar(const unsigned char *, pg_wchar *);
+extern void pg_mb2wchar_with_len(const unsigned char *, pg_wchar *, int);
+extern int pg_char_and_wchar_strcmp(const char *, const pg_wchar *);
+extern int pg_wchar_strncmp(const pg_wchar *, const pg_wchar *, size_t);
+extern int pg_char_and_wchar_strncmp(const char *, const pg_wchar *, size_t);
+extern size_t pg_wchar_strlen(const pg_wchar *);
+extern int pg_mblen(const unsigned char *);
+extern int pg_encoding_mblen(int, const unsigned char *);
+extern int pg_mule_mblen(const unsigned char *);
+extern int pg_mic_mblen(const unsigned char *);
+extern int pg_mbstrlen(const unsigned char *);
+extern int pg_mbstrlen_with_len(const unsigned char *, int);
+extern pg_encoding_conv_tbl *pg_get_encent_by_encoding(int);
+extern bool show_client_encoding(void);
+extern bool reset_client_encoding(void);
+extern bool parse_client_encoding(const char *);
+extern bool show_server_encoding(void);
+extern bool reset_server_encoding(void);
+extern bool parse_server_encoding(const char *);
+extern int pg_set_client_encoding(int);
+extern int pg_get_client_encoding(void);
+extern unsigned char *pg_client_to_server(unsigned char *, int);
+extern unsigned char *pg_server_to_client(unsigned char *, int);
+extern int pg_valid_client_encoding(const char *);
+extern const char *pg_encoding_to_char(int);
+extern int pg_char_to_encoding(const char *);
+extern int GetDatabaseEncoding(void);
+extern void SetDatabaseEncoding(int);
+extern void SetTemplateEncoding(int);
+extern int GetTemplateEncoding(void);
+
+#endif	/* MB */
+
+#endif	/* PG_WCHAR_H */
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index a6f22432994..74c8bf71f19 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -11,7 +11,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: miscadmin.h,v 1.27 1998/07/09 03:28:55 scrappy Exp $
+ * $Id: miscadmin.h,v 1.28 1998/07/24 03:32:13 scrappy Exp $
  *
  * NOTES
  *	  some of the information in this file will be moved to
@@ -116,13 +116,22 @@ extern char *DatabaseName;
 extern char *DatabasePath;
 
 /* in utils/misc/database.c */
+#ifdef MB
+extern void GetRawDatabaseInfo(char *name, Oid *owner, Oid *db_id, char *path, int *encoding);
+#else
 extern void GetRawDatabaseInfo(char *name, Oid *owner, Oid *db_id, char *path);
+#endif
 extern int	GetDatabaseInfo(char *name, Oid *owner, char *path);
 extern char *ExpandDatabasePath(char *path);
 
 /* now in utils/init/miscinit.c */
 extern void SetDatabaseName(char *name);
 extern void SetDatabasePath(char *path);
+/* Even if MB is not enabled, this function is necessary
+ * since pg_proc.h has an entry for it.
+ */
+extern const char *getdatabaseencoding(void);
+
 extern char *getpgusername(void);
 extern void SetPgUserName(void);
 extern Oid	GetUserId(void);
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 95a014409d9..c2c2c0fc7bb 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: parsenodes.h,v 1.50 1998/07/12 21:29:31 momjian Exp $
+ * $Id: parsenodes.h,v 1.51 1998/07/24 03:32:26 scrappy Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -480,6 +480,10 @@ typedef struct CreatedbStmt
 	NodeTag		type;
 	char	   *dbname;			/* database to create */
 	char	   *dbpath;			/* location of database */
+#ifdef MB
+	int	   encoding;			/* default encoding
+						   (see mb/pg_wchar.h) */
+#endif
 } CreatedbStmt;
 
 /* ----------------------
diff --git a/src/include/regex/pg_wchar.h b/src/include/regex/pg_wchar.h
deleted file mode 100644
index a37c253e1f2..00000000000
--- a/src/include/regex/pg_wchar.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* $Id: pg_wchar.h,v 1.4 1998/07/18 18:34:24 momjian Exp $ */
-
-#ifndef PG_WCHAR_H
-#define PG_WCHAR_H
-
-#include <sys/types.h>
-
-#define EUC_JP 0	/* EUC for Japanese */
-#define EUC_CN 1	/* EUC for Chinese */
-#define EUC_KR 2	/* EUC for Korean */
-#define EUC_TW 3	/* EUC for Taiwan */
-#define UNICODE 4	/* Unicode UTF-8 */
-#define MULE_INTERNAL 5	/* Mule internal code */
-#define LATIN1 6	/* ISO-8859 Latin 1 */
-#define LATIN2 7	/* ISO-8859 Latin 2 */
-#define LATIN3 8	/* ISO-8859 Latin 3 */
-#define LATIN4 9	/* ISO-8859 Latin 4 */
-#define LATIN5 10	/* ISO-8859 Latin 5 */
-/* followings are for client encoding only */
-#define SJIS 16		/* Shift JIS */
-
-#ifdef MULTIBYTE
-# if LATIN1 <= MULTIBYTE && MULTIBYTE <= LATIN5
-typedef unsigned char pg_wchar;
-# else
-typedef unsigned int pg_wchar;
-# endif
-#else
-#define pg_wchar char
-#endif
-
-/*
- * various definitions for EUC
- */
-#define SS2 0x8e	/* single shift 2 */
-#define SS3 0x8f	/* single shift 3 */
-
-/*
- * various definitions for mule internal code
- */
-#define IS_LC1(c)	((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8f)
-#define IS_LCPRV1(c)	((unsigned char)(c) == 0x9a || (unsigned char)(c) == 0x9b)
-#define IS_LC2(c)	((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
-#define IS_LCPRV2(c)	((unsigned char)(c) == 0x9c || (unsigned char)(c) == 0x9d)
-
-/*
- * leading characters
- */
-#define	LC_ISO8859_1	0x81	/* ISO8859 Latin 1 */
-#define	LC_ISO8859_2	0x82	/* ISO8859 Latin 2 */
-#define	LC_ISO8859_3	0x83	/* ISO8859 Latin 3 */
-#define	LC_ISO8859_4	0x84	/* ISO8859 Latin 4 */
-#define	LC_ISO8859_5	0x8d	/* ISO8859 Latin 5 */
-#define	LC_JISX0201K	0x89	/* Japanese 1 byte kana */
-#define	LC_JISX0201R	0x90	/* Japanese 1 byte Roman */
-#define	LC_GB2312_80	0x91	/* Chinese */
-#define	LC_JISX0208	0x92	/* Japanese Kanji */
-#define	LC_KS5601	0x93	/* Korean */
-#define	LC_JISX0212	0x94	/* Japanese Kanji (JISX0212) */
-#define	LC_CNS11643_1	0x95	/* CNS 11643-1992 Plane 1 */
-#define	LC_CNS11643_2	0x96	/* CNS 11643-1992 Plane 2 */
-#define	LC_CNS11643_3	0xf6	/* CNS 11643-1992 Plane 3 */
-#define	LC_CNS11643_4	0xf7	/* CNS 11643-1992 Plane 4 */
-#define	LC_CNS11643_5	0xf8	/* CNS 11643-1992 Plane 5 */
-#define	LC_CNS11643_6	0xf9	/* CNS 11643-1992 Plane 6 */
-#define	LC_CNS11643_7	0xfa	/* CNS 11643-1992 Plane 7 */
-
-#ifdef MULTIBYTE
-extern void pg_mb2wchar(const unsigned char *, pg_wchar *);
-extern void pg_mb2wchar_with_len(const unsigned char *, pg_wchar *, int);
-extern int pg_char_and_wchar_strcmp(const char *, const pg_wchar *);
-extern int pg_wchar_strncmp(const pg_wchar *, const pg_wchar *, size_t);
-extern int pg_char_and_wchar_strncmp(const char *, const pg_wchar *, size_t);
-extern size_t pg_wchar_strlen(const pg_wchar *);
-extern int pg_mblen(const unsigned char *);
-extern int pg_encoding_mblen(int, const unsigned char *);
-extern int pg_mic_mblen(const unsigned char *);
-extern int pg_mbstrlen(const unsigned char *);
-extern int pg_mbstrlen_with_len(const unsigned char *, int);
-#endif
-
-#endif
diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h
index 672ed2a382f..c4a306e4c23 100644
--- a/src/include/regex/regex.h
+++ b/src/include/regex/regex.h
@@ -41,7 +41,7 @@
 #define _REGEX_H_
 
 #include <sys/types.h>
-#include <regex/pg_wchar.h>
+#include <mb/pg_wchar.h>
 
 /* types */
 typedef off_t regoff_t;
-- 
cgit v1.2.3