From bf00bbb0c4940b80b46b7e5b379cd64184f2262f Mon Sep 17 00:00:00 2001 From: Marc G. Fournier Date: Fri, 24 Jul 1998 03:32:46 +0000 Subject: I really hope that I haven't missed anything in this one... From: t-ishii@sra.co.jp Attached are patches to enhance the multi-byte support. (patches are against 7/18 snapshot) * determine encoding at initdb/createdb rather than compile time Now initdb/createdb has an option to specify the encoding. Also, I modified the syntax of CREATE DATABASE to accept an encoding option. See README.mb for more details. For this purpose I have added a new column "encoding" to pg_database. Also pg_attribute and pg_class are changed to catch up with the modification to pg_database. Actually I have added pg_database_mb.h, pg_attribute_mb.h and pg_class_mb.h. These are used only when MB is enabled. The reason for having separate files is that I couldn't find a way to use ifdef or whatever in those files. I have to admit it looks ugly. No way. * support for PGCLIENTENCODING when issuing COPY command commands/copy.c modified. * support for SQL92 syntax "SET NAMES" See gram.y. * support for LATIN2-5 * add UNICODE regression test case * new test suite for MB New directory test/mb added. * clean up source files Basic idea is to have MB's own subdirectory for easier maintenance. These are include/mb and backend/utils/mb. 
--- src/include/catalog/pg_proc.h | 5 +- src/include/commands/dbcommands.h | 6 +- src/include/mb/pg_wchar.h | 122 ++++++++++++++++++++++++++++++++++++++ src/include/miscadmin.h | 11 +++- src/include/nodes/parsenodes.h | 6 +- src/include/regex/pg_wchar.h | 82 ------------------------- src/include/regex/regex.h | 2 +- 7 files changed, 147 insertions(+), 87 deletions(-) create mode 100644 src/include/mb/pg_wchar.h delete mode 100644 src/include/regex/pg_wchar.h (limited to 'src/include') diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index dfbc7568249..bf825737511 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: pg_proc.h,v 1.62 1998/07/18 18:34:17 momjian Exp $ + * $Id: pg_proc.h,v 1.63 1998/07/24 03:32:16 scrappy Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -2077,12 +2077,15 @@ DESCR("trim both ends of string"); DATA(insert OID = 885 ( btrim PGUID 14 f t f 1 f 25 "25" 100 0 0 100 "select btrim($1, \' \')" - )); DESCR("trim both ends of string"); + /* SEQUENCEs nextval & currval functions */ DATA(insert OID = 1317 ( nextval PGUID 11 f t f 1 f 23 "25" 100 0 0 100 foo bar )); DESCR("sequence next value"); DATA(insert OID = 1319 ( currval PGUID 11 f t f 1 f 23 "25" 100 0 0 100 foo bar )); DESCR("sequence current value"); +/* for multi-byte support */ +DATA(insert OID = 1039 ( getdatabaseencoding PGUID 11 f t f 0 f 19 "0" 100 0 0 100 foo bar )); /* * prototypes for functions pg_proc.c diff --git a/src/include/commands/dbcommands.h b/src/include/commands/dbcommands.h index 06a291070e9..8675afa5c71 100644 --- a/src/include/commands/dbcommands.h +++ b/src/include/commands/dbcommands.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: dbcommands.h,v 1.2 1998/07/09 03:28:56 scrappy Exp $ + * $Id: dbcommands.h,v 1.3 1998/07/24 03:32:19 scrappy Exp $ * 
*------------------------------------------------------------------------- */ @@ -19,7 +19,11 @@ */ #define SIGKILLDAEMON1 SIGTERM +#ifdef MB +extern void createdb(char *dbname, char *dbpath, int encoding); +#else extern void createdb(char *dbname, char *dbpath); +#endif extern void destroydb(char *dbname); #endif /* DBCOMMANDS_H */ diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h new file mode 100644 index 00000000000..8453f38558d --- /dev/null +++ b/src/include/mb/pg_wchar.h @@ -0,0 +1,122 @@ +/* $Id: pg_wchar.h,v 1.1 1998/07/24 03:32:24 scrappy Exp $ */ + +#ifndef PG_WCHAR_H +#define PG_WCHAR_H + +#include +#include "postgres.h" +#include "miscadmin.h" /* for getdatabaseencoding() */ + +#define EUC_JP 0 /* EUC for Japanese */ +#define EUC_CN 1 /* EUC for Chinese */ +#define EUC_KR 2 /* EUC for Korean */ +#define EUC_TW 3 /* EUC for Taiwan */ +#define UNICODE 4 /* Unicode UTF-8 */ +#define MULE_INTERNAL 5 /* Mule internal code */ +#define LATIN1 6 /* ISO-8859 Latin 1 */ +#define LATIN2 7 /* ISO-8859 Latin 2 */ +#define LATIN3 8 /* ISO-8859 Latin 3 */ +#define LATIN4 9 /* ISO-8859 Latin 4 */ +#define LATIN5 10 /* ISO-8859 Latin 5 */ +#define LATIN6 11 /* ISO-8859 Latin 6 */ +#define LATIN7 12 /* ISO-8859 Latin 7 */ +#define LATIN8 13 /* ISO-8859 Latin 8 */ +#define LATIN9 14 /* ISO-8859 Latin 9 */ +/* followings are for client encoding only */ +#define SJIS 32 /* Shift JIS */ + +#ifdef MB +typedef unsigned int pg_wchar; +#else +#define pg_wchar char +#endif + +/* + * various definitions for EUC + */ +#define SS2 0x8e /* single shift 2 */ +#define SS3 0x8f /* single shift 3 */ + +/* + * various definitions for mule internal code + */ +#define IS_LC1(c) ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8f) +#define IS_LCPRV1(c) ((unsigned char)(c) == 0x9a || (unsigned char)(c) == 0x9b) +#define IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99) +#define IS_LCPRV2(c) ((unsigned char)(c) == 0x9c || (unsigned char)(c) == 0x9d) + 
+/* + * leading characters + */ +#define LC_ISO8859_1 0x81 /* ISO8859 Latin 1 */ +#define LC_ISO8859_2 0x82 /* ISO8859 Latin 2 */ +#define LC_ISO8859_3 0x83 /* ISO8859 Latin 3 */ +#define LC_ISO8859_4 0x84 /* ISO8859 Latin 4 */ +#define LC_ISO8859_5 0x8d /* ISO8859 Latin 5 */ +#define LC_JISX0201K 0x89 /* Japanese 1 byte kana */ +#define LC_JISX0201R 0x90 /* Japanese 1 byte Roman */ +#define LC_GB2312_80 0x91 /* Chinese */ +#define LC_JISX0208 0x92 /* Japanese Kanji */ +#define LC_KS5601 0x93 /* Korean */ +#define LC_JISX0212 0x94 /* Japanese Kanji (JISX0212) */ +#define LC_CNS11643_1 0x95 /* CNS 11643-1992 Plane 1 */ +#define LC_CNS11643_2 0x96 /* CNS 11643-1992 Plane 2 */ +#define LC_CNS11643_3 0xf6 /* CNS 11643-1992 Plane 3 */ +#define LC_CNS11643_4 0xf7 /* CNS 11643-1992 Plane 4 */ +#define LC_CNS11643_5 0xf8 /* CNS 11643-1992 Plane 5 */ +#define LC_CNS11643_6 0xf9 /* CNS 11643-1992 Plane 6 */ +#define LC_CNS11643_7 0xfa /* CNS 11643-1992 Plane 7 */ + +#ifdef MB +typedef struct { + int encoding; /* encoding symbol value */ + char *name; /* encoding name */ + int is_client_only; /* 0: server/client bothg supported + 1: client only */ + void (*to_mic)(); /* client encoding to MIC */ + void (*from_mic)(); /* MIC to client encoding */ +} pg_encoding_conv_tbl; + +extern pg_encoding_conv_tbl pg_conv_tbl[]; + +typedef struct { + void (*mb2wchar_with_len)(); /* convert a multi-byte string to a wchar */ + int (*mblen)(); /* returns the length of a multi-byte word */ +} pg_wchar_tbl; + +extern pg_wchar_tbl pg_wchar_table[]; + +extern void pg_mb2wchar(const unsigned char *, pg_wchar *); +extern void pg_mb2wchar_with_len(const unsigned char *, pg_wchar *, int); +extern int pg_char_and_wchar_strcmp(const char *, const pg_wchar *); +extern int pg_wchar_strncmp(const pg_wchar *, const pg_wchar *, size_t); +extern int pg_char_and_wchar_strncmp(const char *, const pg_wchar *, size_t); +extern size_t pg_wchar_strlen(const pg_wchar *); +extern int pg_mblen(const unsigned char *); 
+extern int pg_encoding_mblen(int, const unsigned char *); +extern int pg_mule_mblen(const unsigned char *); +extern int pg_mic_mblen(const unsigned char *); +extern int pg_mbstrlen(const unsigned char *); +extern int pg_mbstrlen_with_len(const unsigned char *, int); +extern pg_encoding_conv_tbl *pg_get_encent_by_encoding(int); +extern bool show_client_encoding(void); +extern bool reset_client_encoding(void); +extern bool parse_client_encoding(const char *); +extern bool show_server_encoding(void); +extern bool reset_server_encoding(void); +extern bool parse_server_encoding(const char *); +extern int pg_set_client_encoding(int); +extern int pg_get_client_encoding(void); +extern unsigned char *pg_client_to_server(unsigned char *, int); +extern unsigned char *pg_server_to_client(unsigned char *, int); +extern int pg_valid_client_encoding(const char *); +extern const char *pg_encoding_to_char(int); +extern int pg_char_to_encoding(const char *); +extern int GetDatabaseEncoding(void); +extern void SetDatabaseEncoding(int); +extern void SetTemplateEncoding(int); +extern int GetTemplateEncoding(void); + +#endif /* MB */ + +#endif /* PG_WCHAR_H */ diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index a6f22432994..74c8bf71f19 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -11,7 +11,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: miscadmin.h,v 1.27 1998/07/09 03:28:55 scrappy Exp $ + * $Id: miscadmin.h,v 1.28 1998/07/24 03:32:13 scrappy Exp $ * * NOTES * some of the information in this file will be moved to @@ -116,13 +116,22 @@ extern char *DatabaseName; extern char *DatabasePath; /* in utils/misc/database.c */ +#ifdef MB +extern void GetRawDatabaseInfo(char *name, Oid *owner, Oid *db_id, char *path, int *encoding); +#else extern void GetRawDatabaseInfo(char *name, Oid *owner, Oid *db_id, char *path); +#endif extern int GetDatabaseInfo(char *name, Oid *owner, char *path); extern char 
*ExpandDatabasePath(char *path); /* now in utils/init/miscinit.c */ extern void SetDatabaseName(char *name); extern void SetDatabasePath(char *path); +/* even if MB is not enabled, this function is neccesary + * since pg_proc.h does have. + */ +extern const char *getdatabaseencoding(void); + extern char *getpgusername(void); extern void SetPgUserName(void); extern Oid GetUserId(void); diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 95a014409d9..c2c2c0fc7bb 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: parsenodes.h,v 1.50 1998/07/12 21:29:31 momjian Exp $ + * $Id: parsenodes.h,v 1.51 1998/07/24 03:32:26 scrappy Exp $ * *------------------------------------------------------------------------- */ @@ -480,6 +480,10 @@ typedef struct CreatedbStmt NodeTag type; char *dbname; /* database to create */ char *dbpath; /* location of database */ +#ifdef MB + int encoding; /* default encoding + (see regex/pg_wchar.h) */ +#endif } CreatedbStmt; /* ---------------------- diff --git a/src/include/regex/pg_wchar.h b/src/include/regex/pg_wchar.h deleted file mode 100644 index a37c253e1f2..00000000000 --- a/src/include/regex/pg_wchar.h +++ /dev/null @@ -1,82 +0,0 @@ -/* $Id: pg_wchar.h,v 1.4 1998/07/18 18:34:24 momjian Exp $ */ - -#ifndef PG_WCHAR_H -#define PG_WCHAR_H - -#include - -#define EUC_JP 0 /* EUC for Japanese */ -#define EUC_CN 1 /* EUC for Chinese */ -#define EUC_KR 2 /* EUC for Korean */ -#define EUC_TW 3 /* EUC for Taiwan */ -#define UNICODE 4 /* Unicode UTF-8 */ -#define MULE_INTERNAL 5 /* Mule internal code */ -#define LATIN1 6 /* ISO-8859 Latin 1 */ -#define LATIN2 7 /* ISO-8859 Latin 2 */ -#define LATIN3 8 /* ISO-8859 Latin 3 */ -#define LATIN4 9 /* ISO-8859 Latin 4 */ -#define LATIN5 10 /* ISO-8859 Latin 5 */ -/* followings are for client encoding only */ -#define SJIS 16 /* Shift JIS */ - -#ifdef 
MULTIBYTE -# if LATIN1 <= MULTIBYTE && MULTIBYTE <= LATIN5 -typedef unsigned char pg_wchar; -# else -typedef unsigned int pg_wchar; -# endif -#else -#define pg_wchar char -#endif - -/* - * various definitions for EUC - */ -#define SS2 0x8e /* single shift 2 */ -#define SS3 0x8f /* single shift 3 */ - -/* - * various definitions for mule internal code - */ -#define IS_LC1(c) ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8f) -#define IS_LCPRV1(c) ((unsigned char)(c) == 0x9a || (unsigned char)(c) == 0x9b) -#define IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99) -#define IS_LCPRV2(c) ((unsigned char)(c) == 0x9c || (unsigned char)(c) == 0x9d) - -/* - * leading characters - */ -#define LC_ISO8859_1 0x81 /* ISO8859 Latin 1 */ -#define LC_ISO8859_2 0x82 /* ISO8859 Latin 2 */ -#define LC_ISO8859_3 0x83 /* ISO8859 Latin 3 */ -#define LC_ISO8859_4 0x84 /* ISO8859 Latin 4 */ -#define LC_ISO8859_5 0x8d /* ISO8859 Latin 5 */ -#define LC_JISX0201K 0x89 /* Japanese 1 byte kana */ -#define LC_JISX0201R 0x90 /* Japanese 1 byte Roman */ -#define LC_GB2312_80 0x91 /* Chinese */ -#define LC_JISX0208 0x92 /* Japanese Kanji */ -#define LC_KS5601 0x93 /* Korean */ -#define LC_JISX0212 0x94 /* Japanese Kanji (JISX0212) */ -#define LC_CNS11643_1 0x95 /* CNS 11643-1992 Plane 1 */ -#define LC_CNS11643_2 0x96 /* CNS 11643-1992 Plane 2 */ -#define LC_CNS11643_3 0xf6 /* CNS 11643-1992 Plane 3 */ -#define LC_CNS11643_4 0xf7 /* CNS 11643-1992 Plane 4 */ -#define LC_CNS11643_5 0xf8 /* CNS 11643-1992 Plane 5 */ -#define LC_CNS11643_6 0xf9 /* CNS 11643-1992 Plane 6 */ -#define LC_CNS11643_7 0xfa /* CNS 11643-1992 Plane 7 */ - -#ifdef MULTIBYTE -extern void pg_mb2wchar(const unsigned char *, pg_wchar *); -extern void pg_mb2wchar_with_len(const unsigned char *, pg_wchar *, int); -extern int pg_char_and_wchar_strcmp(const char *, const pg_wchar *); -extern int pg_wchar_strncmp(const pg_wchar *, const pg_wchar *, size_t); -extern int pg_char_and_wchar_strncmp(const char *, 
const pg_wchar *, size_t); -extern size_t pg_wchar_strlen(const pg_wchar *); -extern int pg_mblen(const unsigned char *); -extern int pg_encoding_mblen(int, const unsigned char *); -extern int pg_mic_mblen(const unsigned char *); -extern int pg_mbstrlen(const unsigned char *); -extern int pg_mbstrlen_with_len(const unsigned char *, int); -#endif - -#endif diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h index 672ed2a382f..c4a306e4c23 100644 --- a/src/include/regex/regex.h +++ b/src/include/regex/regex.h @@ -41,7 +41,7 @@ #define _REGEX_H_ #include -#include +#include /* types */ typedef off_t regoff_t; -- cgit v1.2.3