diff options
author | Bruce Momjian | 1998-06-16 07:29:54 +0000 |
---|---|---|
committer | Bruce Momjian | 1998-06-16 07:29:54 +0000 |
commit | cb7cbc16fa4b5933fb5d63052568e3ed6859857b (patch) | |
tree | bed17594c4880549288373de4d400512cbe2f82d /src/include/regex | |
parent | 0d8e7f6381291b85ad6264365e01143357d70a75 (diff) |
Hi, here are the patches to enhance the existing MB handling. This time
I have implemented a framework for encoding translation between the
backend and the frontend. I have also added a new variable-setting
command:
SET CLIENT_ENCODING TO 'encoding';
Other features include:
Latin1 support; more 8-bit cleanness
See doc/README.mb for more details. Note that the patches are
against the May 30 snapshot.
Tatsuo Ishii
Diffstat (limited to 'src/include/regex')
-rw-r--r-- | src/include/regex/pg_wchar.h | 37 | ||||
-rw-r--r-- | src/include/regex/regex2.h | 2 |
2 files changed, 38 insertions, 1 deletions
diff --git a/src/include/regex/pg_wchar.h b/src/include/regex/pg_wchar.h index b135df82f12..bfbd8ba37bb 100644 --- a/src/include/regex/pg_wchar.h +++ b/src/include/regex/pg_wchar.h @@ -1,4 +1,4 @@ -/* $Id: pg_wchar.h,v 1.2 1998/04/27 17:09:12 scrappy Exp $ */ +/* $Id: pg_wchar.h,v 1.3 1998/06/16 07:29:43 momjian Exp $ */ #ifndef PG_WCHAR_H #define PG_WCHAR_H @@ -11,9 +11,20 @@ #define EUC_TW 3 /* EUC for Taiwan */ #define UNICODE 4 /* Unicode UTF-8 */ #define MULE_INTERNAL 5 /* Mule internal code */ +#define LATIN1 6 /* ISO-8859 Latin 1 */ +#define LATIN2 7 /* ISO-8859 Latin 2 */ +#define LATIN3 8 /* ISO-8859 Latin 3 */ +#define LATIN4 9 /* ISO-8859 Latin 4 */ +#define LATIN5 10 /* ISO-8859 Latin 5 */ +/* followings are for client encoding only */ +#define SJIS 16 /* Shift JIS */ #ifdef MB +# if LATIN1 <= MB && MB <= LATIN5 +typedef unsigned char pg_wchar; +# else typedef unsigned int pg_wchar; +# endif #else #define pg_wchar char #endif @@ -32,6 +43,28 @@ typedef unsigned int pg_wchar; #define IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99) #define IS_LCPRV2(c) ((unsigned char)(c) == 0x9c || (unsigned char)(c) == 0x9d) +/* + * leading characters + */ +#define LC_ISO8859_1 0x81 /* ISO8859 Latin 1 */ +#define LC_ISO8859_2 0x82 /* ISO8859 Latin 2 */ +#define LC_ISO8859_3 0x83 /* ISO8859 Latin 3 */ +#define LC_ISO8859_4 0x84 /* ISO8859 Latin 4 */ +#define LC_ISO8859_5 0x8d /* ISO8859 Latin 5 */ +#define LC_JISX0201K 0x89 /* Japanese 1 byte kana */ +#define LC_JISX0201R 0x90 /* Japanese 1 byte Roman */ +#define LC_GB2312_80 0x91 /* Chinese */ +#define LC_JISX0208 0x92 /* Japanese Kanji */ +#define LC_KS5601 0x93 /* Korean */ +#define LC_JISX0212 0x94 /* Japanese Kanji (JISX0212) */ +#define LC_CNS11643_1 0x95 /* CNS 11643-1992 Plane 1 */ +#define LC_CNS11643_2 0x96 /* CNS 11643-1992 Plane 2 */ +#define LC_CNS11643_3 0xf6 /* CNS 11643-1992 Plane 3 */ +#define LC_CNS11643_4 0xf7 /* CNS 11643-1992 Plane 4 */ +#define LC_CNS11643_5 0xf8 /* CNS 
11643-1992 Plane 5 */ +#define LC_CNS11643_6 0xf9 /* CNS 11643-1992 Plane 6 */ +#define LC_CNS11643_7 0xfa /* CNS 11643-1992 Plane 7 */ + #ifdef MB extern void pg_mb2wchar(const unsigned char *, pg_wchar *); extern void pg_mb2wchar_with_len(const unsigned char *, pg_wchar *, int); @@ -40,6 +73,8 @@ extern int pg_wchar_strncmp(const pg_wchar *, const pg_wchar *, size_t); extern int pg_char_and_wchar_strncmp(const char *, const pg_wchar *, size_t); extern size_t pg_wchar_strlen(const pg_wchar *); extern int pg_mblen(const unsigned char *); +extern int pg_encoding_mblen(int, const unsigned char *); +extern int pg_mic_mblen(const unsigned char *); extern int pg_mbstrlen(const unsigned char *); extern int pg_mbstrlen_with_len(const unsigned char *, int); #endif diff --git a/src/include/regex/regex2.h b/src/include/regex/regex2.h index 01cdadff451..4590862486c 100644 --- a/src/include/regex/regex2.h +++ b/src/include/regex/regex2.h @@ -203,6 +203,8 @@ struct re_guts # define OUT (USHRT_MAX+1) /* 2 bytes */ # elif MB == UNICODE # define OUT (USHRT_MAX+1) /* 2 bytes. assuming UCS-2 */ +# else +# define OUT (UCHAR_MAX+1) /* other codes. assuming 1 byte */ # endif #else # define OUT (CHAR_MAX+1) /* a non-character value */ |