summaryrefslogtreecommitdiff
path: root/src/include/regex
diff options
context:
space:
mode:
author Bruce Momjian 1998-06-16 07:29:54 +0000
committer Bruce Momjian 1998-06-16 07:29:54 +0000
commit cb7cbc16fa4b5933fb5d63052568e3ed6859857b (patch)
tree bed17594c4880549288373de4d400512cbe2f82d /src/include/regex
parent 0d8e7f6381291b85ad6264365e01143357d70a75 (diff)
Hi, here are the patches to enhance existing MB handling. This time
I have implemented a framework of encoding translation between the backend and the frontend. Also I have added a new variable setting command: SET CLIENT_ENCODING TO 'encoding'; Other features include: Latin1 support; more 8-bit cleanness. See doc/README.mb for more details. Note that the patches are against the May 30 snapshot. Tatsuo Ishii
Diffstat (limited to 'src/include/regex')
-rw-r--r-- src/include/regex/pg_wchar.h 37
-rw-r--r-- src/include/regex/regex2.h 2
2 files changed, 38 insertions, 1 deletions
diff --git a/src/include/regex/pg_wchar.h b/src/include/regex/pg_wchar.h
index b135df82f12..bfbd8ba37bb 100644
--- a/src/include/regex/pg_wchar.h
+++ b/src/include/regex/pg_wchar.h
@@ -1,4 +1,4 @@
-/* $Id: pg_wchar.h,v 1.2 1998/04/27 17:09:12 scrappy Exp $ */
+/* $Id: pg_wchar.h,v 1.3 1998/06/16 07:29:43 momjian Exp $ */
#ifndef PG_WCHAR_H
#define PG_WCHAR_H
@@ -11,9 +11,20 @@
#define EUC_TW 3 /* EUC for Taiwan */
#define UNICODE 4 /* Unicode UTF-8 */
#define MULE_INTERNAL 5 /* Mule internal code */
+#define LATIN1 6 /* ISO-8859 Latin 1 */
+#define LATIN2 7 /* ISO-8859 Latin 2 */
+#define LATIN3 8 /* ISO-8859 Latin 3 */
+#define LATIN4 9 /* ISO-8859 Latin 4 */
+#define LATIN5 10 /* ISO-8859 Latin 5 */
+/* the following are for client encoding only */
+#define SJIS 16 /* Shift JIS */
#ifdef MB
+# if LATIN1 <= MB && MB <= LATIN5
+typedef unsigned char pg_wchar;
+# else
typedef unsigned int pg_wchar;
+# endif
#else
#define pg_wchar char
#endif
@@ -32,6 +43,28 @@ typedef unsigned int pg_wchar;
#define IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
#define IS_LCPRV2(c) ((unsigned char)(c) == 0x9c || (unsigned char)(c) == 0x9d)
+/*
+ * leading characters
+ */
+#define LC_ISO8859_1 0x81 /* ISO8859 Latin 1 */
+#define LC_ISO8859_2 0x82 /* ISO8859 Latin 2 */
+#define LC_ISO8859_3 0x83 /* ISO8859 Latin 3 */
+#define LC_ISO8859_4 0x84 /* ISO8859 Latin 4 */
+#define LC_ISO8859_5 0x8d /* ISO8859 Latin 5 */
+#define LC_JISX0201K 0x89 /* Japanese 1 byte kana */
+#define LC_JISX0201R 0x90 /* Japanese 1 byte Roman */
+#define LC_GB2312_80 0x91 /* Chinese */
+#define LC_JISX0208 0x92 /* Japanese Kanji */
+#define LC_KS5601 0x93 /* Korean */
+#define LC_JISX0212 0x94 /* Japanese Kanji (JISX0212) */
+#define LC_CNS11643_1 0x95 /* CNS 11643-1992 Plane 1 */
+#define LC_CNS11643_2 0x96 /* CNS 11643-1992 Plane 2 */
+#define LC_CNS11643_3 0xf6 /* CNS 11643-1992 Plane 3 */
+#define LC_CNS11643_4 0xf7 /* CNS 11643-1992 Plane 4 */
+#define LC_CNS11643_5 0xf8 /* CNS 11643-1992 Plane 5 */
+#define LC_CNS11643_6 0xf9 /* CNS 11643-1992 Plane 6 */
+#define LC_CNS11643_7 0xfa /* CNS 11643-1992 Plane 7 */
+
#ifdef MB
extern void pg_mb2wchar(const unsigned char *, pg_wchar *);
extern void pg_mb2wchar_with_len(const unsigned char *, pg_wchar *, int);
@@ -40,6 +73,8 @@ extern int pg_wchar_strncmp(const pg_wchar *, const pg_wchar *, size_t);
extern int pg_char_and_wchar_strncmp(const char *, const pg_wchar *, size_t);
extern size_t pg_wchar_strlen(const pg_wchar *);
extern int pg_mblen(const unsigned char *);
+extern int pg_encoding_mblen(int, const unsigned char *);
+extern int pg_mic_mblen(const unsigned char *);
extern int pg_mbstrlen(const unsigned char *);
extern int pg_mbstrlen_with_len(const unsigned char *, int);
#endif
diff --git a/src/include/regex/regex2.h b/src/include/regex/regex2.h
index 01cdadff451..4590862486c 100644
--- a/src/include/regex/regex2.h
+++ b/src/include/regex/regex2.h
@@ -203,6 +203,8 @@ struct re_guts
# define OUT (USHRT_MAX+1) /* 2 bytes */
# elif MB == UNICODE
# define OUT (USHRT_MAX+1) /* 2 bytes. assuming UCS-2 */
+# else
+# define OUT (UCHAR_MAX+1) /* other codes. assuming 1 byte */
# endif
#else
# define OUT (CHAR_MAX+1) /* a non-character value */