diff options
-rw-r--r-- | src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c | 144 |
1 files changed, 134 insertions, 10 deletions
diff --git a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c index db9deb653ba..66c242d7f36 100644 --- a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c +++ b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c @@ -37,6 +37,8 @@ PG_FUNCTION_INFO_V1(mic_to_big5); * ---------- */ +static void euc_tw2big5(const unsigned char *euc, unsigned char *p, int len); +static void big52euc_tw(const unsigned char *euc, unsigned char *p, int len); static void big52mic(const unsigned char *big5, unsigned char *p, int len); static void mic2big5(const unsigned char *mic, unsigned char *p, int len); static void euc_tw2mic(const unsigned char *euc, unsigned char *p, int len); @@ -48,14 +50,10 @@ euc_tw_to_big5(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5); - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - euc_tw2mic(src, buf, len); - mic2big5(buf, dest, strlen((char *) buf)); - pfree(buf); + euc_tw2big5(src, dest, len); PG_RETURN_VOID(); } @@ -66,14 +64,10 @@ big5_to_euc_tw(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); - unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW); - buf = palloc(len * ENCODING_GROWTH_RATE + 1); - big52mic(src, buf, len); - mic2euc_tw(buf, dest, strlen((char *) buf)); - pfree(buf); + big52euc_tw(src, dest, len); PG_RETURN_VOID(); } @@ -134,6 +128,136 @@ mic_to_big5(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + +/* + * EUC_TW ---> Big5 + */ +static void +euc_tw2big5(const unsigned char *euc, unsigned char *p, int len) +{ + unsigned char c1; + unsigned short big5buf, + cnsBuf; + unsigned char lc; + int l; + + while (len > 0) + { + c1 = *euc; + if (IS_HIGHBIT_SET(c1)) + { + /* Verify and decode the next EUC_TW input character */ + l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len); + if (l < 0) + report_invalid_encoding(PG_EUC_TW, + (const char *) euc, len); + if (c1 == SS2) + { + c1 = euc[1]; /* plane No. */ + if (c1 == 0xa1) + lc = LC_CNS11643_1; + else if (c1 == 0xa2) + lc = LC_CNS11643_2; + else + lc = c1 - 0xa3 + LC_CNS11643_3; + cnsBuf = (euc[2] << 8) | euc[3]; + } + else + { /* CNS11643-1 */ + lc = LC_CNS11643_1; + cnsBuf = (c1 << 8) | euc[1]; + } + + /* Write it out in Big5 */ + big5buf = CNStoBIG5(cnsBuf, lc); + if (big5buf == 0) + report_untranslatable_char(PG_EUC_TW, PG_BIG5, + (const char *) euc, len); + *p++ = (big5buf >> 8) & 0x00ff; + *p++ = big5buf & 0x00ff; + + euc += l; + len -= l; + } + else + { /* should be ASCII */ + if (c1 == 0) + report_invalid_encoding(PG_EUC_TW, + (const char *) euc, len); + *p++ = c1; + euc++; + len--; + } + } + *p = '\0'; +} + +/* + * Big5 ---> EUC_TW + */ +static void +big52euc_tw(const unsigned char *big5, unsigned char *p, int len) +{ + unsigned short c1; + unsigned short big5buf, + cnsBuf; + unsigned char lc; + int l; + + while (len > 0) + { + /* Verify and decode the next Big5 input character */ + c1 = *big5; + if (IS_HIGHBIT_SET(c1)) + { + l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len); + if (l < 0) + report_invalid_encoding(PG_BIG5, + (const char *) big5, len); + big5buf = (c1 << 8) | big5[1]; + cnsBuf = BIG5toCNS(big5buf, &lc); + + if (lc == LC_CNS11643_1) + { + *p++ = (cnsBuf >> 8) & 0x00ff; + *p++ = cnsBuf & 0x00ff; + } + else if (lc == LC_CNS11643_2) + { + *p++ = SS2; + *p++ = 0xa2; + *p++ = (cnsBuf >> 8) & 0x00ff; + *p++ = cnsBuf & 0x00ff; + } + else if (lc >= LC_CNS11643_3 && lc <= LC_CNS11643_7) + { + *p++ = SS2; + *p++ = lc - LC_CNS11643_3 + 0xa3; + *p++ = (cnsBuf >> 8) & 0x00ff; + *p++ = cnsBuf & 0x00ff; + } + else + report_untranslatable_char(PG_BIG5, PG_EUC_TW, + (const char *) big5, len); + + big5 += l; + len -= l; + } + else + { + /* ASCII */ + if (c1 == 0) + report_invalid_encoding(PG_BIG5, + (const char *) big5, len); + *p++ = c1; + big5++; + len--; + continue; + } + } + *p = '\0'; +} + /* * EUC_TW ---> MIC */ |