PostgreSQL Source Code git master
euc_tw_and_big5.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * EUC_TW, BIG5 and MULE_INTERNAL
4 *
5 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 * Portions Copyright (c) 1994, Regents of the University of California
7 *
8 * IDENTIFICATION
9 * src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
10 *
11 *-------------------------------------------------------------------------
12 */
13
14#include "postgres.h"
15#include "fmgr.h"
16#include "mb/pg_wchar.h"
17
19 .name = "euc_tw_and_big5",
20 .version = PG_VERSION
21);
22
29
30/* ----------
31 * conv_proc(
32 * INTEGER, -- source encoding id
33 * INTEGER, -- destination encoding id
34 * CSTRING, -- source string (null terminated C string)
35 * CSTRING, -- destination string (null terminated C string)
36 * INTEGER, -- source string length
37 * BOOL -- if true, don't throw an error if conversion fails
38 * ) returns INTEGER;
39 *
40 * Returns the number of bytes successfully converted.
41 * ----------
42 */
43
/*
 * Forward declarations of the file-local converters.  Each one reads up to
 * len bytes from its first argument, writes the converted string to p, and
 * returns the number of source bytes it consumed.
 */
static int	euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int	big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError);
static int	big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError);
static int	mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError);
static int	euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int	mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError);
50
53{
54 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
55 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
56 int len = PG_GETARG_INT32(4);
57 bool noError = PG_GETARG_BOOL(5);
58 int converted;
59
61
62 converted = euc_tw2big5(src, dest, len, noError);
63
64 PG_RETURN_INT32(converted);
65}
66
69{
70 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
71 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
72 int len = PG_GETARG_INT32(4);
73 bool noError = PG_GETARG_BOOL(5);
74 int converted;
75
77
78 converted = big52euc_tw(src, dest, len, noError);
79
80 PG_RETURN_INT32(converted);
81}
82
85{
86 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
87 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
88 int len = PG_GETARG_INT32(4);
89 bool noError = PG_GETARG_BOOL(5);
90 int converted;
91
93
94 converted = euc_tw2mic(src, dest, len, noError);
95
96 PG_RETURN_INT32(converted);
97}
98
101{
102 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
103 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
104 int len = PG_GETARG_INT32(4);
105 bool noError = PG_GETARG_BOOL(5);
106 int converted;
107
109
110 converted = mic2euc_tw(src, dest, len, noError);
111
112 PG_RETURN_INT32(converted);
113}
114
115Datum
117{
118 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
119 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
120 int len = PG_GETARG_INT32(4);
121 bool noError = PG_GETARG_BOOL(5);
122 int converted;
123
125
126 converted = big52mic(src, dest, len, noError);
127
128 PG_RETURN_INT32(converted);
129}
130
131Datum
133{
134 unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
135 unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
136 int len = PG_GETARG_INT32(4);
137 bool noError = PG_GETARG_BOOL(5);
138 int converted;
139
141
142 converted = mic2big5(src, dest, len, noError);
143
144 PG_RETURN_INT32(converted);
145}
146
147
148/*
149 * EUC_TW ---> Big5
150 */
151static int
152euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
153{
154 const unsigned char *start = euc;
155 unsigned char c1;
156 unsigned short big5buf,
157 cnsBuf;
158 unsigned char lc;
159 int l;
160
161 while (len > 0)
162 {
163 c1 = *euc;
164 if (IS_HIGHBIT_SET(c1))
165 {
166 /* Verify and decode the next EUC_TW input character */
167 l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
168 if (l < 0)
169 {
170 if (noError)
171 break;
173 (const char *) euc, len);
174 }
175 if (c1 == SS2)
176 {
177 c1 = euc[1]; /* plane No. */
178 if (c1 == 0xa1)
179 lc = LC_CNS11643_1;
180 else if (c1 == 0xa2)
181 lc = LC_CNS11643_2;
182 else
183 lc = c1 - 0xa3 + LC_CNS11643_3;
184 cnsBuf = (euc[2] << 8) | euc[3];
185 }
186 else
187 { /* CNS11643-1 */
188 lc = LC_CNS11643_1;
189 cnsBuf = (c1 << 8) | euc[1];
190 }
191
192 /* Write it out in Big5 */
193 big5buf = CNStoBIG5(cnsBuf, lc);
194 if (big5buf == 0)
195 {
196 if (noError)
197 break;
199 (const char *) euc, len);
200 }
201 *p++ = (big5buf >> 8) & 0x00ff;
202 *p++ = big5buf & 0x00ff;
203
204 euc += l;
205 len -= l;
206 }
207 else
208 { /* should be ASCII */
209 if (c1 == 0)
210 {
211 if (noError)
212 break;
214 (const char *) euc, len);
215 }
216 *p++ = c1;
217 euc++;
218 len--;
219 }
220 }
221 *p = '\0';
222
223 return euc - start;
224}
225
226/*
227 * Big5 ---> EUC_TW
228 */
229static int
230big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
231{
232 const unsigned char *start = big5;
233 unsigned short c1;
234 unsigned short big5buf,
235 cnsBuf;
236 unsigned char lc;
237 int l;
238
239 while (len > 0)
240 {
241 /* Verify and decode the next Big5 input character */
242 c1 = *big5;
243 if (IS_HIGHBIT_SET(c1))
244 {
245 l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
246 if (l < 0)
247 {
248 if (noError)
249 break;
251 (const char *) big5, len);
252 }
253 big5buf = (c1 << 8) | big5[1];
254 cnsBuf = BIG5toCNS(big5buf, &lc);
255
256 if (lc == LC_CNS11643_1)
257 {
258 *p++ = (cnsBuf >> 8) & 0x00ff;
259 *p++ = cnsBuf & 0x00ff;
260 }
261 else if (lc == LC_CNS11643_2)
262 {
263 *p++ = SS2;
264 *p++ = 0xa2;
265 *p++ = (cnsBuf >> 8) & 0x00ff;
266 *p++ = cnsBuf & 0x00ff;
267 }
268 else if (lc >= LC_CNS11643_3 && lc <= LC_CNS11643_7)
269 {
270 *p++ = SS2;
271 *p++ = lc - LC_CNS11643_3 + 0xa3;
272 *p++ = (cnsBuf >> 8) & 0x00ff;
273 *p++ = cnsBuf & 0x00ff;
274 }
275 else
276 {
277 if (noError)
278 break;
280 (const char *) big5, len);
281 }
282
283 big5 += l;
284 len -= l;
285 }
286 else
287 {
288 /* ASCII */
289 if (c1 == 0)
290 {
291 if (noError)
292 break;
294 (const char *) big5, len);
295 }
296 *p++ = c1;
297 big5++;
298 len--;
299 continue;
300 }
301 }
302 *p = '\0';
303
304 return big5 - start;
305}
306
307/*
308 * EUC_TW ---> MIC
309 */
310static int
311euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
312{
313 const unsigned char *start = euc;
314 int c1;
315 int l;
316
317 while (len > 0)
318 {
319 c1 = *euc;
320 if (IS_HIGHBIT_SET(c1))
321 {
322 l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
323 if (l < 0)
324 {
325 if (noError)
326 break;
328 (const char *) euc, len);
329 }
330 if (c1 == SS2)
331 {
332 c1 = euc[1]; /* plane No. */
333 if (c1 == 0xa1)
334 *p++ = LC_CNS11643_1;
335 else if (c1 == 0xa2)
336 *p++ = LC_CNS11643_2;
337 else
338 {
339 /* other planes are MULE private charsets */
340 *p++ = LCPRV2_B;
341 *p++ = c1 - 0xa3 + LC_CNS11643_3;
342 }
343 *p++ = euc[2];
344 *p++ = euc[3];
345 }
346 else
347 { /* CNS11643-1 */
348 *p++ = LC_CNS11643_1;
349 *p++ = c1;
350 *p++ = euc[1];
351 }
352 euc += l;
353 len -= l;
354 }
355 else
356 { /* should be ASCII */
357 if (c1 == 0)
358 {
359 if (noError)
360 break;
362 (const char *) euc, len);
363 }
364 *p++ = c1;
365 euc++;
366 len--;
367 }
368 }
369 *p = '\0';
370
371 return euc - start;
372}
373
374/*
375 * MIC ---> EUC_TW
376 */
377static int
378mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
379{
380 const unsigned char *start = mic;
381 int c1;
382 int l;
383
384 while (len > 0)
385 {
386 c1 = *mic;
387 if (!IS_HIGHBIT_SET(c1))
388 {
389 /* ASCII */
390 if (c1 == 0)
391 {
392 if (noError)
393 break;
395 (const char *) mic, len);
396 }
397 *p++ = c1;
398 mic++;
399 len--;
400 continue;
401 }
402 l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
403 if (l < 0)
404 {
405 if (noError)
406 break;
408 (const char *) mic, len);
409 }
410 if (c1 == LC_CNS11643_1)
411 {
412 *p++ = mic[1];
413 *p++ = mic[2];
414 }
415 else if (c1 == LC_CNS11643_2)
416 {
417 *p++ = SS2;
418 *p++ = 0xa2;
419 *p++ = mic[1];
420 *p++ = mic[2];
421 }
422 else if (c1 == LCPRV2_B &&
423 mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
424 {
425 *p++ = SS2;
426 *p++ = mic[1] - LC_CNS11643_3 + 0xa3;
427 *p++ = mic[2];
428 *p++ = mic[3];
429 }
430 else
431 {
432 if (noError)
433 break;
435 (const char *) mic, len);
436 }
437 mic += l;
438 len -= l;
439 }
440 *p = '\0';
441
442 return mic - start;
443}
444
445/*
446 * Big5 ---> MIC
447 */
448static int
449big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
450{
451 const unsigned char *start = big5;
452 unsigned short c1;
453 unsigned short big5buf,
454 cnsBuf;
455 unsigned char lc;
456 int l;
457
458 while (len > 0)
459 {
460 c1 = *big5;
461 if (!IS_HIGHBIT_SET(c1))
462 {
463 /* ASCII */
464 if (c1 == 0)
465 {
466 if (noError)
467 break;
469 (const char *) big5, len);
470 }
471 *p++ = c1;
472 big5++;
473 len--;
474 continue;
475 }
476 l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
477 if (l < 0)
478 {
479 if (noError)
480 break;
482 (const char *) big5, len);
483 }
484 big5buf = (c1 << 8) | big5[1];
485 cnsBuf = BIG5toCNS(big5buf, &lc);
486 if (lc != 0)
487 {
488 /* Planes 3 and 4 are MULE private charsets */
489 if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
490 *p++ = LCPRV2_B;
491 *p++ = lc; /* Plane No. */
492 *p++ = (cnsBuf >> 8) & 0x00ff;
493 *p++ = cnsBuf & 0x00ff;
494 }
495 else
496 {
497 if (noError)
498 break;
500 (const char *) big5, len);
501 }
502 big5 += l;
503 len -= l;
504 }
505 *p = '\0';
506
507 return big5 - start;
508}
509
510/*
511 * MIC ---> Big5
512 */
513static int
514mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
515{
516 const unsigned char *start = mic;
517 unsigned short c1;
518 unsigned short big5buf,
519 cnsBuf;
520 int l;
521
522 while (len > 0)
523 {
524 c1 = *mic;
525 if (!IS_HIGHBIT_SET(c1))
526 {
527 /* ASCII */
528 if (c1 == 0)
529 {
530 if (noError)
531 break;
533 (const char *) mic, len);
534 }
535 *p++ = c1;
536 mic++;
537 len--;
538 continue;
539 }
540 l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
541 if (l < 0)
542 {
543 if (noError)
544 break;
546 (const char *) mic, len);
547 }
548 if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
549 {
550 if (c1 == LCPRV2_B)
551 {
552 c1 = mic[1]; /* get plane no. */
553 cnsBuf = (mic[2] << 8) | mic[3];
554 }
555 else
556 {
557 cnsBuf = (mic[1] << 8) | mic[2];
558 }
559 big5buf = CNStoBIG5(cnsBuf, c1);
560 if (big5buf == 0)
561 {
562 if (noError)
563 break;
565 (const char *) mic, len);
566 }
567 *p++ = (big5buf >> 8) & 0x00ff;
568 *p++ = big5buf & 0x00ff;
569 }
570 else
571 {
572 if (noError)
573 break;
575 (const char *) mic, len);
576 }
577 mic += l;
578 len -= l;
579 }
580 *p = '\0';
581
582 return mic - start;
583}
unsigned short CNStoBIG5(unsigned short cns, unsigned char lc)
Definition: big5.c:345
unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc)
Definition: big5.c:292
#define IS_HIGHBIT_SET(ch)
Definition: c.h:1126
static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
Datum euc_tw_to_mic(PG_FUNCTION_ARGS)
Datum big5_to_euc_tw(PG_FUNCTION_ARGS)
PG_FUNCTION_INFO_V1(euc_tw_to_big5)
Datum mic_to_big5(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC_EXT(.name="euc_tw_and_big5",.version=PG_VERSION)
Datum big5_to_mic(PG_FUNCTION_ARGS)
Datum euc_tw_to_big5(PG_FUNCTION_ARGS)
static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
Datum mic_to_euc_tw(PG_FUNCTION_ARGS)
static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
static int big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
return str start
void report_untranslatable_char(int src_encoding, int dest_encoding, const char *mbstr, int len)
Definition: mbutils.c:1730
void report_invalid_encoding(int encoding, const char *mbstr, int len)
Definition: mbutils.c:1698
const void size_t len
#define LC_CNS11643_7
Definition: pg_wchar.h:196
@ PG_MULE_INTERNAL
Definition: pg_wchar.h:233
@ PG_EUC_TW
Definition: pg_wchar.h:230
@ PG_BIG5
Definition: pg_wchar.h:265
#define LC_CNS11643_3
Definition: pg_wchar.h:192
#define SS2
Definition: pg_wchar.h:38
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding, destencoding)
Definition: pg_wchar.h:507
#define LC_CNS11643_1
Definition: pg_wchar.h:137
#define LC_CNS11643_4
Definition: pg_wchar.h:193
#define LC_CNS11643_2
Definition: pg_wchar.h:138
#define LCPRV2_B
Definition: pg_wchar.h:163
uintptr_t Datum
Definition: postgres.h:69
const char * name
int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len)
Definition: wchar.c:2150