1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
/** @file
* Low-level UTF8 handling.
*/
/*
* Copyright (c) 2009 Marko Kreen
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef _USUAL_UTF8_H_
#define _USUAL_UTF8_H_
#include <usual/base.h>
/**
* Parse one Unicode codepoint from a UTF8 stream.
*
* On an invalid UTF8 sequence, returns a negative byte value and
* increases src_p by one.
*
* @param src_p Location of data pointer. Will be incremented in-place.
* @param srcend Pointer to end of data.
* @return Unicode codepoint, or negative byte value on error.
*/
int utf8_get_char(const char **src_p, const char *srcend);
/**
* Write a Unicode codepoint as a UTF8 sequence.
*
* Skips invalid Unicode values without error.
*
* @param c Unicode codepoint.
* @param dst_p Location of dest pointer, will be increased in-place.
* @param dstend Pointer to end of buffer.
* @return false if there is not enough room, true otherwise.
*/
bool utf8_put_char(unsigned int c, char **dst_p, const char *dstend);
/**
* Return UTF8 sequence length based on a Unicode codepoint.
*
* @param c Unicode codepoint.
* @return Length of the UTF8 sequence for c.
*/
int utf8_char_size(unsigned int c);
/**
* Return UTF8 sequence length based on its first byte.
*
* @param c First byte of the sequence.
* @return Length of the UTF8 sequence that starts with c.
*/
int utf8_seq_size(unsigned char c);
/**
* Validate a single UTF8 sequence.
*
* @param src Pointer to the sequence to check.
* @param srcend Pointer to end of data.
* @return Sequence length if all bytes are valid, 0 otherwise.
*/
int utf8_validate_seq(const char *src, const char *srcend);
/**
* Validate a UTF8 string.
*
* NOTE(review): this function had no documentation in the header; the
* details below are inferred from its name and signature only and should
* be confirmed against the implementation.
*
* @param src Pointer to start of data (presumably).
* @param end Pointer to end of data (presumably).
* @return Presumably true if the data is valid UTF8, false otherwise.
*/
bool utf8_validate_string(const char *src, const char *end);
#endif
|