File: utf8.h

package info (click to toggle)
scummvm 2.9.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 450,580 kB
  • sloc: cpp: 4,299,825; asm: 28,322; python: 12,901; sh: 11,302; java: 9,289; xml: 7,895; perl: 2,639; ansic: 2,465; yacc: 1,670; javascript: 1,020; makefile: 933; lex: 578; awk: 275; objc: 82; sed: 11; php: 1
file content (115 lines) | stat: -rw-r--r-- 3,296 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/* ScummVM - Graphic Adventure Engine
 *
 * ScummVM is the legal property of its developers, whose names
 * are too numerous to list here. Please refer to the COPYRIGHT
 * file distributed with this source distribution.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

 //=============================================================================
 //
 // UTF-8 utilities.
 // Based on utf8 code from https://c9x.me/irc/ (public domain)
 //
 //=============================================================================

#ifndef AGS_SHARED_UTIL_UTF8_H
#define AGS_SHARED_UTIL_UTF8_H

#include "common/std/algorithm.h"
#include "ags/shared/core/types.h"

namespace AGS3 {
namespace Utf8 {

// A single Unicode code point.
typedef int32_t Rune;
// Longest UTF-8 sequence handled by these helpers, in bytes.
const size_t UtfSz = 4;
// Value substituted for anything that fails to decode or validate (U+FFFD).
const Rune RuneInvalid = 0xFFFD;

// Lookup tables indexed by byte class: slot 0 describes a continuation byte,
// slots 1..UtfSz describe the lead byte of a sequence of that many bytes.
//   utfbyte - marker bits identifying the class
//   utfmask - which bits of the byte carry the marker (the rest is payload)
//   utfmin  - smallest code point accepted for the class
//   utfmax  - largest code point accepted for the class
// Slot 0 of utfmin/utfmax spans the whole code point range.
const unsigned char utfbyte[UtfSz + 1] = { 0x80, 0x00, 0xC0, 0xE0, 0xF0 };
const unsigned char utfmask[UtfSz + 1] = { 0xC0, 0x80, 0xE0, 0xF0, 0xF8 };
const Rune utfmin[UtfSz + 1] = { 0x000000, 0x00, 0x080, 0x0800, 0x010000 };
const Rune utfmax[UtfSz + 1] = { 0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF };


// Checks the code point stored in *u against the range of table slot i and
// against the surrogate block (U+D800..U+DFFF); on failure *u is overwritten
// with RuneInvalid. Returns the number of bytes needed to encode the
// (possibly replaced) code point.
inline size_t Validate(Rune *u, size_t i) {
	const Rune cp = *u;
	const bool outOfRange = cp < utfmin[i] || cp > utfmax[i];
	const bool isSurrogate = cp >= 0xD800 && cp <= 0xDFFF;
	if (outOfRange || isSurrogate)
		*u = RuneInvalid;

	// Smallest sequence length whose upper bound can hold the value.
	size_t width = 1;
	while (*u > utfmax[width])
		++width;
	return width;
}

// Classifies one byte. *i receives the matching table slot (0 for a
// continuation byte, 1..UtfSz for a lead byte, UtfSz + 1 when nothing
// matches). Returns the payload bits of the byte, or 0 when nothing matches.
inline Rune DecodeByte(unsigned char c, size_t *i) {
	size_t slot = 0;
	while (slot <= UtfSz && (c & utfmask[slot]) != utfbyte[slot])
		++slot;
	*i = slot;
	return (slot <= UtfSz) ? (c & ~utfmask[slot]) : 0;
}

// Builds one output byte: the marker bits of table slot i combined with the
// low payload bits of u that fit into that slot.
inline char EncodeByte(Rune u, size_t i) {
	const Rune payload = u & ~utfmask[i];
	return static_cast<char>(utfbyte[i] | payload);
}

// Decodes one code point from the start of c, looking at no more than clen
// bytes. *u receives the code point, or RuneInvalid whenever decoding or
// validation fails.
// Returns the number of bytes to advance by:
//   0              - clen is 0, c points at the terminator, or clen cuts the
//                    sequence short;
//   1              - the first byte is not a valid lead byte;
//   bytes consumed - a byte that is not a continuation byte interrupted the
//                    sequence (that byte itself is not counted);
//   full length    - the sequence was complete (the value may still have been
//                    rejected by Validate).
inline size_t GetChar(const char *c, size_t clen, Rune *u) {
	*u = RuneInvalid;
	if (clen == 0 || *c == '\0')
		return 0;

	size_t seqLen;
	Rune acc = DecodeByte(c[0], &seqLen);
	if (seqLen == 0 || seqLen > UtfSz)
		return 1;

	size_t pos = 1;
	while (pos < clen && pos < seqLen) {
		size_t byteClass;
		acc = (acc << 6) | DecodeByte(c[pos], &byteClass);
		if (byteClass != 0)
			return pos;
		++pos;
	}
	if (pos < seqLen)
		return 0;

	*u = acc;
	Validate(u, seqLen);
	return seqLen;
}

// Encodes code point u as UTF-8 into c, which has room for clen bytes.
// Code points rejected by Validate are written as RuneInvalid instead.
// Returns the number of bytes written, or 0 when the buffer is too small.
// No terminator is appended.
inline size_t SetChar(Rune u, char *c, size_t clen) {
	const size_t len = Validate(&u, 0);
	if (len > UtfSz || len > clen)
		return 0;

	// Continuation bytes are filled back to front, six payload bits each.
	for (size_t tail = len; tail > 1; --tail) {
		c[tail - 1] = EncodeByte(u, 0);
		u >>= 6;
	}
	// Whatever bits remain go into the lead byte.
	c[0] = EncodeByte(u, len);
	return len;
}

// Counts the characters in a null-terminated utf8 string by stepping through
// it with GetChar until GetChar reports 0.
inline size_t GetLength(const char *c) {
	size_t count = 0;
	Rune ignored;
	for (;;) {
		const size_t step = GetChar(c, UtfSz, &ignored);
		if (step == 0)
			break;
		c += step;
		++count;
	}
	return count;
}

} // namespace Utf8
} // namespace AGS3

#endif