File: ustr.h

package info (click to toggle)
scummvm 2.9.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 450,580 kB
  • sloc: cpp: 4,299,825; asm: 28,322; python: 12,901; sh: 11,302; java: 9,289; xml: 7,895; perl: 2,639; ansic: 2,465; yacc: 1,670; javascript: 1,020; makefile: 933; lex: 578; awk: 275; objc: 82; sed: 11; php: 1
file content (247 lines) | stat: -rw-r--r-- 8,886 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
/* ScummVM - Graphic Adventure Engine
 *
 * ScummVM is the legal property of its developers, whose names
 * are too numerous to list here. Please refer to the COPYRIGHT
 * file distributed with this source distribution.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#ifndef COMMON_USTR_H
#define COMMON_USTR_H

#include "common/scummsys.h"
#include "common/util.h"
#include "common/str-enc.h"
#include "common/str-base.h"

namespace Common {

/**
 * @defgroup common_ustr UTF-32 strings
 * @ingroup common
 *
 * @brief API for working with UTF-32 strings.
 *
 * @{
 */

// Forward declaration: this header only refers to String in declarations
// (conversion constructors, assignment, comparison, encode(), friendship),
// so the complete type is not required here.
class String;

/** Character type of U32String: every element of the string is one char32_t. */
typedef char32_t u32char_type_t;

/**
 * A simple string class for UTF-32 strings in ScummVM. The main intention
 * behind this class is to feature a simple way of displaying UTF-32 strings
 * through the Graphics::Font API.
 *
 * Note that operations like equals, deleteCharacter, toUppercase, etc.
 * are only simplified convenience operations. They might not fully work
 * as you would expect for a proper UTF-32 string class.
 *
 * The presence of \0 characters in the string will cause undefined
 * behavior in some operations.
 */
class U32String : public BaseString<u32char_type_t> {
public:
	typedef uint32 unsigned_type; /*!< Unsigned version of the underlying type. */
public:
	/** Construct a new empty string. */
	constexpr U32String() : BaseString<u32char_type_t>() {}

	/** Construct a new string from the given null-terminated UTF-32 string. */
	explicit U32String(const value_type *str) : BaseString<u32char_type_t>(str) {}

	/** Construct a new string containing exactly @p len characters read from address @p str. */
	U32String(const value_type *str, uint32 len) : BaseString<u32char_type_t>(str, len) {}

	/**
	 * Convenience overloads of the three constructors taking value_type
	 * pointers, for callers that hold their UTF-32 data as uint32.
	 * The pointers are simply cast to value_type pointers and forwarded;
	 * the data itself is not converted.
	 */
	explicit U32String(const uint32 *str) : BaseString<u32char_type_t>((const value_type *) str) {}
	U32String(const uint32 *str, uint32 len) : BaseString<u32char_type_t>((const value_type *) str, len) {}
	U32String(const uint32 *beginP, const uint32 *endP) : BaseString<u32char_type_t>((const value_type *) beginP, (const value_type *) endP) {}

	/** Construct a new string containing the characters between @p beginP (including) and @p endP (excluding). */
	U32String(const value_type *beginP, const value_type *endP) : BaseString<u32char_type_t>(beginP, endP) {}

	/** Construct a copy of the given string. */
	U32String(const U32String &str) : BaseString<u32char_type_t>(str) {}

	/** Construct a string by moving an existing string. */
	U32String(U32String &&str) : BaseString<u32char_type_t>(static_cast<BaseString<u32char_type_t> &&>(str)) {}

	/** Construct a new string from the given null-terminated C string that uses the given @p page encoding. */
	explicit U32String(const char *str, CodePage page = kUtf8);

	/**
	 * Construct a new string from @p len characters read from address @p str,
	 * which uses the given @p page encoding.
	 */
	U32String(const char *str, uint32 len, CodePage page = kUtf8);

	/**
	 * Construct a new string from the characters between @p beginP (including)
	 * and @p endP (excluding), which use the given @p page encoding.
	 */
	U32String(const char *beginP, const char *endP, CodePage page = kUtf8);

	/** Construct a new string from the given String that uses the given @p page encoding. */
	U32String(const String &str, CodePage page = kUtf8);

	/** Construct a string consisting of the given character. */
	explicit U32String(value_type c);

	/** Assign a given string to this string. */
	U32String &operator=(const U32String &str);

	/** Move a given string to this string. */
	U32String &operator=(U32String &&str);

	/** @overload */
	U32String &operator=(const String &str);

	/** @overload */
	U32String &operator=(const value_type *str);

	/** @overload */
	U32String &operator=(const char *str);

	/** @overload */
	U32String &operator=(value_type c);

	/** Append the given string to this string. */
	U32String &operator+=(const U32String &str);

	/** @overload */
	U32String &operator+=(const value_type *str);

	/** @overload */
	U32String &operator+=(value_type c);

	// Keep the base class comparison operators visible: the overloads
	// declared below would otherwise hide them.
	using BaseString<value_type>::operator==;
	using BaseString<value_type>::operator!=;

	/** Check whether this string is identical to string @p x. */
	bool operator==(const String &x) const;

	/** @overload */
	bool operator==(const char *x) const;

	/** Check whether this string is different than string @p x. */
	bool operator!=(const String &x) const;

	/** @overload */
	bool operator!=(const char *x) const;

	/** Convert the string to the given @p page encoding and return the result as a new String. */
	String encode(CodePage page = kUtf8) const;

	/** Convert the string to the given @p page encoding and output in string @p outString,
		replacing invalid characters with @p errorChar. */
	StringEncodingResult encode(String &outString, CodePage page, char errorChar) const;

	/**
	 * Print formatted data into a U32String object.
	 *
	 * Similar to sprintf, except that it stores the result
	 * in a (variably sized) string instead of a fixed-size buffer.
	 */
	template<class... TParam>
	static U32String format(const U32String &fmt, TParam... param);

	/** @overload **/
	static U32String format(const char *fmt, ...);

	/**
	 * Print formatted data into a U32String object.
	 * The method takes in the output by reference and works with iterators.
	 *
	 * @param output String that receives the formatted result.
	 */
	static int vformat(const value_type *fmt, const value_type *fmtEnd, U32String &output, va_list args);

	/** Return a substring of this string. */
	U32String substr(size_t pos = 0, size_t len = npos) const;

	const uint32 *u32_str() const {   /*!< Return the string as a UTF-32 pointer. */
		// The internal buffer is returned through a pointer cast; no copy is made.
		return (const uint32 *) _str;
	}

	/** Decode a big endian UTF-16 string into a U32String. */
	static Common::U32String decodeUTF16BE(const uint16 *start, uint len);

	/** Decode a little endian UTF-16 string into a U32String. */
	static Common::U32String decodeUTF16LE(const uint16 *start, uint len);

	/** Decode a native UTF-16 string into a U32String. */
	static Common::U32String decodeUTF16Native(const uint16 *start, uint len);

	/** Transform a U32String into UTF-16 representation (big endian). The result must be freed. */
	uint16 *encodeUTF16BE(uint *len = nullptr) const;

	/** Transform a U32String into UTF-16 representation (little endian). The result must be freed. */
	uint16 *encodeUTF16LE(uint *len = nullptr) const;

	/** Transform a U32String into UTF-16 representation (native endian). The result must be freed. */
	uint16 *encodeUTF16Native(uint *len = nullptr) const;

	/** Transform Traditional Chinese string into Simplified. */
	U32String transcodeChineseT2S() const;

private:
	/**
	 * C-variadic back end of the template format() overload, which passes
	 * the address of its format string followed by its arguments.
	 */
	static U32String formatInternal(const U32String *fmt, ...);

	/**
	 * Helper function for vformat. Convert an int to a string.
	 * Minimal implementation, only for base 10.
	 *
	 * NOTE(review): a @p base parameter is accepted although the description
	 * says only base 10 is handled - confirm against the implementation.
	 */
	static value_type* ustr_helper_itoa(int num, value_type* str, uint base);

	/**
	 * Helper function for vformat. Convert an unsigned int to a string.
	 * Minimal implementation, only for base 10.
	 *
	 * NOTE(review): a @p base parameter is accepted although the description
	 * says only base 10 is handled - confirm against the implementation.
	 */
	static value_type* ustr_helper_uitoa(uint num, value_type* str, uint base);

	// Decoding helpers for char-based input; they are only declared here.
	// Judging by the names, decodeInternal presumably dispatches on @p page
	// to one of the encoding-specific routines - confirm in the implementation.
	void decodeInternal(const char *str, uint32 len, CodePage page);
	void decodeOneByte(const char *str, uint32 len, CodePage page);
	void decodeWindows932(const char *src, uint32 len);
	void decodeWindows936(const char *src, uint32 len);
	void decodeWindows949(const char *src, uint32 len);
	void decodeWindows950(const char *src, uint32 len);
	void decodeJohab(const char *src, uint32 len);
	void decodeUTF8(const char *str, uint32 len);

	// String is granted access to the private members of this class.
	friend class String;
};

/**
 * Definition of the variadic-template U32String::format() overload.
 *
 * Passes the address of @p fmt, followed by every argument of @p param,
 * on to formatInternal() and returns the string produced there.
 */
template<class... TParam>
inline U32String U32String::format(const U32String &fmt, TParam... param) {
	// formatInternal() receives the format string by pointer, not by reference.
	const U32String *const fmtPtr = &fmt;
	return formatInternal(fmtPtr, Common::forward<TParam>(param)...);
}

/**
 * Concatenate strings @p x and @p y.
 *
 * @param x The left-hand string.
 * @param y The right-hand string.
 * @return The concatenation, returned by value.
 */
U32String operator+(const U32String &x, const U32String &y);

/**
 * Append the given @p y character to the given @p x string.
 *
 * @param x The string to append to; it is taken by const reference.
 * @param y The character to append.
 * @return The resulting string, returned by value.
 */
U32String operator+(const U32String &x, U32String::value_type y);

/**
 * Convert a string so that all of its non-printable characters are
 * escaped using C++ escape sequences.
 *
 * Unlike the String version, this does not escape characters with
 * code points > 127.
 *
 * @param src The source string.
 * @param keepNewLines Whether to keep newlines as they are or to convert
 *                     them to '\n'. Default: true.
 * @return The converted string.
 */
U32String toPrintable(const U32String &src, bool keepNewLines = true);

/** @} */

} // End of namespace Common

#endif