File: Util.cpp

package info (click to toggle)
spring 103.0%2Bdfsg2-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 43,720 kB
  • ctags: 63,685
  • sloc: cpp: 368,283; ansic: 33,988; python: 12,417; java: 12,203; awk: 5,879; sh: 1,846; xml: 655; perl: 405; php: 211; objc: 194; makefile: 77; sed: 2
file content (264 lines) | stat: -rw-r--r-- 6,571 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
/* This file is part of the Spring engine (GPL v2 or later), see LICENSE.html */

#include "System/Util.h"
#if defined(_MSC_VER) && (_MSC_VER >= 1310)
	#include <intrin.h>
#endif
#include <cstring>
#include <boost/cstdint.hpp>


/**
 * Returns a copy of @p text in which every non-overlapping occurrence of
 * @p from is replaced by @p to.
 *
 * The search continues behind the inserted replacement, so occurrences of
 * @p from inside of @p to are never replaced again.
 *
 * @param text the string to search in
 * @param from the sub-string to look for; if empty, @p text is returned as is
 * @param to   the replacement for each occurrence of @p from
 * @return the string with all replacements applied
 */
std::string StringReplace(const std::string& text,
                          const std::string& from,
                          const std::string& to)
{
	// an empty pattern matches at every position, the loop below
	// would never terminate (and keep growing the string)
	if (from.empty())
		return text;

	std::string working = text;
	std::string::size_type pos = 0;

	while ((pos = working.find(from, pos)) != std::string::npos) {
		// replace in place instead of re-assembling the whole string per match
		working.replace(pos, from.size(), to);
		// skip the replacement, it must not be searched again
		pos += to.size();
	}

	return working;
}

/**
 * Returns a copy of @p str without any of the characters listed in @p chars,
 * no matter where in the string they appear.
 */
std::string StringStrip(const std::string& str, const std::string& chars)
{
	std::string kept;
	kept.reserve(str.size());

	for (const char c : str) {
		// only take over characters that are not on the removal list
		if (chars.find(c) == std::string::npos)
			kept.push_back(c);
	}

	return kept;
}



/// Removes all leading and trailing characters contained in @p ws from @p str.
/// A string consisting only of such characters ends up empty.
/// @see http://www.codeproject.com/KB/stl/stdstringtrim.aspx
void StringTrimInPlace(std::string& str, const std::string& ws)
{
	const std::string::size_type last = str.find_last_not_of(ws);

	if (last == std::string::npos) {
		// nothing but trimmable characters (or nothing at all)
		str.clear();
		return;
	}

	// cut off the tail first ...
	str.erase(last + 1);
	// ... then the head; a non-trimmable character is known to exist,
	// so the search can not fail here
	str.erase(0, str.find_first_not_of(ws));
}

/// Copying variant of StringTrimInPlace(): @p str itself stays untouched,
/// the trimmed text is returned.
std::string StringTrim(const std::string& str, const std::string& ws)
{
	std::string trimmed = str;
	StringTrimInPlace(trimmed, ws);
	return trimmed;
}

/**
 * Interprets a string as boolean.
 *
 * The string is trimmed and lower-cased first. An empty string and the
 * words "0", "n", "no", "f", "false" and "off" mean false,
 * everything else means true.
 */
bool StringToBool(std::string str)
{
	StringTrimInPlace(str);
	StringToLowerInPlace(str);

	if (str.empty())
		return false;

	// regex would probably be more appropriate,
	// but it is better not to rely on any external lib here
	static const char* const falseWords[] = { "0", "n", "no", "f", "false", "off" };

	for (const char* word : falseWords) {
		if (str == word)
			return false;
	}

	return true;
}

/// Returns true if @p str begins with the C string @p prefix.
/// A NULL prefix never matches, an empty prefix always does.
bool StringStartsWith(const std::string& str, const char* prefix)
{
	if (prefix == NULL)
		return false;

	const std::string::size_type prefixLen = strlen(prefix);

	// a prefix longer than the string itself can not match
	return (str.size() >= prefixLen) && (str.compare(0, prefixLen, prefix) == 0);
}

/// Returns true if @p str ends with the C string @p postfix.
/// A NULL postfix never matches, an empty postfix always does.
bool StringEndsWith(const std::string& str, const char* postfix)
{
	if (postfix == NULL)
		return false;

	const std::string::size_type postfixLen = strlen(postfix);

	// a postfix longer than the string itself can not match
	if (str.size() < postfixLen)
		return false;

	// compare exactly the last postfixLen characters
	return (str.compare(str.size() - postfixLen, postfixLen, postfix) == 0);
}


/**
 * Toggles or sets a boolean depending on a textual argument.
 *
 * @param container  the boolean to modify
 * @param argValue   empty: toggle @p container; otherwise it is parsed
 *                   with StringToBool() and assigned
 * @param inverseArg if true, the parsed value is negated before assigning
 *                   (has no effect when toggling)
 */
void InverseOrSetBool(bool& container, const std::string& argValue, const bool inverseArg)
{
	if (argValue.empty()) {
		// no argument given: flip the current state
		container = !container;
		return;
	}

	// explicit value given: take it over, negated if requested
	container = (StringToBool(argValue) != inverseArg);
}



/**
 * Counts the consecutive set bits at the top of a byte,
 * e.g. 0xE2 (1110 0010) -> 3. For an UTF-8 lead byte this is the
 * number of bytes the sequence claims to consist of.
 */
static inline unsigned count_leading_ones(unsigned char x)
{
	// Put the inverted byte at the top of a 32 bit word and fill the lower
	// 24 bits with ones: the leading zeros of that word are the leading ones
	// of x, and the word is never 0 (both intrinsics are undefined for 0).
	// unsigned int is assumed to be 32 bits wide.
	const unsigned int inverted = ((~static_cast<unsigned int>(x) & 0xFFu) << 24) | 0x00FFFFFFu;
#ifdef _MSC_VER
	unsigned long r;
	_BitScanReverse(&r, (unsigned long)inverted);
	return 31 - r;
#else
	return __builtin_clz(inverted);
#endif
}


/**
 * Decodes the character starting at byte offset @p pos of @p text and
 * advances @p pos behind it.
 *
 * Bytes that do not form a complete UTF-8 sequence (lead byte directly
 * followed by the announced number of continuation bytes) are returned
 * one by one as 8bit Latin-1 characters.
 *
 * @param text the string to read from
 * @param pos  byte offset of the character to decode; increased by the
 *             number of consumed bytes (1-4), left untouched if it does
 *             not point into the string
 * @return the decoded code point (a tab is returned as U+2007),
 *         or 0 if @p pos does not point into the string
 */
char32_t Utf8GetNextChar(const std::string& text, int& pos)
{
	// UTF8 looks like this
	// 1Byte == ASCII:      0xxxxxxx
	// 2Bytes encoded char: 110xxxxx 10xxxxxx
	// 3Bytes encoded char: 1110xxxx 10xxxxxx 10xxxxxx
	// 4Bytes encoded char: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	// Originally there were 5 & 6 byte versions too, but they were dropped in RFC 3629.
	// So UTF8 maps to UTF16 range only.

	static const unsigned UTF8_CONT_MASK = 0xC0; // 11xxxxxx
	static const unsigned UTF8_CONT_OKAY = 0x80; // 10xxxxxx

	// a position in front of or behind the text is treated like end of string
	if (pos < 0)
		return 0;

	const std::string::size_type offset = static_cast<std::string::size_type>(pos);
	if (offset >= text.length())
		return 0;

	// read the next (up to) 4 bytes; bytes behind the end of the string stay 0,
	// which is no continuation byte, so truncated sequences get rejected below
	unsigned char utf8[4] = { 0, 0, 0, 0 };
	const std::string::size_type remainingChars = text.length() - offset;
	memcpy(utf8, text.data() + offset, (remainingChars < 4) ? remainingChars : 4);

	// how many bytes are requested for our multi-byte utf8 sequence
	unsigned clo = count_leading_ones(utf8[0]);
	if (clo > 4 || clo == 0) clo = 1; // ignore >=5 byte ones cause of RFC 3629

	// how many healthy utf8 bytes are following the lead byte *directly*;
	// a continuation byte behind a gap does not belong to this sequence
	unsigned numValidUtf8Bytes = 1; // first char is always valid
	while ((numValidUtf8Bytes < 4) && ((utf8[numValidUtf8Bytes] & UTF8_CONT_MASK) == UTF8_CONT_OKAY))
		++numValidUtf8Bytes;

	// check if enough trailing utf8 bytes are healthy
	// else ignore utf8 and parse it as 8bit Latin-1 char (extended ASCII)
	// this adds backward compatibility with the old renderer
	// which supported extended ASCII with umlauts etc.
	const unsigned usedUtf8Bytes = (clo <= numValidUtf8Bytes) ? clo : 1u;

	char32_t u = 0;
	switch (usedUtf8Bytes) {
		case 2: {
			u  = (char32_t(utf8[0] & 0x1F)) << 6;
			u |= (char32_t(utf8[1] & 0x3F));
		} break;
		case 3: {
			u  = (char32_t(utf8[0] & 0x0F)) << 12;
			u |= (char32_t(utf8[1] & 0x3F)) << 6;
			u |= (char32_t(utf8[2] & 0x3F));
		} break;
		case 4: {
			u  = (char32_t(utf8[0] & 0x07)) << 18;
			u |= (char32_t(utf8[1] & 0x3F)) << 12;
			u |= (char32_t(utf8[2] & 0x3F)) << 6;
			u |= (char32_t(utf8[3] & 0x3F));
			//TODO limit range to UTF16!
		} break;
		default: {
			// plain ASCII, or a byte taken as Latin-1
			u  = utf8[0];
		} break;
	}
	pos += static_cast<int>(usedUtf8Bytes);

	// replace tabs with spaces (U+2007)
	if (u == 0x9)
		u = 0x2007;

	return u;
}


/**
 * Encodes a single code point as UTF-8.
 *
 * @param ch the code point to encode
 * @return the 1 to 4 byte long UTF-8 sequence, or an empty string
 *         if @p ch needs more than 21 bits
 */
std::string UnicodeToUtf8(char32_t ch)
{
	// continuation byte (10xx xxxx) carrying the six payload bits
	// that start at bit <shift> of the code point
	const auto tail = [ch](unsigned shift) {
		return static_cast<char>(0x80u | ((ch >> shift) & 0x3Fu));
	};

	std::string encoded;

	if (ch < 0x80u) {
		// in:  0000 0000  0000 0000  0000 0000  0aaa aaaa
		// out:                                  0aaa aaaa
		encoded.push_back(static_cast<char>(ch));
	} else if (ch < 0x800u) {
		// in:  0000 0000  0000 0000  0000 0bbb  bbaa aaaa
		// out:                       110b bbbb  10aa aaaa
		encoded.push_back(static_cast<char>(0xC0u | (ch >> 6)));
		encoded.push_back(tail(0));
	} else if (ch < 0x10000u) {
		// in:  0000 0000  0000 0000  cccc bbbb  bbaa aaaa
		// out:            1110 cccc  10bb bbbb  10aa aaaa
		encoded.push_back(static_cast<char>(0xE0u | (ch >> 12)));
		encoded.push_back(tail(6));
		encoded.push_back(tail(0));
	} else if (ch < 0x200000u) {
		// in:  0000 0000  000d ddcc  cccc bbbb  bbaa aaaa
		// out: 1111 0ddd  10cc cccc  10bb bbbb  10aa aaaa
		encoded.push_back(static_cast<char>(0xF0u | (ch >> 18)));
		encoded.push_back(tail(12));
		encoded.push_back(tail(6));
		encoded.push_back(tail(0));
	}

	// anything beyond 21 bits can not be encoded and yields an empty string
	return encoded;
}