File: TextOperations.h

package info (click to toggle)
vcmi 1.6.5+dfsg-2
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 32,060 kB
  • sloc: cpp: 238,971; python: 265; sh: 224; xml: 157; ansic: 78; objc: 61; makefile: 49
file content (103 lines) | stat: -rw-r--r-- 4,319 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
/*
 * TextOperations.h, part of VCMI engine
 *
 * Authors: listed in file AUTHORS in main folder
 *
 * License: GNU General Public License v2.0 or later
 * Full text of license available in license.txt file, in main folder
 *
 */
#pragma once

#include <limits>

VCMI_LIB_NAMESPACE_BEGIN

/// Namespace that provides utilities for Unicode (UTF-8) support,
/// together with a few general text helpers (number and date formatting, escaping, fuzzy comparison)
namespace TextOperations
{
	/// Returns the 32-bit Unicode codepoint of a UTF-8 encoded character
	/// data - pointer to the first byte of the character
	/// maxSize - presumably the number of bytes readable from data, as for isValidUnicodeCharacter (confirm against the implementation)
	uint32_t DLL_LINKAGE getUnicodeCodepoint(const char *data, size_t maxSize);

	/// Returns the 32-bit Unicode codepoint of a character given in the selected single-byte encoding
	/// data - the character in that encoding
	/// encoding - name of the single-byte encoding that data is expressed in
	uint32_t DLL_LINKAGE getUnicodeCodepoint(char data, const std::string & encoding );

	/// Returns the length (in bytes) of the UTF-8 character that starts with the specified byte
	size_t DLL_LINKAGE getUnicodeCharacterSize(char firstByte);

	/// Tests whether character points to a valid UTF-8 symbol
	/// maxSize - maximum number of bytes this symbol may consist of ( = remainder of string)
	bool DLL_LINKAGE isValidUnicodeCharacter(const char * character, size_t maxSize);

	/// Returns true if text contains a valid ASCII string
	/// Note that since UTF-8 extends ASCII, any ASCII string is also a UTF-8 string
	bool DLL_LINKAGE isValidASCII(const std::string & text);
	/// Same check for a raw buffer: data points to the first byte, size is the buffer length in bytes
	bool DLL_LINKAGE isValidASCII(const char * data, size_t size);

	/// Tests whether text contains a valid UTF-8 sequence
	bool DLL_LINKAGE isValidUnicodeString(const std::string & text);
	/// Same check for a raw buffer: data points to the first byte, size is the buffer length in bytes
	bool DLL_LINKAGE isValidUnicodeString(const char * data, size_t size);

	/// Converts text from the specified encoding to UTF-8
	/// NOTE(review): the previous comment also mentioned an encoding "specified in settings",
	/// but only this explicit-encoding overload is declared here - confirm whether that wording is stale
	std::string DLL_LINKAGE toUnicode(const std::string & text, const std::string & encoding);

	/// Converts text from UTF-8 to the specified encoding
	/// NOTE: usage of this function should be avoided if possible
	/// NOTE(review): the previous comment also mentioned an encoding "specified in settings",
	/// but only this explicit-encoding overload is declared here - confirm whether that wording is stale
	std::string DLL_LINKAGE fromUnicode(const std::string & text, const std::string & encoding);

	/// Deletes the specified amount of UTF-8 characters from the right end of text
	/// text - string to shorten, passed by non-const reference
	/// amount - number of characters to delete, 1 by default
	DLL_LINKAGE void trimRightUnicode(std::string & text, size_t amount = 1);

	/// Returns the number of Unicode characters in text
	size_t DLL_LINKAGE getUnicodeCharactersCount(const std::string & text);

	/// Converts number into a string using metric system prefixes ('k', 'M', 'G', 'T', 'P', 'E')
	/// to keep the resulting string within the specified number of digits
	/// Note that the resulting string may have more symbols than digits: minus sign and prefix symbol
	/// The definition is located below the namespace, in this header
	template<typename Arithmetic>
	inline std::string formatMetric(Arithmetic number, int maxDigits);

	/// Replaces all symbols that normally need escaping with appropriate escape sequences
	/// NOTE(review): unlike the neighbouring functions this declaration carries no DLL_LINKAGE - confirm that this is intentional
	std::string escapeString(std::string input);

	/// Returns formatted date and time for dt, depending on the language selected
	DLL_LINKAGE std::string getFormattedDateTimeLocal(std::time_t dt);

	/// Returns formatted current date and time, depending on the language selected
	/// timeOffset - optional parameter to modify current time by specified time in seconds
	DLL_LINKAGE std::string getCurrentFormattedDateTimeLocal(std::chrono::seconds timeOffset = {});

	/// Returns formatted time (without date) for dt
	DLL_LINKAGE std::string getFormattedTimeLocal(std::time_t dt);

	/// Returns formatted current time (without date)
	/// timeOffset - optional parameter to modify current time by specified time in seconds
	DLL_LINKAGE std::string getCurrentFormattedTimeLocal(std::chrono::seconds timeOffset = {});

	/// Algorithm for detection of typos in words
	/// Determines how 'different' two strings are - how many changes must be made to turn one string into the other
	/// https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_two_matrix_rows
	DLL_LINKAGE int getLevenshteinDistance(const std::string & s, const std::string & t);

	/// Checks whether two texts are similar, for use when typing into search boxes
	DLL_LINKAGE bool textSearchSimilar(const std::string & s, const std::string & t);
}; // namespace TextOperations

/// Formats number as a string, shortening large magnitudes with a metric prefix
/// ('k', 'M', 'G', 'T', 'P', 'E') so that the numeric part stays below 10^maxDigits.
///
/// number    - value to format; negative values are handled through std::abs
/// maxDigits - maximum number of digits wanted in the numeric part; values below 1 are treated as 0
///
/// Returns std::to_string(number) unchanged when the value already fits. Otherwise the value
/// is divided by 1000 until it fits and the matching prefix symbol is appended. If even the
/// largest prefix ('E') is not sufficient, the result keeps that prefix and may then hold
/// more than maxDigits digits.
template<typename Arithmetic>
inline std::string TextOperations::formatMetric(Arithmetic number, int maxDigits)
{
	// Build 10^maxDigits by multiplication in the target type. Going through std::pow would
	// yield a double whose conversion back to an integral Arithmetic is undefined once the
	// power exceeds the range of the type (e.g. int with maxDigits >= 10).
	constexpr Arithmetic largest = std::numeric_limits<Arithmetic>::max();
	Arithmetic threshold = 1;
	for (int digit = 0; digit < maxDigits; ++digit)
	{
		// 10^maxDigits is not representable, so every value of this type already fits
		if (threshold > largest / 10)
			return std::to_string(number);

		threshold *= 10;
	}

	if (std::abs(number) < threshold)
		return std::to_string(number);

	const std::string symbols = " kMGTPE";
	auto prefix = symbols.begin();
	const auto lastPrefix = symbols.end() - 1;

	// Stop at the last available prefix instead of stepping past the end of the table;
	// an assert alone would leave release builds dereferencing symbols.end()
	while (std::abs(number) >= threshold && prefix != lastPrefix)
	{
		number /= 1000;
		++prefix;
	}
	return std::to_string(number) + *prefix;
}

VCMI_LIB_NAMESPACE_END