1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
|
/*
* This file is part of Warzone 2100.
* Copyright (C) 2018-2020 Warzone 2100 Project
*
* Warzone 2100 is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Warzone 2100 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Warzone 2100; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _LIB_FRAMEWORK_WZSTRING_H
#define _LIB_FRAMEWORK_WZSTRING_H
#include <cstdint>
#include <string>
#include <vector>
#include <locale>
#include "wzglobal.h"
// Value type wrapping a single Unicode codepoint.
// The codepoint is held internally in its UTF-32 representation.
class WzUniCodepoint {
public:
	// Named constructor: wraps the supplied UTF-32 value as-is.
	static WzUniCodepoint fromUTF32(uint32_t utf_32_codepoint)
	{
		return WzUniCodepoint{utf_32_codepoint};
	}

	// Named constructor taking a single character literal (defined out-of-line).
	static WzUniCodepoint fromASCII(unsigned char charLiteral);

	// The stored codepoint, as UTF-32.
	uint32_t UTF32() const
	{
		return _codepoint;
	}

	// True when the stored codepoint is 0.
	bool isNull() const
	{
		return UTF32() == 0;
	}

	// Case-folded form of this codepoint (defined out-of-line);
	// the result is a sequence, so it may hold more than one codepoint.
	std::vector<WzUniCodepoint> caseFolded() const;

	// Two codepoints are equal when their UTF-32 values are equal.
	bool operator==(const WzUniCodepoint& ch) const
	{
		return UTF32() == ch.UTF32();
	}

protected:
	// Direct construction is restricted; use the `from*` named constructors.
	explicit WzUniCodepoint(uint32_t utf_32_codepoint)
		: _codepoint(utf_32_codepoint)
	{ }

private:
	uint32_t _codepoint;
};
// String class that keeps its contents in a std::string (`_utf8String`) and
// exposes an interface expressed in Unicode codepoints (WzUniCodepoint).
class WzString {
public:
	WzString() = default;
	// NOTE(review): presumably builds a string of `size` copies of `ch` - confirm against the implementation.
	WzString(size_t size, const WzUniCodepoint& ch);
	WzString(WzString&& other) noexcept : _utf8String(std::move(other._utf8String)) { }
	// Deliberately NOT `noexcept`: copying the underlying std::string may allocate and
	// therefore throw (std::bad_alloc). An exception escaping a `noexcept` function
	// calls std::terminate instead of propagating to the caller.
	WzString(const WzString& other) : _utf8String(other._utf8String) { }
	// Implicit conversion from a C string.
	// NOTE(review): `size = -1` presumably means "NUL-terminated, compute the length" - confirm.
	WzString(const char * str, int size = -1);

	// Named constructors from the various Unicode encodings.
	static WzString fromUtf8(const char *str, int size = -1);
	static WzString fromUtf8(const std::string &str);
	static WzString fromUtf16(const std::vector<uint16_t>& utf16);
	static WzString fromUtf32(const std::vector<uint32_t>& utf32);
	static WzString fromCodepoint(const WzUniCodepoint& codepoint);

	// Formats the arguments with `astringf` and wraps the result via `fromUtf8`.
	// The clang pragmas only silence "-Wgcc-compat" for this declaration.
	// NOTE(review): WZ_DECL_FORMAT_CXX presumably attaches a printf-style format
	// attribute (format string at index 1, arguments from index 2) - confirm in wzglobal.h.
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgcc-compat"
#endif
	template <typename... P>
	static inline WzString format(char const *format, P &&... params) WZ_DECL_FORMAT_CXX(WZ_PRINTF_FORMAT, 1, 2)
	{
		return WzString::fromUtf8(astringf(format, std::forward<P>(params)...));
	}
#if defined(__clang__)
#pragma clang diagnostic pop // "-Wgcc-compat"
#endif

	// Conversions to the various Unicode encodings.
	const std::string& toUtf8() const;
	// Same as `toUtf8()`
	const std::string& toStdString() const;
	std::vector<uint16_t> toUtf16() const;
	std::vector<uint32_t> toUtf32() const;

	// Parses the string as an integer in the given `base`.
	// NOTE(review): `ok` (when non-null) presumably receives the success flag - confirm.
	int toInt(bool *ok = nullptr, int base = 10) const;

	bool isEmpty() const;
	// Returns the number of Unicode codepoints in this string.
	int length() const;
	// Same as `length()`
	int size() const { return length(); }
	// NOTE(review): `position` is presumably a codepoint index (not a byte offset) - confirm.
	WzUniCodepoint at(int position) const;

	// In-place modification. The reference-returning members return a WzString&
	// (presumably `*this`, to allow chaining - confirm against the implementation).
	WzString& append(const WzString &str);
	WzString& append(const WzUniCodepoint &c);
	WzString& append(const char* str);
	WzString& insert(size_t position, const WzString &str);
	WzString& insert(size_t i, WzUniCodepoint c);
	WzString& remove(size_t i, int len);
	WzString& replace(size_t position, int n, const WzUniCodepoint &after);
	WzString& replace(const WzUniCodepoint &before, const WzUniCodepoint &after);
	WzString& replace(const WzUniCodepoint &before, const WzString &after);
	WzString& replace(const WzString &before, const WzString &after);
	WzString& remove(const WzString &substr);
	void truncate(int position);
	bool pop_back();
	void clear();

	// Operations that return a new value and leave this string untouched (all `const`).
	WzString toLower() const;
	WzString trimmed(const std::locale &loc = std::locale::classic()) const;
	std::vector<WzString> split(const WzString &delimiter) const;
	WzString substr(size_t start, size_t length) const;
	WzString substr(size_t start) const;

public:
	// Normalization
	enum NormalizationForm {
		NormalizationForm_KD
	};
	WzString normalized(WzString::NormalizationForm mode) const;

public:
	// Create from numbers
	static WzString number(int n);
	static WzString number(unsigned int n);
	static WzString number(long n);
	static WzString number(unsigned long n);
	static WzString number(long long n);
	static WzString number(unsigned long long n);
	static WzString number(double n);

	// Useful when padding is needed
	WzString& leftPadToMinimumLength(const WzUniCodepoint &ch, size_t minimumStringLength);

public:
	// Compound append
	WzString& operator+=(const WzString &other);
	WzString& operator+=(const WzUniCodepoint &ch);
	WzString& operator+=(const char* str);

	// Assignment
	WzString& operator=(const WzString &other);
	WzString& operator=(const WzUniCodepoint& ch);
	WzString& operator=(WzString&& other) noexcept;

	// Concatenation (returns a new string)
	WzString operator+(const WzString &other) const;
	WzString operator+(const char* other) const;

	// Comparison
	bool operator==(const WzString &other) const;
	bool operator!=(const WzString &other) const;
	bool operator < (const WzString& str) const;
	int compare(const WzString &other) const;
	int compare(const char *other) const;

	// Searching
	bool startsWith(const WzString &other) const;
	bool startsWith(const char* other) const;
	bool endsWith(const WzString &other) const;
	bool contains(const WzUniCodepoint &codepoint) const;
	bool contains(const WzString &other) const;

	// Used to expose a modifiable "view" of a WzUniCodepoint inside a WzString
	// Holds a reference to the parent string, so it must not outlive that string.
	class WzUniCodepointRef {
	public:
		WzUniCodepointRef(uint32_t utf_32_codepoint, WzString& parentString, int position)
			: _codepoint(WzUniCodepoint::fromUTF32(utf_32_codepoint)),
			  _parentString(parentString),
			  _position(position)
		{ }
	public:
		// NOTE(review): presumably writes `ch` into the parent string at `_position` - confirm.
		WzUniCodepointRef& operator=(const WzUniCodepoint& ch);
		bool operator==(const WzUniCodepointRef& ch) const;
		// The codepoint value captured when this reference was constructed.
		inline const WzUniCodepoint& value() const { return _codepoint; }
	private:
		WzUniCodepoint _codepoint;
		WzString& _parentString;
		const int _position;
	};
	WzUniCodepointRef operator[](int position);

private:
	// Takes ownership of the given std::string as the stored contents.
	explicit WzString(std::string utf8String)
		: _utf8String(std::move(utf8String))
	{ }
	template <typename octet_iterator, typename distance_type>
	bool _utf8_advance (octet_iterator& it, distance_type n, octet_iterator end) const;
	static bool isValidUtf8(const char * str, size_t len);

private:
	std::string _utf8String;
};
// Concatenation with a C string on the left-hand side.
// NOTE: `a` should point to valid UTF-8.
inline WzString operator+ (const char* a, const WzString &b)
{
	const WzString lhs = WzString::fromUtf8(a);
	return lhs + b;
}
namespace std
{
	// std::hash specialization for WzString.
	template <>
	struct hash<WzString>
	{
		using result_type = std::size_t;

		result_type operator()( const WzString& k ) const
		{
			// Delegate to the std::string hash of the string's UTF-8 form
			const std::hash<std::string> utf8Hasher{};
			return utf8Hasher(k.toUtf8());
		}
	};
}
#endif // _LIB_FRAMEWORK_WZSTRING_H
|