1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
|
/* Copyright 2016 Software Freedom Conservancy Inc.
*
* This software is licensed under the GNU Lesser General Public License
* (version 2.1 or later). See the COPYING file in this distribution.
*/
// Default length limit for text entered by the user.
// NOTE(review): presumably meant to be passed as the dest_length argument of
// prepare_input_text(), which measures in bytes -- confirm against callers.
public const int DEFAULT_USER_TEXT_INPUT_LENGTH = 1024;
// Reports whether a string carries no content.
//
// Returns true for a null reference and for a zero-length string (first byte is the
// terminating NUL); returns false for anything else.
public inline bool is_string_empty(string? s) {
    if (s == null)
        return true;

    return s[0] == '\0';
}
// Case-sensitive comparison of two UTF-8 strings using string.collate().
//
// Returns a negative value, zero, or a positive value when a sorts before, equal to, or
// after b respectively.
public int utf8_cs_compare(string a, string b) {
    int ordering = a.collate(b);

    return ordering;
}
// Case-insensitive comparison of two UTF-8 strings.
//
// Both operands are lowercased with string.down() and the lowercase forms are then compared
// with string.collate(). Returns a negative value, zero, or a positive value.
public int utf8_ci_compare(string a, string b) {
    string a_lowered = a.down();
    string b_lowered = b.down();

    return a_lowered.collate(b_lowered);
}
// Builds a string from the leading bytes of a uchar array.
//
// data:   the source bytes.
// length: the maximum number of bytes to consume. A negative value (the default) means the
//         whole array. A value larger than the array is clamped to the array's length so the
//         loop can never read past the end of data.
//
// Copying stops early at the first NUL byte, which is not included in the result. The bytes
// are appended as-is; no UTF-8 validation is performed here.
public string uchar_array_to_string(uchar[] data, int length = -1) {
    if (length < 0 || length > data.length)
        length = data.length;

    StringBuilder builder = new StringBuilder();
    for (int ctr = 0; ctr < length && data[ctr] != '\0'; ctr++)
        builder.append_c((char) data[ctr]);

    return builder.str;
}
// Copies the bytes of a string into a newly allocated uchar array.
//
// The array holds exactly str.length elements (a byte count, not a character count) and
// does not include the terminating NUL.
public uchar[] string_to_uchar_array(string str) {
    int byte_count = str.length;
    uchar[] bytes = new uchar[byte_count];

    for (int pos = 0; pos < byte_count; pos++)
        bytes[pos] = (uchar) str[pos];

    return bytes;
}
// Safe wrapper around Markup.escape_text().
//
// Markup.escape_text() will crash if handed invalid UTF-8: it relies on g_utf8_next_char(),
// which requires the string to have been validated beforehand, and escape_text() does not
// perform that validation itself. This wrapper validates first and hands back an empty
// string when the text is not valid UTF-8. Text should still be validated when it enters the
// system to guard against the same problem elsewhere.
//
// Null and empty strings are accepted; both yield an empty string.
public inline string guarded_markup_escape_text(string? plain) {
    if (is_string_empty(plain) || !plain.validate())
        return "";

    return Markup.escape_text(plain);
}
// Finds the byte offset of the last occurrence of the byte c in str.
//
// The string is scanned backwards one byte at a time, starting from its final byte.
// Returns the zero-based byte offset of the match, or -1 if c does not occur.
public long find_last_offset(string str, char c) {
    for (long pos = str.length - 1; pos >= 0; pos--) {
        if (str[pos] == c)
            return pos;
    }

    return -1;
}
// Case-insensitive membership test against an array of strings.
//
// Only str is lowercased; the array elements are compared as given, so the array must
// already be all lowercase for the test to be meaningful. Returns true on the first exact
// match of the lowercased str, false if no element matches.
public bool is_in_ci_array(string str, string[] strings) {
    string lowered = str.down();

    for (int idx = 0; idx < strings.length; idx++) {
        if (strings[idx] == lowered)
            return true;
    }

    return false;
}
// Bit flags selecting which clean-up steps prepare_input_text() applies to its input.
[Flags]
public enum PrepareInputTextOptions {
    // Return null instead of an empty string when the processed text is empty.
    EMPTY_IS_NULL,
    // Check that the text is valid UTF-8; if it is not, attempt a charset conversion.
    VALIDATE,
    // When the text cannot be made valid, return null rather than an empty string.
    INVALID_IS_NULL,
    // Remove leading and trailing whitespace.
    STRIP,
    // Replace every newline and carriage return with a space.
    STRIP_CRLF,
    // Normalize the text to Unicode composed form (NFC).
    NORMALIZE,
    // All of the above steps combined.
    DEFAULT = EMPTY_IS_NULL | VALIDATE | INVALID_IS_NULL | STRIP_CRLF | STRIP | NORMALIZE;
}
// Attempts to turn text that failed UTF-8 validation into UTF-8 by guessing its source
// encoding.
//
// Encodings are tried in this order: the current locale's charset (only when the locale is
// not UTF-8), then WINDOWS-1252, then ISO-8859-1 (only when the WINDOWS-1252 converter is
// unavailable). A guess is accepted only when the converter reports having consumed every
// byte of the input.
//
// Returns the converted text, or null when no attempted conversion consumed the whole input.
private string? guess_convert(string text) {
    string? output = null;
    size_t bytes_read = 0;
    unowned string charset = null;

    debug ("CONVERT: Text did not validate as UTF-8, trying conversion");

    // Try with locale. A false return from get_charset() is treated as "the locale charset
    // is not UTF-8" (see the debug message below); charset receives the locale charset name.
    if (!GLib.get_charset(out charset)) {
        output = text.locale_to_utf8(text.length, out bytes_read, null, null);
        // Accept the locale conversion only if it read the complete input.
        if (bytes_read == text.length) {
            debug ("CONVERT: Locale is not UTF-8, convert from %s", charset);
            return output;
        }
    }

    try {
        output = GLib.convert (text, text.length, "UTF-8", "WINDOWS-1252", out bytes_read);
        charset = "WINDOWS-1252";
    } catch (ConvertError error) {
        // Fall back to ISO-8859-1 only when the WINDOWS-1252 conversion itself is not
        // supported; any other conversion error leaves output and charset as they were.
        if (error is ConvertError.NO_CONVERSION) {
            try {
                output = GLib.convert (text, text.length, "UTF-8", "ISO-8859-1", out bytes_read);
                charset = "ISO-8859-1";
            } catch (Error error) { /* deliberately ignored; the bytes_read check below rejects the result */ }
        }
    }

    // bytes_read holds the count written by the last conversion call that was made.
    if (bytes_read == text.length) {
        debug ("CONVERT: Guessed conversion from %s", charset);
        return output;
    }

    return null;
}
// Sanitizes externally supplied text according to the requested options.
//
// text:        the raw input; null is passed straight through as null.
// options:     which clean-up steps to apply (see PrepareInputTextOptions).
// dest_length: maximum length of the result in bytes. Any negative value means "do not
//              truncate". Truncation never splits a multi-byte UTF-8 character, so the result
//              may be a few bytes shorter than dest_length.
//
// Steps are applied in this order: validation/conversion, NFC normalization, whitespace
// stripping, CR/LF replacement, empty check, truncation. Because the empty check runs before
// truncation, a string that only becomes empty through truncation is returned as "" even
// when EMPTY_IS_NULL is set.
//
// Returns the prepared text, or null when the input is null, when it is empty and
// EMPTY_IS_NULL is set, or when it cannot be made valid and INVALID_IS_NULL is set.
public string? prepare_input_text(string? text, PrepareInputTextOptions options, int dest_length) {
    if (text == null)
        return null;

    string? prepped = text;

    if ((PrepareInputTextOptions.VALIDATE in options) && !text.validate()) {
        prepped = guess_convert(text);
        if (prepped == null)
            return (PrepareInputTextOptions.INVALID_IS_NULL in options) ? null : "";
    }

    // Using composed form rather than GLib's default (decomposed) as NFC is the preferred form in
    // Linux and WWW. More importantly, Pango seems to have serious problems displaying decomposed
    // forms of Korean language glyphs (and perhaps others). See:
    // https://bugzilla.gnome.org/show_bug.cgi?id=716914
    if (PrepareInputTextOptions.NORMALIZE in options) {
        // g_utf8_normalize() yields NULL for invalid UTF-8, which can reach this point when
        // VALIDATE was not requested. Treat that the same way as a failed conversion instead
        // of dereferencing null in the steps below.
        string? normalized = prepped.normalize(-1, NormalizeMode.NFC);
        if (normalized == null)
            return (PrepareInputTextOptions.INVALID_IS_NULL in options) ? null : "";

        prepped = normalized;
    }

    if (PrepareInputTextOptions.STRIP in options)
        prepped = prepped.strip();

    // Ticket #3245 - Prevent carriage return mayhem
    // in image titles, tag names, etc.
    if (PrepareInputTextOptions.STRIP_CRLF in options)
        prepped = prepped.delimit("\n\r", ' ');

    if ((PrepareInputTextOptions.EMPTY_IS_NULL in options) && is_string_empty(prepped))
        return null;

    // Ticket #3196 - Allow calling functions to limit the length of the
    // string we return to them. Passing any negative value is interpreted
    // as 'do not truncate'.
    if (dest_length >= 0 && dest_length < prepped.length) {
        // dest_length is a byte count; if it lands inside a multi-byte sequence, back up
        // over the continuation bytes (10xxxxxx) so the result stays valid UTF-8.
        int cut = dest_length;
        while (cut > 0 && (((uint8) prepped[cut]) & 0xC0) == 0x80)
            cut--;

        return prepped.substring(0, cut);
    }

    // otherwise, return normally.
    return prepped;
}
// Miscellaneous string helpers: character search, zero stripping, diacritic removal, hex
// dumps, and collation-aware hashing/comparison.
namespace String {

// Reports whether the Unicode character needle occurs anywhere in haystack.
public inline bool contains_char(string haystack, unichar needle) {
    return haystack.index_of_char(needle) >= 0;
}

// Removes the run of '0' characters at the start of the string and after every whitespace
// character.
//
// Note that this method currently turns a word of all zeros into empty space ("000" -> "")
public string strip_leading_zeroes(string str) {
    StringBuilder stripped = new StringBuilder();

    // True while we are at the start of a word, i.e. no non-zero character has been kept
    // since the beginning of the string or the last whitespace.
    bool prev_is_space = true;
    for (unowned string iter = str; iter.get_char() != 0; iter = iter.next_char()) {
        unichar ch = iter.get_char();

        if (!prev_is_space || ch != '0') {
            stripped.append_unichar(ch);
            prev_is_space = ch.isspace();
        }
    }

    return stripped.str;
}

// Returns a copy of istring with diacritical marks removed.
//
// The input is normalized with string.normalize()'s default mode, then every character whose
// Unicode type is a mark (non-spacing, combining, enclosing) is dropped. Control, format and
// unassigned characters are dropped as well.
public string remove_diacritics(string istring) {
    var builder = new StringBuilder ();

    // Normalize once up front; the result does not change between iterations, and the byte
    // index i below refers to positions within this normalized string.
    string decomposed = istring.normalize();

    unichar ch;
    int i = 0;
    while (decomposed.get_next_char(ref i, out ch)) {
        switch (ch.type()) {
            case UnicodeType.CONTROL:
            case UnicodeType.FORMAT:
            case UnicodeType.UNASSIGNED:
            case UnicodeType.NON_SPACING_MARK:
            case UnicodeType.COMBINING_MARK:
            case UnicodeType.ENCLOSING_MARK:
                // Ignore those
                continue;

            default:
                break;
        }

        builder.append_unichar(ch);
    }

    return builder.str;
}

// Renders every byte of str as two uppercase hex digits followed by 'h', with single spaces
// between bytes ("AB" -> "41h 42h"). An empty string yields an empty string.
public string to_hex_string(string str) {
    StringBuilder builder = new StringBuilder();

    uint8 *data = (uint8 *) str;
    while (*data != 0) {
        // Read and advance in separate statements: doing "*data++" and "*data" as arguments
        // of the same call leaves their relative order undefined in the generated C.
        uint8 current = *data;
        data++;

        builder.append_printf("%02Xh%s", current, (*data != 0) ? " " : "");
    }

    return builder.str;
}

// A note on the collated_* and precollated_* methods:
//
// A bug report (https://bugzilla.gnome.org/show_bug.cgi?id=717135) indicated that two different Hiragana characters
// as Tag names would trigger an assertion. Investigation showed that the characters' collation
// keys computed as equal when the locale was set to anything but the default locale (C) or
// Japanese. A related bug was that another hash table was using str_equal, which does not use
// collation, meaning that in one table the strings were seen as the same and in another as
// different.
//
// The solution we arrived at is to use collation whenever possible, but if two strings have the
// same collation, then fall back on strcmp(), which looks for byte-for-byte comparisons. Note
// that this technique requires that both strings have been properly composed (use
// prepare_input_text() for that task) so that equal UTF-8 strings are byte-for-byte equal as
// well.

// Hashes the collation key of the string that ptr points to. See note above.
public uint collated_hash(void *ptr) {
    string str = (string) ptr;

    return str_hash(str.collate_key());
}

// Hashes the string that ptr points to directly, without computing a collation key.
// NOTE(review): presumably ptr is an already-computed collation key -- confirm with callers.
// See note above.
public uint precollated_hash(void *ptr) {
    return str_hash((string) ptr);
}

// Orders two strings by collation, breaking collation ties with a byte-wise strcmp().
// Returns a negative value, zero, or a positive value. See note above.
public int collated_compare(void *a, void *b) {
    string astr = (string) a;
    string bstr = (string) b;

    int result = astr.collate(bstr);

    return (result != 0) ? result : strcmp(astr, bstr);
}

// Same ordering as collated_compare(), but using caller-supplied keys: akey and bkey are
// compared byte-wise first, and astr and bstr break the tie. See note above.
public int precollated_compare(string astr, string akey, string bstr, string bkey) {
    int result = strcmp(akey, bkey);

    return (result != 0) ? result : strcmp(astr, bstr);
}

// True when collated_compare() considers the two strings equal. See note above.
public bool collated_equals(void *a, void *b) {
    return collated_compare(a, b) == 0;
}

// True when precollated_compare() considers the two string/key pairs equal. See note above.
public bool precollated_equals(string astr, string akey, string bstr, string bkey) {
    return precollated_compare(astr, akey, bstr, bkey) == 0;
}

}
|