1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
|
/* ScummVM - Graphic Adventure Engine
*
* ScummVM is the legal property of its developers, whose names
* are too numerous to list here. Please refer to the COPYRIGHT
* file distributed with this source distribution.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "common/tokenizer.h"
namespace Common {
/**
 * Create a tokenizer for @p str.
 *
 * @param str         Text to be broken into tokens.
 * @param delimiters  Every character of this string is treated as a token separator.
 */
StringTokenizer::StringTokenizer(const String &str, const String &delimiters)
	: _str(str),
	  _delimiters(delimiters) {
	// Put both token offsets at the start of the input.
	reset();
}
/**
 * Rewind the tokenizer: both token offsets go back to 0, so the next
 * nextToken() call starts scanning from the beginning of the string.
 */
void StringTokenizer::reset() {
	_tokenEnd = 0;
	_tokenBegin = 0;
}
bool StringTokenizer::empty() const {
// Search for the next token's start (i.e. the next non-delimiter character)
for (uint i = _tokenEnd; i < _str.size(); i++) {
if (!_delimiters.contains(_str[i]))
return false; // Found a token so the tokenizer is not empty
}
// Didn't find any more tokens so the tokenizer is empty
return true;
}
/**
 * Extract the next token and remember its position in _tokenBegin/_tokenEnd.
 *
 * @return The token text, or an empty string when no token is left.
 */
String StringTokenizer::nextToken() {
	// Step 1: the token begins at the first non-delimiter at or after the previous token's end
	_tokenBegin = _tokenEnd;
	while (_tokenBegin < _str.size() && _delimiters.contains(_str[_tokenBegin]))
		++_tokenBegin;

	// Step 2: the token ends at the next delimiter, or at the end of the string
	_tokenEnd = _tokenBegin;
	while (_tokenEnd < _str.size() && !_delimiters.contains(_str[_tokenEnd]))
		++_tokenEnd;

	// Copy out the half-open range [_tokenBegin, _tokenEnd)
	const char *tokenStart = _str.c_str() + _tokenBegin;
	return String(tokenStart, _tokenEnd - _tokenBegin);
}
/**
 * Collect every token that is still left, in order of appearance.
 *
 * Extraction continues from the current position, so tokens already
 * consumed through nextToken() are not included.
 *
 * @return Array holding the remaining tokens.
 */
StringArray StringTokenizer::split() {
	StringArray tokens;
	for (bool more = !empty(); more; more = !empty()) {
		tokens.push_back(nextToken());
	}
	return tokens;
}
/**
 * Return the run of delimiter characters located directly in front of the
 * most recently extracted token.
 *
 * @return The delimiters, or an empty string when _tokenBegin is 0 (the token
 *         starts the string, or nothing has been extracted yet).
 */
String StringTokenizer::delimitersAtTokenBegin() const {
	// Nothing can precede offset 0
	if (_tokenBegin == 0)
		return String();

	// Move left for as long as the character just before the cursor is a delimiter;
	// this stops at the previous token or at the start of the input string
	int first = _tokenBegin;
	while (first > 0 && _delimiters.contains(_str[first - 1]))
		--first;

	// The delimiters occupy [first, _tokenBegin)
	return String(_str.c_str() + first, _tokenBegin - first);
}
/**
 * Return the run of delimiter characters located directly behind the most
 * recently extracted token.
 *
 * @return The delimiters, or an empty string when _tokenEnd is 0 (nothing
 *         extracted yet) or the token reaches the end of the string.
 */
String StringTokenizer::delimitersAtTokenEnd() const {
	const bool nothingExtracted = (_tokenEnd == 0);
	const bool tokenReachesEnd = (_tokenEnd == _str.size());
	if (nothingExtracted || tokenReachesEnd)
		return String();

	// Move right until the next token starts or the input string ends
	uint stop = _tokenEnd;
	while (stop < _str.size() && _delimiters.contains(_str[stop]))
		++stop;

	// The delimiters occupy [_tokenEnd, stop)
	return String(_str.c_str() + _tokenEnd, stop - _tokenEnd);
}
/**
 * Create a tokenizer for the U32String @p str.
 *
 * @param str         Text to be broken into tokens.
 * @param delimiters  Every character of this string is treated as a token separator.
 */
U32StringTokenizer::U32StringTokenizer(const U32String &str, const String &delimiters)
	: _str(str),
	  _delimiters(delimiters) {
	// Put both token iterators at the start of the input.
	reset();
}
/**
 * Rewind the tokenizer: both token iterators point at the beginning of the
 * string again, so the next nextToken() call starts scanning from there.
 */
void U32StringTokenizer::reset() {
	_tokenEnd = _str.begin();
	_tokenBegin = _tokenEnd;
}
bool U32StringTokenizer::empty() const {
// Search for the next token's start (i.e. the next non-delimiter character)
for (U32String::const_iterator itr = _tokenEnd; itr != _str.end(); itr++) {
if (!_delimiters.contains(*itr)) {
return false; // Found a token so the tokenizer is not empty
}
}
// Didn't find any more tokens so the tokenizer is empty
return true;
}
/**
 * Extract the next token and remember its position in _tokenBegin/_tokenEnd.
 *
 * @return The token text, or an empty U32String when no token is left.
 */
U32String U32StringTokenizer::nextToken() {
	// Jump over the delimiters in front of the next token.
	// E.g. for "!!--=Hello World" with those symbols as delimiters, this leaves _tokenBegin on 'H'.
	_tokenBegin = _tokenEnd;
	while (_tokenBegin != _str.end() && _delimiters.contains(*_tokenBegin))
		++_tokenBegin;
	_tokenEnd = _tokenBegin;

	// Input exhausted: there is no token to hand out
	if (_tokenBegin == _str.end())
		return U32String();

	// Extend the token until a delimiter or the end of the string is reached
	while (_tokenEnd != _str.end() && !_delimiters.contains(*_tokenEnd))
		++_tokenEnd;

	return U32String(_tokenBegin, _tokenEnd);
}
/**
 * Collect every token that is still left, in order of appearance.
 *
 * Extraction continues from the current position, so tokens already
 * consumed through nextToken() are not included.
 *
 * @return Array holding the remaining tokens.
 */
U32StringArray U32StringTokenizer::split() {
	U32StringArray tokens;
	for (bool more = !empty(); more; more = !empty()) {
		tokens.push_back(nextToken());
	}
	return tokens;
}
/**
 * Return the run of delimiter characters located directly in front of the
 * most recently extracted token.
 *
 * @return The delimiters, or an empty U32String when _tokenBegin is at the
 *         beginning of the string (the token starts the string, or nothing
 *         has been extracted yet).
 */
U32String U32StringTokenizer::delimitersAtTokenBegin() const {
	// First token appears at beginning of the string, or no tokens have been extracted yet
	if (_tokenBegin == _str.begin())
		return U32String();

	// Iterate backwards until we hit either the previous token, or the beginning of the input string.
	// The character *before* the cursor is tested, so the cursor never moves below begin();
	// stepping an iterator to begin() - 1 and comparing it would be undefined behaviour.
	U32String::const_iterator delimitersBegin = _tokenBegin;
	while (delimitersBegin != _str.begin() && _delimiters.contains(*(delimitersBegin - 1)))
		--delimitersBegin;

	// Return the delimiters, i.e. the range [delimitersBegin, _tokenBegin)
	return U32String(delimitersBegin, _tokenBegin);
}
/**
 * Return the run of delimiter characters located directly behind the most
 * recently extracted token.
 *
 * @return The delimiters, or an empty U32String when _tokenEnd is at the
 *         beginning of the string (nothing extracted yet) or at its end.
 */
U32String U32StringTokenizer::delimitersAtTokenEnd() const {
	// Last token appears at end of the string, or no tokens have been extracted yet.
	// Return an empty U32String directly rather than an empty String that must be converted implicitly.
	if (_tokenEnd == _str.begin() || _tokenEnd == _str.end())
		return U32String();

	// Iterate forwards until we hit either the next token, or the end of the input string
	U32String::const_iterator delimitersEnd = _tokenEnd;
	while (delimitersEnd != _str.end() && _delimiters.contains(*delimitersEnd))
		++delimitersEnd;

	// Return the delimiters, i.e. the range [_tokenEnd, delimitersEnd)
	return U32String(_tokenEnd, delimitersEnd);
}
} // End of namespace Common
|