1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
|
#include "tokenizer.h"
#include <iostream>
// Produces a short human-readable description of this token.
//
// Returns:
//   - L"Word: '<content>'" when the token type is Word,
//   - L"Para" when it is Para,
//   - L"Eof" when it is Eof,
//   - L"Unknown" for any other type value.
std::wstring Token::toString() const
{
    switch (type) {
    case Word:
        // Only Word tokens include their text in the description.
        return L"Word: '" + content + L"'";
    case Para:
        return L"Para";
    case Eof:
        return L"Eof";
    default:
        return L"Unknown";
    }
}
// Reports whether `ch` is one of the six whitespace characters recognised by
// the tokenizer: space, line feed, carriage return, horizontal tab, form feed
// or vertical tab. The check is a fixed comparison and does not consult the
// locale.
static bool wisspace(wchar_t ch)
{
    switch (ch) {
    case L' ':
    case L'\n':
    case L'\r':
    case L'\t':
    case L'\f':
    case L'\v':
        return true;
    default:
        return false;
    }
}
// Advances currentPos past the run of whitespace (as classified by wisspace)
// that starts at the current position.
//
// Parameters:
//   notSearch - when true, no look-out for a double line feed is performed
//               and the function always returns false.
//
// Returns true if, while skipping with notSearch == false, a consumed L'\n'
// was immediately followed by another L'\n'. Skipping continues to the end of
// the whitespace run even after such a pair has been seen.
bool Tokenizer::skipSpaces(bool notSearch)
{
    const int textLength = static_cast<int>(text.length());
    bool sawDoubleReturn = false;

    while (currentPos < textLength && wisspace(text[currentPos])) {
        const wchar_t consumed = text[currentPos];
        ++currentPos;

        // Nothing to look for: search disabled, already satisfied, or the
        // character just consumed cannot start a "\n\n" pair.
        if (notSearch || consumed != L'\n')
            continue;

        // currentPos now indexes the character right after the consumed '\n'.
        if (currentPos < textLength && text[currentPos] == L'\n') {
            sawDoubleReturn = true;
            notSearch = true;   // one hit is enough; stop searching
        }
    }

    return sawDoubleReturn;
}
// Returns the next token.
//
// Order of precedence:
//   1. If tokens have been handed back via unget(), the oldest of them is
//      removed from the front of `stack` and returned (first in, first out).
//   2. Otherwise whitespace is skipped. If a double line feed was seen while
//      skipping and text remains, a Para token is returned. Paragraph
//      detection is switched off when currentPos is 0, so blank lines at the
//      very start of the text do not yield a Para token.
//   3. If the end of the text has been reached, an Eof token is returned.
//   4. Otherwise the run of non-whitespace characters starting at currentPos
//      is consumed and returned as a Word token.
Token Tokenizer::getNextToken()
{
    if (!stack.empty()) {
        Token pending(stack.front());
        stack.pop_front();
        return pending;
    }

    const int textLength = static_cast<int>(text.length());

    // skipSpaces must run before the position is compared against the
    // length, since it moves currentPos.
    const bool atStart = (0 == currentPos);
    const bool paragraphBreak = skipSpaces(atStart);
    if (paragraphBreak && currentPos < textLength)
        return Token(Token::Para);

    if (currentPos >= textLength)
        return Token(Token::Eof);

    // Scan forward to the first whitespace character or the end of the text.
    const int wordBegin = currentPos;
    for (; currentPos < textLength; ++currentPos) {
        if (wisspace(text[currentPos]))
            break;
    }

    const int wordLength = currentPos - wordBegin;
    return Token(Token::Word, text.substr(wordBegin, wordLength));
}
// Hands `token` back to the tokenizer by appending a copy of it to the end of
// `stack`. getNextToken() serves tokens from the front of `stack` before it
// reads any further text, so tokens handed back come out in the order in
// which they were passed to unget().
void Tokenizer::unget(const Token &token)
{
    stack.insert(stack.end(), token);
}
// Reports whether the tokenizer has nothing left to deliver: no tokens are
// waiting in `stack` and currentPos has reached (or passed) the end of the
// text. Note that the text is not scanned here, so this returns false while
// only whitespace remains after currentPos.
bool Tokenizer::isFinished()
{
    const bool nothingPending = stack.empty();
    return nothingPending && currentPos >= static_cast<int>(text.length());
}
|