1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
|
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "tokenstream.h"
#include "../math/emath.h"
namespace embree
{
/* shorthands for common sets of characters */
/* lowercase ASCII letters */
const std::string TokenStream::alpha = "abcdefghijklmnopqrstuvwxyz";
/* uppercase ASCII letters */
const std::string TokenStream::ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
/* decimal digits */
const std::string TokenStream::numbers = "0123456789";
/* whitespace characters: newline, tab, carriage return and space */
const std::string TokenStream::separators = "\n\t\r ";
/* characters allowed between the quotes of a string token: letters, digits, space and the
   punctuation _ . , + - = : / * and backslash; the constructor turns this set into
   isStringCharMap and tryString rejects any character outside of it */
const std::string TokenStream::stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\";
/* Fills a 256-entry lookup table for fast character categorization:
   afterwards map[c] is true exactly for the byte values c that occur in chrs. */
static void createCharMap(bool map[256], const std::string& chrs) {
  /* start from a table that accepts nothing */
  for (bool* entry = map; entry != map + 256; ++entry)
    *entry = false;
  /* flag every listed character; converting through uint8_t keeps the index inside 0..255
     even where plain char is signed */
  for (const char ch : chrs)
    map[uint8_t(ch)] = true;
}
/* Builds a full tokenizer on top of a character stream. The caller chooses which characters
   may form identifiers, which characters separate tokens, and which symbols are recognized
   (trySymbols tests them in the order given here). */
TokenStream::TokenStream(const Ref<Stream<int> >& cin,             //< stream to read from
                         const std::string& alpha,                 //< valid characters for identifiers
                         const std::string& seps,                  //< characters that act as separators
                         const std::vector<std::string>& symbols)  //< symbols
  : cin(cin), symbols(symbols)
{
  /* compile each character set into its lookup table; the three tables are independent */
  createCharMap(isStringCharMap,stringChars); // the static set TokenStream::stringChars
  createCharMap(isSepMap,seps);
  createCharMap(isAlphaMap,alpha);            // the parameter, which hides the static TokenStream::alpha here
}
/* Reads an optionally signed run of decimal digits ([+-]digit+) from the stream.
   On success the consumed text is appended to str_o and true is returned. If no digit
   follows, anything consumed (at most the sign) is pushed back, str_o is left untouched
   and false is returned. */
bool TokenStream::decDigits(std::string& str_o)
{
  std::string text;
  size_t numDigits = 0;

  /* optional leading sign */
  if (cin->peek() == '+' || cin->peek() == '-')
    text.push_back((char)cin->get());

  /* at least one digit is required */
  for (; isDigit(cin->peek()); ++numDigits)
    text.push_back((char)cin->get());

  if (numDigits == 0) {
    cin->unget(text.size()); // a lone sign is not a number
    return false;
  }
  str_o.append(text);
  return true;
}
/* Reads an unsigned run of decimal digits (digit+) from the stream.
   Appends the digits to str_o and returns true if at least one digit was read;
   otherwise leaves str_o untouched and returns false. */
bool TokenStream::decDigits1(std::string& str_o)
{
  std::string run;
  while (isDigit(cin->peek()))
    run += (char)cin->get();

  if (run.empty()) {
    cin->unget(run.size()); // nothing was consumed, so this pushes back zero characters
    return false;
  }
  str_o += run;
  return true;
}
/* Tries to consume exactly the characters of symbol from the stream.
   Returns true when the whole symbol matched (it is then consumed). On the first
   mismatch the characters matched so far are pushed back and false is returned. */
bool TokenStream::trySymbol(const std::string& symbol)
{
  for (size_t matched = 0; matched < symbol.size(); ++matched) {
    if (cin->peek() == symbol[matched]) {
      cin->drop();
      continue;
    }
    /* mismatch: restore the stream to where matching started */
    cin->unget(matched);
    return false;
  }
  return true;
}
/* Tries each registered symbol in registration order and stops at the first one that
   matches. On a match, token becomes a TY_SYMBOL token located at loc and true is
   returned; if no symbol matches, token is left unchanged and false is returned. */
bool TokenStream::trySymbols(Token& token, const ParseLocation& loc)
{
  for (const auto& candidate : symbols) {
    if (trySymbol(candidate)) {
      token = Token(candidate,Token::TY_SYMBOL,loc);
      return true;
    }
  }
  return false;
}
/* Tries to parse a floating point literal at the current stream position.
   Accepted forms:
     nan, +inf, -inf
     [sign]digits '.' [digits] [exponent]     e.g.  1.   1.2   1.2E2
     [sign]digits exponent                    e.g.  1E2
     '.' digits [exponent]                    e.g.  .3   .3E2
   where exponent is ('e'|'E') [sign]digits. A plain digit run without '.' or exponent is
   rejected here so that tryInt can claim it, and an exponent marker that is not followed
   by digits makes the whole literal fail.
   On success the float token, tagged with loc, is stored in token and true is returned.
   On failure every character consumed so far is pushed back and false is returned. */
bool TokenStream::tryFloat(Token& token, const ParseLocation& loc)
{
  /* special values; loc is passed so that these tokens carry their source position
     just like ordinary numeric tokens */
  if (trySymbol("nan")) {
    token = Token(float(nan),loc);
    return true;
  }
  if (trySymbol("+inf")) {
    token = Token(float(pos_inf),loc);
    return true;
  }
  if (trySymbol("-inf")) {
    token = Token(float(neg_inf),loc);
    return true;
  }

  bool ok = false;
  std::string str;
  if (decDigits(str))
  {
    if (cin->peek() == '.') {
      str += (char)cin->get();
      /* the fraction is optional and must be unsigned: the sign-accepting decDigits would
         swallow a following "-5" or "+5" into this literal (e.g. "1.-5") */
      decDigits1(str);
      if (cin->peek() == 'e' || cin->peek() == 'E') {
        str += (char)cin->get();
        if (decDigits(str)) ok = true; // 1.[2]E2
      }
      else ok = true; // 1.[2]
    }
    else if (cin->peek() == 'e' || cin->peek() == 'E') {
      str += (char)cin->get();
      if (decDigits(str)) ok = true; // 1E2
    }
  }
  else
  {
    if (cin->peek() == '.') {
      str += (char)cin->get();
      /* unsigned digits only, so that ".+5" is not accepted as a number */
      if (decDigits1(str)) {
        if (cin->peek() == 'e' || cin->peek() == 'E') {
          str += (char)cin->get();
          if (decDigits(str)) ok = true; // .3E2
        }
        else ok = true; // .3
      }
    }
  }

  if (ok) {
    token = Token((float)atof(str.c_str()),loc);
  }
  else cin->unget(str.size()); // not a float: restore everything collected in str
  return ok;
}
/* Tries to parse an optionally signed decimal integer. On success token becomes an
   integer token located at loc and true is returned; otherwise the stream is left as
   decDigits restored it and false is returned. */
bool TokenStream::tryInt(Token& token, const ParseLocation& loc) {
  std::string digits;
  if (!decDigits(digits))
    return false;
  token = Token(atoi(digits.c_str()),loc);
  return true;
}
/* Tries to parse a double-quoted string. Returns false without consuming anything when the
   next character is not a double quote. Otherwise reads up to the closing quote, stores the
   text between the quotes in a TY_STRING token located at loc and returns true.
   Raises an error via THROW_RUNTIME_ERROR when the stream ends before the closing quote or
   when a character is not accepted by isStringChar. */
bool TokenStream::tryString(Token& token, const ParseLocation& loc)
{
  if (cin->peek() != '\"') return false;
  cin->drop(); // opening quote

  std::string str;
  while (cin->peek() != '\"') {
    const int c = cin->get();
    /* end of stream inside a string: report it explicitly instead of handing EOF to
       isStringChar and formatting it as a character in the error message */
    if (c == EOF) THROW_RUNTIME_ERROR("unterminated string at "+loc.str());
    if (!isStringChar(c)) THROW_RUNTIME_ERROR("invalid string character "+std::string(1,c)+" at "+loc.str());
    str += (char)c;
  }
  cin->drop(); // closing quote
  token = Token(str,Token::TY_STRING,loc);
  return true;
}
/* Tries to parse an identifier: one character accepted by isAlpha followed by any number
   of characters accepted by isAlphaNum. On success token becomes a TY_IDENTIFIER token
   located at loc and true is returned; otherwise nothing is consumed and false is returned. */
bool TokenStream::tryIdentifier(Token& token, const ParseLocation& loc)
{
  if (!isAlpha(cin->peek()))
    return false;

  std::string name(1,(char)cin->get());
  while (isAlphaNum(cin->peek()))
    name.push_back((char)cin->get());

  token = Token(name,Token::TY_IDENTIFIER,loc);
  return true;
}
/* Drops characters from the stream until the next one is either the end of the stream
   or not a separator. */
void TokenStream::skipSeparators()
{
  for (;;) {
    if (cin->peek() == EOF) break;          // nothing left to skip
    if (!isSeparator(cin->peek())) break;   // next token starts here
    cin->drop();
  }
}
/* Returns the next token of the stream. Leading separators are skipped, then the token
   kinds are tried in fixed priority order: symbol, float, integer, string, identifier.
   If none of them matches, an EOF token is returned at the end of the stream; otherwise
   the next character is consumed and returned as a single-character token. */
Token TokenStream::next()
{
  Token token;
  skipSeparators();
  ParseLocation loc = cin->loc(); // position of the token's first character

  /* short-circuit evaluation stops at the first parser that succeeds */
  const bool recognized =
       trySymbols   (token,loc)   /**< try to parse a symbol */
    || tryFloat     (token,loc)   /**< try to parse float */
    || tryInt       (token,loc)   /**< try to parse integer */
    || tryString    (token,loc)   /**< try to parse string */
    || tryIdentifier(token,loc);  /**< try to parse identifier */
  if (recognized)
    return token;

  if (cin->peek() == EOF)
    return Token(loc);                  /**< return EOF token */
  return Token((char)cin->get(),loc);   /**< return invalid character token */
}
}
|