/* This file is part of KDevelop
Copyright 2002-2005 Roberto Raggi <roberto@kdevelop.org>
Copyright 2007-2008 David Nolden <david.nolden.kdevelop@art-master.de>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License version 2 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#ifndef LEXER_H
#define LEXER_H
#include "symbol.h"
#include <cppparserexport.h>
#include <QtCore/QString>
#include <cstdlib>
#include <language/duchain/indexedstring.h>
#include <language/interfaces/iproblem.h>
// Forward declarations.
struct NameSymbol;
class Lexer;
class Control;
class ParseSession;
/// Pointer to a Lexer member function that takes no arguments and returns void.
/// This is the element type of Lexer::s_scan_table and Lexer::s_scan_keyword_table.
typedef void (Lexer::*scan_fun_ptr)();
/**
 * A single token.
 *
 * Plain record of public fields (kind, position, size, owning session) plus
 * a few accessors that give access to the symbol the token stands for.
 */
class KDEVCPPPARSER_EXPORT Token
{
public:
///Kind of the token. @see TOKEN_KIND enum reference.
int kind;
///Position of the token in the preprocessed buffer.
uint position;
///Size of the token in the preprocessed buffer. Do not confuse this with symbolLength().
uint size;
///Pointer to the parse session.
const ParseSession* session;
///Symbol associated with the token. This only works if the token is a simple symbol
///consisting of exactly one identifier (not comments); it does not work for operators like "->" or numbers like "50".
KDevelop::IndexedString symbol() const;
///The index of the symbol associated with the token.
///The notes from @c symbol() apply as well.
uint symbolIndex() const;
///The symbol as a QString. Unlike symbol(), this always works, but it is expensive.
QString symbolString() const;
///NOTE(review): presumably the same text as symbolString(), returned as a QByteArray - confirm against the implementation.
QByteArray symbolByteArray() const;
///NOTE(review): presumably the length of the symbol text; per the note on @c size this is a different quantity than @c size - confirm against the implementation.
uint symbolLength() const;
};
/**
 * Stream of tokens found by the lexer.
 *
 * The stream is a QVector of @ref Token, i.e. the tokens are stored
 * contiguously like in an array and are destroyed together with the stream.
 *
 * In addition to the vector contents the stream keeps a "cursor": a plain
 * integer holding the offset (index), counted from the beginning of the
 * stream, of the token that is currently being "observed".
 *
 * Copying a stream is disabled.
 *
 * TODO: reuse some pool / container class for the token array
 *
 * NOTE(review): an older note at this place said that "token_count is actually
 * the *size* of the token pool, the last actually used token is lastToken".
 * Neither name is declared in this class, so that note looks stale - confirm
 * before relying on it.
 */
class TokenStream : public QVector<Token>
{
private:
  // Not copyable: both operations are declared private on purpose.
  TokenStream(const TokenStream &);
  void operator = (const TokenStream &);

public:
  /**
   * Creates an empty stream with the cursor at 0.
   *
   * @param size number of tokens to reserve storage for up front (1024 by default).
   */
  inline TokenStream(uint size = 1024)
    : index(0)
  {
    reserve(size);
  }

  /**@return the token stored at position @p index.*/
  inline const Token &token(int index) const
  {
    const Token &requested = at(index);
    return requested;
  }

  /**@return the cursor, i.e. the offset (index) of the currently observed
     token counted from the beginning of the stream.*/
  inline uint cursor() const
  {
    return this->index;
  }

  /**Moves the cursor to position @p i.*/
  inline void rewind(int i)
  {
    this->index = static_cast<uint>(i);
  }

  /**Advances the cursor by one token.
     @return the cursor value from before the advance.*/
  inline uint nextToken()
  {
    const uint previous = index;
    ++index;
    return previous;
  }

  /**@return the kind of the look-ahead token located @p i positions after
     the cursor; with the default of 0 this is the token under the cursor.*/
  inline int lookAhead(uint i = 0) const
  {
    return kind(index + i);
  }

  /**@return the kind of the token stored at position @p i.*/
  inline int kind(uint i) const
  {
    const Token &requested = at(i);
    return requested.kind;
  }

  /**@return the @c position field of the token stored at position @p i.*/
  inline uint position(uint i) const
  {
    const Token &requested = at(i);
    return requested.position;
  }

  /**
   * Split the right shift token at @p index into two distinct right angle brackets.
   *
   * Required to support 14.2/3 of the spec, see also:
   * http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1757.html
   */
  void splitRightShift(uint index);

private:
  /// The cursor: offset of the currently observed token.
  uint index;
};
/**
 * C++ Lexer.
 *
 * Declares one scan method per character class / operator plus a set of
 * keyword scanners; pointers to these methods are kept in the static tables
 * s_scan_table and s_scan_keyword_table.
 */
class Lexer
{
public:
  /**
   * Constructor.
   *
   * \param control the Control object this lexer works with.
   *        NOTE(review): presumably kept in the private @c control member -
   *        the constructor body is not part of this header.
   */
  Lexer(Control *control);

  /**
   * Tokenizes the input that belongs to @p session.
   *
   * NOTE(review): older documentation spoke of a "contents" buffer and a
   * "token_stream"; presumably both are reached through @p session now -
   * confirm against the implementation.
   */
  void tokenize(ParseSession* session);

  /// Publicly accessible parse session pointer.
  ParseSession* session;

private:
  void skipComment();

  /**Fills the scan table with method pointers.*/
  void initialize_scan_table();

  void scan_newline();
  void scan_white_spaces();
  void scan_identifier_or_keyword();
  void scan_identifier_or_literal();
  void scan_int_constant();
  void scan_char_constant();
  void scan_string_constant();
  void scan_raw_string_constant();
  void scan_raw_string_or_identifier();
  void scan_invalid_input();
  void scan_preprocessor();

  // keywords
  // NOTE(review): the numeric suffix presumably is the keyword length; there
  // is no scanKeyword1 and no scanKeyword15 - confirm against the tables.
  void scanKeyword0();
  void scanKeyword2();
  void scanKeyword3();
  void scanKeyword4();
  void scanKeyword5();
  void scanKeyword6();
  void scanKeyword7();
  void scanKeyword8();
  void scanKeyword9();
  void scanKeyword10();
  void scanKeyword11();
  void scanKeyword12();
  void scanKeyword13();
  void scanKeyword14();
  void scanKeyword16();

  // operators
  void scan_not();
  void scan_remainder();
  void scan_and();
  void scan_left_paren();
  void scan_right_paren();
  void scan_star();
  void scan_plus();
  void scan_comma();
  void scan_minus();
  void scan_dot();
  void scan_divide();
  void scan_colon();
  void scan_semicolon();
  void scan_less();
  void scan_equal();
  void scan_greater();
  void scan_question();
  void scan_left_bracket();
  void scan_right_bracket();
  void scan_xor();
  void scan_left_brace();
  void scan_or();
  void scan_right_brace();
  void scan_tilde();
  void scan_EOF();

  KDevelop::ProblemPointer createProblem() const;

private:
  Control *control;

  /**
   * Thin wrapper around a pointer into a buffer of uint entries.
   *
   * An entry whose upper 16 bits are all set (0xffff0000) is treated as a
   * plain character carried in the low bits. Every other entry is treated as
   * a non-character index; dereferencing such an entry yields 'a' (see
   * operator*()).
   */
  struct SpecialCursor {
    /// Compares the raw entry under the cursor with @p index.
    bool operator==(uint index) const {
      return *current == index;
    }

    /// Compares the raw entry under the cursor with the entry form of @p character.
    bool operator==(char character) const {
      return *current == (character | 0xffff0000);
    }

    /// @return true if the entry under the cursor carries a plain character.
    bool isChar() const {
      return ((*current) & 0xffff0000) == 0xffff0000;
    }

    /// @return the character under the cursor, or 'a' for non-character entries.
    inline char operator*() const {
      if(isChar())
        return (char)*current;
      else
        return 'a'; //Return a valid character, because the identifiers created by the preprocessor are alpha-numerical
    }

    void operator++() {
      ++current;
    }

    void operator+=(int offset) {
      current += offset;
    }

    bool operator !=(const SpecialCursor& rhs) const {
      return current != rhs.current;
    }

    bool operator !=(const uint* rhs) const {
      return current != rhs;
    }

    void operator--() {
      --current;
    }

    bool operator<(const uint* end) const {
      return current < end;
    }

    /**
     * @return the signed distance, in entries, from @p rhs to this cursor.
     *
     * Uses typed pointer subtraction. Dividing a byte difference by
     * sizeof(uint) would convert the signed difference to an unsigned type
     * first, which gives a wrong (large positive) result for negative
     * distances wherever size_t is not wider than int.
     */
    int operator -(const SpecialCursor& rhs) const {
      return static_cast<int>(current - rhs.current);
    }

    /// @return the offset of this cursor, in entries, relative to @p base.
    uint offsetIn(const uint* base) const {
      return static_cast<uint>(current - base);
    }

    /// @return a copy of this cursor moved by @p offset entries; this cursor is unchanged.
    SpecialCursor operator +(int offset) const {
      SpecialCursor ret(*this);
      ret.current += offset;
      return ret;
    }

    // useful for debugging
    QString toString() const;

    /// The wrapped pointer.
    uint* current;
  };

  SpecialCursor cursor;
  const uint* endCursor;
  uint index;

  bool m_leaveSize; //Marks the current token that its size should not be automatically set
  bool m_canMergeComment; //Whether we may append new comments to the last encountered one
  bool m_firstInLine; //Whether the next token is the first one in a line

  ///scan table contains pointers to the methods to scan for various token types
  static scan_fun_ptr s_scan_table[];
  static scan_fun_ptr s_scan_keyword_table[];
  static bool s_initialized;
};
#endif // LEXER_H